From cdf6537d27c004f12e199067ce4fcb3cf0f7215a Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Tue, 16 Apr 2024 20:44:21 -0700 Subject: [PATCH 01/18] Add cmake package manager, CPM --- cmake/CPM.cmake | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 cmake/CPM.cmake diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake new file mode 100644 index 00000000..d0fd0e8e --- /dev/null +++ b/cmake/CPM.cmake @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: MIT +# +# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors + +set(CPM_DOWNLOAD_VERSION 0.39.0) +set(CPM_HASH_SUM "66639bcac9dd2907b2918de466783554c1334446b9874e90d38e3778d404c2ef") + +if(CPM_SOURCE_CACHE) + set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +elseif(DEFINED ENV{CPM_SOURCE_CACHE}) + set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +else() + set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +endif() + +# Expand relative path. This is important if the provided path contains a tilde (~) +get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE) + +file(DOWNLOAD + https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake + ${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM} +) + +include(${CPM_DOWNLOAD_LOCATION}) From 21f5b34847688031d267638eaf04b73cccc584fc Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Wed, 17 Apr 2024 12:19:23 -0700 Subject: [PATCH 02/18] Add cuColletions with priority queue --- CMakeLists.txt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index a9f53e03..3692b227 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,6 +79,22 @@ FetchContent_Declare(cereal ) FetchContent_Populate(cereal) +# Package Management +# TODO: Consider using CPM for the various libraries above +include(cmake/CPM.cmake) + +# Add cuCollection with priority queue. This should eventually come from +# NVIDIA. +CPMAddPackage( + NAME cuco + GITHUB_REPOSITORY andrewbriand/cuCollections + GIT_TAG d58dd9fedde721a264c8ae960f7393a3a3b08c58 + OPTIONS + "BUILD_TESTS OFF" + "BUILD_BENCHMARKS OFF" + "BUILD_EXAMPLES OFF" +) + # Auto-detect GPU architecture include("cmake/AutoDetectCudaArch.cmake") From f02f3512ad688421eeba73772136fc7204b94dd2 Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Wed, 17 Apr 2024 12:53:53 -0700 Subject: [PATCH 03/18] Add initial shortest edge collapse with priority queue. This currently includes simple functionality to demonstrate bulk build and pop via host interface. --- apps/CMakeLists.txt | 1 + apps/SECPriority/CMakeLists.txt | 38 +++++++++++++ apps/SECPriority/main.cu | 96 +++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+) create mode 100644 apps/SECPriority/CMakeLists.txt create mode 100644 apps/SECPriority/main.cu diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index bc94993e..b6380c68 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -8,4 +8,5 @@ add_subdirectory( XPBD ) #add_subdirectory( Simplification ) add_subdirectory( ShortestEdgeCollapse ) add_subdirectory( Remesh ) +add_subdirectory( SECPriority ) diff --git a/apps/SECPriority/CMakeLists.txt b/apps/SECPriority/CMakeLists.txt new file mode 100644 index 00000000..930ef76e --- /dev/null +++ b/apps/SECPriority/CMakeLists.txt @@ -0,0 +1,38 @@ +add_executable(SECPriority) + +set(SOURCE_LIST + main.cu +) + +set(COMMON_LIST + ../common/openmesh_trimesh.h + ../common/openmesh_report.h +) + +target_sources(SECPriority + PRIVATE + ${SOURCE_LIST} ${COMMON_LIST} +) + +if (WIN32) + target_compile_definitions(SECPriority + PRIVATE _USE_MATH_DEFINES + PRIVATE NOMINMAX + PRIVATE _CRT_SECURE_NO_WARNINGS) +endif() + +set_target_properties(SECPriority PROPERTIES FOLDER "apps") + +set_property(TARGET SECPriority PROPERTY CUDA_SEPARABLE_COMPILATION ON) + +source_group(TREE ${CMAKE_CURRENT_LIST_DIR} PREFIX "SECPriority" FILES ${SOURCE_LIST}) + +target_link_libraries(SECPriority + PRIVATE RXMesh + PRIVATE gtest_main + PRIVATE OpenMeshCore + PRIVATE OpenMeshTools + PRIVATE cuco +) + +#gtest_discover_tests( SECPriority ) \ No newline at end of file diff --git a/apps/SECPriority/main.cu b/apps/SECPriority/main.cu new file mode 100644 index 00000000..e6062fd6 --- /dev/null +++ b/apps/SECPriority/main.cu @@ -0,0 +1,96 @@ +#include +#include + +#include +#include + +#include +#include + +#include +#include + +#include +#include + +using namespace cuco; +namespace cg = cooperative_groups; + +// grab some bits from priority queue tests and benchmarks + +// -- simulate reading the mesh, computing edge length +// -- cuco:pair +// +// setup pair_less template +// +// setup device function to pop items from queue +// + +template +struct pair_less +{ + __host__ __device__ bool operator()(const T& a, const T& b) const + { + return a.first < b.first; + } +}; + +template +void generate_kv_pairs_uniform(OutputIt output_begin, OutputIt output_end) +{ + std::random_device rd; + std::mt19937 gen{rd()}; + + const auto num_keys = std::distance(output_begin, output_end); + for(auto i = 0; i < num_keys; i++) + { + output_begin[i] = {static_cast(gen()), + static_cast(i)}; + } +} + +void sp_pair() +{ + // Setup the cuco::priority_queue + const size_t insertion_size = 200; + const size_t deletion_size = 100; + using PairType = cuco::pair; + using Compare = pair_less; + + cuco::priority_queue pq(insertion_size); + + // Generate data for the queue + std::vector h_pairs(insertion_size); + generate_kv_pairs_uniform(h_pairs.begin(), h_pairs.end()); + + for(auto i = 0; i < h_pairs.size(); i++) + { + std::cout << "Priority: " << h_pairs[i].first + << "\tID: " << h_pairs[i].second << "\n"; + } + + // Fill the priority queue + thrust::device_vector d_pairs(h_pairs); + pq.push(d_pairs.begin(), d_pairs.end()); + cudaDeviceSynchronize(); + + // Pop the priority queue + thrust::device_vector d_popped(deletion_size); + pq.pop(d_popped.begin(), d_popped.end()); + cudaDeviceSynchronize(); + + std::cout << "-----After Pop-----\n"; + thrust::host_vector h_popped(d_popped); + for(auto i = 0; i < h_popped.size(); i++) + { + std::cout << "Priority: " << h_popped[i].first + << "\tID: " << h_popped[i].second << "\n"; + } +} + +int main(int argc, char* argv[]) +{ + sp_pair(); + + return 0; +} \ No newline at end of file From b625753e9293f62da1005698abe66c689ee52cd4 Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Thu, 18 Apr 2024 16:31:50 -0700 Subject: [PATCH 04/18] Initial rxmesh priority queue-based edge collapse --- apps/SECPriority/CMakeLists.txt | 3 +- apps/SECPriority/secp.cu | 91 +++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 apps/SECPriority/secp.cu diff --git a/apps/SECPriority/CMakeLists.txt b/apps/SECPriority/CMakeLists.txt index 930ef76e..e47f7ab5 100644 --- a/apps/SECPriority/CMakeLists.txt +++ b/apps/SECPriority/CMakeLists.txt @@ -1,7 +1,8 @@ add_executable(SECPriority) set(SOURCE_LIST - main.cu + #main.cu + secp.cu ) set(COMMON_LIST diff --git a/apps/SECPriority/secp.cu b/apps/SECPriority/secp.cu new file mode 100644 index 00000000..a078ee8a --- /dev/null +++ b/apps/SECPriority/secp.cu @@ -0,0 +1,91 @@ +#include "gtest/gtest.h" +#include "rxmesh/util/log.h" +#include "rxmesh/util/macros.h" +#include "rxmesh/util/util.h" + +#include "rxmesh/rxmesh_dynamic.h" + +struct arg +{ + std::string obj_file_name = STRINGIFY(INPUT_DIR) "dragon.obj"; + std::string output_folder = STRINGIFY(OUTPUT_DIR); + uint32_t target = 0; + uint32_t device_id = 0; + char** argv; + int argc; +} Arg; + +//#include "secp_rxmesh.cuh" + +TEST(Apps, SECPriority) +{ + using namespace rxmesh; + + // Select device + cuda_query(Arg.device_id); + + RXMeshDynamic rx(Arg.obj_file_name); + rx.save(STRINGIFY(OUTPUT_DIR) + extract_file_name(Arg.obj_file_name) + + "_patches"); + + // RXMeshDynamic rx(Arg.obj_file_name, + // STRINGIFY(OUTPUT_DIR) + + // extract_file_name(Arg.obj_file_name) + "_patches", + // true); + + ASSERT_TRUE(rx.is_edge_manifold()); + +// secp_rxmesh(rx, Arg.target); +} + + +int main(int argc, char** argv) +{ + using namespace rxmesh; + Log::init(); + + ::testing::InitGoogleTest(&argc, argv); + Arg.argv = argv; + Arg.argc = argc; + + + if (argc > 1) { + if (cmd_option_exists(argv, argc + argv, "-h")) { + // clang-format off + RXMESH_INFO("\nUsage: SECPriority.exe < -option X>\n" + " -h: Display this massage and exit\n" + " -input: Input file. Input file should be under the input/ subdirectory\n" + " Default is {} \n" + " Hint: Only accept OBJ files\n" + " -target: The final/target number of faces in the output mesh\n" + " -o: JSON file output folder. Default is {} \n" + " -device_id: GPU device ID. Default is {}", + Arg.obj_file_name, Arg.output_folder, Arg.device_id); + // clang-format on + exit(EXIT_SUCCESS); + } + + if (cmd_option_exists(argv, argc + argv, "-input")) { + Arg.obj_file_name = + std::string(get_cmd_option(argv, argv + argc, "-input")); + } + if (cmd_option_exists(argv, argc + argv, "-o")) { + Arg.output_folder = + std::string(get_cmd_option(argv, argv + argc, "-o")); + } + if (cmd_option_exists(argv, argc + argv, "-device_id")) { + Arg.device_id = + atoi(get_cmd_option(argv, argv + argc, "-device_id")); + } + if (cmd_option_exists(argv, argc + argv, "-target")) { + Arg.target = false; + } + } + + RXMESH_TRACE("input= {}", Arg.obj_file_name); + RXMESH_TRACE("output_folder= {}", Arg.output_folder); + RXMESH_TRACE("device_id= {}", Arg.device_id); + RXMESH_TRACE("target= {}", Arg.target); + + return RUN_ALL_TESTS(); +} \ No newline at end of file From 1c0a4ba9479cce5a1f9177b6b51bd92e089a6b63 Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Mon, 6 May 2024 21:48:13 -0700 Subject: [PATCH 05/18] ignore build_debug directory --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index cff3e42b..8d612cc7 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ input/* !input/plane_5.obj !input/sphere1.obj build/ +build_debug/ include/rxmesh/util/git_sha1.cpp .vscode/ -scripts/*.log \ No newline at end of file +scripts/*.log From f825a1d80669d993a120574049c4d3bb088017be Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Mon, 6 May 2024 22:03:40 -0700 Subject: [PATCH 06/18] Add initial cuCollections priority queue Currently adds the pair using the priority queue device-side api. --- CMakeLists.txt | 2 +- apps/SECPriority/CMakeLists.txt | 2 + apps/SECPriority/secp.cu | 4 +- apps/SECPriority/secp_kernels.cuh | 380 ++++++++++++++++++++++++++++++ apps/SECPriority/secp_rxmesh.cuh | 316 +++++++++++++++++++++++++ 5 files changed, 701 insertions(+), 3 deletions(-) create mode 100644 apps/SECPriority/secp_kernels.cuh create mode 100644 apps/SECPriority/secp_rxmesh.cuh diff --git a/CMakeLists.txt b/CMakeLists.txt index 3692b227..38888200 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -149,7 +149,7 @@ set(cxx_flags set(MSVC_XCOMPILER_FLAGS "/openmp:experimental /MP /std:c++17 /Zi") set(cuda_flags - -Xcompiler=$<$:-Wall -fopenmp -O3 -Wno-unused-function> + -Xcompiler=$<$:-rdynamic -Wall -fopenmp -O3 -Wno-unused-function> -Xcompiler=$<$:${MSVC_XCOMPILER_FLAGS}> #Disables warning #177-D "function XXX was declared but never referenced" diff --git a/apps/SECPriority/CMakeLists.txt b/apps/SECPriority/CMakeLists.txt index e47f7ab5..4f391430 100644 --- a/apps/SECPriority/CMakeLists.txt +++ b/apps/SECPriority/CMakeLists.txt @@ -3,6 +3,8 @@ add_executable(SECPriority) set(SOURCE_LIST #main.cu secp.cu + secp_rxmesh.cuh + secp_kernels.cuh ) set(COMMON_LIST diff --git a/apps/SECPriority/secp.cu b/apps/SECPriority/secp.cu index a078ee8a..1cdd2b2d 100644 --- a/apps/SECPriority/secp.cu +++ b/apps/SECPriority/secp.cu @@ -15,7 +15,7 @@ struct arg int argc; } Arg; -//#include "secp_rxmesh.cuh" +#include "secp_rxmesh.cuh" TEST(Apps, SECPriority) { @@ -35,7 +35,7 @@ TEST(Apps, SECPriority) ASSERT_TRUE(rx.is_edge_manifold()); -// secp_rxmesh(rx, Arg.target); + secp_rxmesh(rx, Arg.target); } diff --git a/apps/SECPriority/secp_kernels.cuh b/apps/SECPriority/secp_kernels.cuh new file mode 100644 index 00000000..1fc2ca39 --- /dev/null +++ b/apps/SECPriority/secp_kernels.cuh @@ -0,0 +1,380 @@ +#pragma once +#include "rxmesh/cavity_manager.cuh" + +#include +#include + +template +__global__ static void secp(rxmesh::Context context, + rxmesh::VertexAttribute coords, + // const CostHistogram histo, + const int reduce_threshold, + rxmesh::EdgeAttribute edge_status, + rxmesh::EdgeAttribute e_attr, + int* d_num_cavities) +{ + using namespace rxmesh; + auto block = cooperative_groups::this_thread_block(); + ShmemAllocator shrd_alloc; + CavityManager cavity( + block, context, shrd_alloc, true); + + const uint32_t pid = cavity.patch_id(); + + if (pid == INVALID32) { + return; + } + + // we first use this mask to set the edge we want to collapse (and then + // filter them). Then after cavity.prologue, we reuse this bitmask to mark + // the newly added edges + Bitmask edge_mask(cavity.patch_info().edges_capacity[0], shrd_alloc); + + // we use this bitmask to mark the other end of to-be-collapse edge during + // checking for the link condition + Bitmask v0_mask(cavity.patch_info().num_vertices[0], shrd_alloc); + Bitmask v1_mask(cavity.patch_info().num_vertices[0], shrd_alloc); + + + // Precompute EV + Query ev_query(context, pid); + ev_query.prologue(block, shrd_alloc); + block.sync(); + + // 1) mark edge we want to collapse + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + assert(eh.local_id() < cavity.patch_info().num_edges[0]); + + if (edge_status(eh) != UNSEEN) { + return; + } + const VertexIterator iter = + ev_query.template get_iterator(eh.local_id()); + + const VertexHandle v0 = iter[0]; + const VertexHandle v1 = iter[1]; + + const Vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const Vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + + T len2 = glm::distance2(p0, p1); + + /*if (histo.get_bin(len2) <= reduce_threshold) { + //::atomicAdd(d_num_cavities + 1, 1); + // cavity.create(eh); + edge_mask.set(eh.local_id(), true); + }*/ + }); + block.sync(); + + + // 2) check edge link condition. Here, for each edge marked in edge_mask, + // all threads in the block collaborate to check the edge link condition of + // this edge + __shared__ int s_num_shared_one_ring; + for (uint16_t e = 0; e < edge_mask.size(); ++e) { + + if (edge_mask(e)) { + // the edge two end vertices + const VertexIterator iter = + ev_query.template get_iterator(e); + + const uint16_t v0 = iter.local(0); + const uint16_t v1 = iter.local(1); + + if (threadIdx.x == 0) { + s_num_shared_one_ring = 0; + } + + v0_mask.reset(block); + v1_mask.reset(block); + block.sync(); + + // each thread will be assigned to an edge (including not-owned one) + // and mark in v0_mask/v1_mask if one of its two ends are v0/v1 + for_each_edge( + cavity.patch_info(), + [&](EdgeHandle eh) { + if (eh.local_id() == e) { + return; + } + const VertexIterator v_iter = + ev_query.template get_iterator( + eh.local_id()); + + const uint16_t vv0 = v_iter.local(0); + const uint16_t vv1 = v_iter.local(1); + + + if (vv0 == v0) { + v0_mask.set(vv1, true); + } + if (vv0 == v1) { + v1_mask.set(vv1, true); + } + + if (vv1 == v0) { + v0_mask.set(vv0, true); + } + if (vv1 == v1) { + v1_mask.set(vv0, true); + } + }, + true); + block.sync(); + + for (int v = threadIdx.x; v < v0_mask.size(); v += blockThreads) { + if (v0_mask(v) && v1_mask(v)) { + ::atomicAdd(&s_num_shared_one_ring, 1); + } + } + + block.sync(); + if (s_num_shared_one_ring > 2) { + edge_mask.reset(e, true); + } + } + } + block.sync(); + + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + assert(eh.local_id() < cavity.patch_info().num_edges[0]); + if (edge_mask(eh.local_id())) { + cavity.create(eh); + } else { + edge_status(eh) = OKAY; + } + }); + block.sync(); + + ev_query.epilogue(block, shrd_alloc); + + // create the cavity + if (cavity.prologue(block, shrd_alloc, coords, edge_status, e_attr)) { + + // if (threadIdx.x == 0) { + // uint16_t num_actual_cavities = 0; + // for (int i = 0; i < cavity.m_s_active_cavity_bitmask.size(); ++i) + // { + // if (cavity.m_s_active_cavity_bitmask(i)) { + // num_actual_cavities++; + // } + // } + // ::atomicAdd(d_num_cavities, num_actual_cavities); + // } + edge_mask.reset(block); + block.sync(); + + // fill in the cavities + cavity.for_each_cavity(block, [&](uint16_t c, uint16_t size) { + const EdgeHandle src = cavity.template get_creator(c); + + // TODO handle boundary edges + + VertexHandle v0, v1; + + cavity.get_vertices(src, v0, v1); + + const VertexHandle new_v = cavity.add_vertex(); + + if (new_v.is_valid()) { + + coords(new_v, 0) = (coords(v0, 0) + coords(v1, 0)) * 0.5; + coords(new_v, 1) = (coords(v0, 1) + coords(v1, 1)) * 0.5; + coords(new_v, 2) = (coords(v0, 2) + coords(v1, 2)) * 0.5; + + + DEdgeHandle e0 = + cavity.add_edge(new_v, cavity.get_cavity_vertex(c, 0)); + + e_attr(e0.get_edge_handle())++; + + if (e0.is_valid()) { + edge_mask.set(e0.local_id(), true); + + const DEdgeHandle e_init = e0; + + for (uint16_t i = 0; i < size; ++i) { + const DEdgeHandle e = cavity.get_cavity_edge(c, i); + + const VertexHandle v_end = + cavity.get_cavity_vertex(c, (i + 1) % size); + + const DEdgeHandle e1 = + (i == size - 1) ? + e_init.get_flip_dedge() : + cavity.add_edge( + cavity.get_cavity_vertex(c, i + 1), new_v); + + if (!e1.is_valid()) { + break; + } + + if (i != size - 1) { + edge_mask.set(e1.local_id(), true); + } + + const FaceHandle new_f = cavity.add_face(e0, e, e1); + + if (!new_f.is_valid()) { + break; + } + e0 = e1.get_flip_dedge(); + } + } + } + }); + } + + + cavity.epilogue(block); + block.sync(); + + if (cavity.is_successful()) { + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + if (edge_mask(eh.local_id())) { + edge_status(eh) = ADDED; + } + }); + } +} + +//template +template +__global__ static void compute_edge_priorities( + rxmesh::Context context, + const rxmesh::VertexAttribute coords, + PQView_t pq_view, + size_t pq_num_bytes) +{ + using namespace rxmesh; + + // shared mem variable + // Pair_t my_local_pair_array + // index into above + // sdp: needs to be the number of edges in the batch, + // which is not known at compile time + //__shared__ PriorityPair_t intermediatePairs[blockThreads]; + +// __shared__ int my_pair_count; + // some thread needs to initialize the above to 0 + // sync + //extern __shared__ int shmem[]; + namespace cg = cooperative_groups; + cg::thread_block g = cg::this_thread_block(); + ShmemAllocator shrd_alloc; + + Query query(context); + auto intermediatePairs = shrd_alloc.alloc(query.get_patch_info().num_edges[0]); + __shared__ int pair_counter; + pair_counter = 0; + +/* 05/06 + char * pq_shrd_mem = shrd_alloc.alloc(pq_num_bytes); + printf("blockThreads:%u\n", blockThreads); + printf("pq_shrd_mem:%p\t\n", (void*)pq_shrd_mem); + */ + + auto edge_len = [&](const EdgeHandle& eh, const VertexIterator& iter) { + const VertexHandle v0 = iter[0]; + const VertexHandle v1 = iter[1]; + + const Vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const Vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + + T len2 = glm::distance2(p0, p1); + + //PriorityPair_t p{(double)len2, (double)len2}; + //PriorityPair_t p{len2, len2}; + auto p_e = rxmesh::detail::unpack(eh.unique_id()); + //printf("e_id:%llu\t, len:%f\n", eh.unique_id(), len2); + PriorityPair_t p{len2, p_e.second}; + //PriorityPair_t p{len2, eh}; + + auto val_counter = atomicAdd(&pair_counter, 1); + intermediatePairs[val_counter] = p; + + //PriorityPair_t p{len2, eh}; + //PriorityPair_t p{len2, eh.patch_id()}; + //PriorityPair_t p{len2, eh.m_handle}; //same as just eh + //alignas(64) PriorityPair_t arr_p[1] = {p}; + //PriorityPair_t p{len2, eh}; + //PriorityPair_t p{len2, eh.patch_id()}; + //PriorityPair_t p{len2, eh.m_handle}; //same as just eh + //alignas(64) PriorityPair_t arr_p[1] = {p}; +/* 05/06 + alignas(16) PriorityPair_t arr_p[1] = {p}; + //pq_view.push(g, arr_p, *(&arr_p + 1), shmem); + pq_view.push(g, arr_p, arr_p + 1, pq_shrd_mem); + */ + //SDP: looks like the push might need to happen outside of this + //lambda. I think I need a static array where a thread?/EdgeHandle + //can be stored. Then, after the query.dispatch below, + //I can do a pq_view.push(cg, &array[0], &array[size], shmem) + + //Alternatively, this kernel computes the edge_length for EdgeHandle + //and saves to a thrust::device_vector d_pairs. Given an EdgeHandle, + //what's my index into the device_vector? + //Then, call pq_view.push(d_pairs.begin(), d_pairs.end()) from host side. +//SDP remove for build auto my_pair = {len2, eh}; +//SDP remove for build until can revisit pq_view.push(cg, &my_pair[0], &my_pair[0]+1, ); + +//SDP remove for build pq_view.push(); + //atomicMin(histo.min_value(), len2); + //atomicMax(histo.max_value(), len2); +// int old_value = atomicAdd(&my_pair_count, 1); + }; + + auto block = cooperative_groups::this_thread_block(); + + //ShmemAllocator shrd_alloc; + + // need to account for this when the kernel is launched +//SDP remove for build PriorityPair_t* my_local_array = shrd_alloc.alloc(query.patch_info().m_edge_capacity[0]); + // allocate memory for the priority_queue and keep that pointer to pass + query.dispatch(block, shrd_alloc, edge_len); + // sync just in case for now + // actually do the push to the queue + block.sync(); + //if(block.thread_rank() == 0) { + // printf("iterators: begin = %p\t end = %p\n", (void*)intermediatePairs, (void*)(intermediatePairs + blockThreads)); + + char * pq_shrd_mem = shrd_alloc.alloc(pq_num_bytes); + // printf("blockThreads:%u\n", blockThreads); + // printf("pq_shrd_mem:%p\t\n", (void*)pq_shrd_mem); + // for(size_t i = 0; i < blockThreads; i++) { + // printf("imp:%d\t%f\t%u\t%p\n", i, intermediatePairs[i].first, intermediatePairs[i].second, &(intermediatePairs[i])); + // } + pq_view.push(block, intermediatePairs, intermediatePairs + pair_counter, pq_shrd_mem); + //shrd_alloc.dealloc(pq_num_bytes); + //} +} + +/* +template +__global__ static void populate_histogram( + rxmesh::Context context, + const rxmesh::VertexAttribute coords, + CostHistogram histo) +{ + using namespace rxmesh; + + auto edge_len = [&](const EdgeHandle& eh, const VertexIterator& iter) { + const VertexHandle v0 = iter[0]; + const VertexHandle v1 = iter[1]; + + const Vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const Vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + + T len2 = glm::distance2(p0, p1); + + histo.insert(len2); + }; + + auto block = cooperative_groups::this_thread_block(); + + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, edge_len); +} +*/ \ No newline at end of file diff --git a/apps/SECPriority/secp_rxmesh.cuh b/apps/SECPriority/secp_rxmesh.cuh new file mode 100644 index 00000000..c1d7eee3 --- /dev/null +++ b/apps/SECPriority/secp_rxmesh.cuh @@ -0,0 +1,316 @@ +#pragma once + +#define GLM_ENABLE_EXPERIMENTAL +#include +#include + + +#include "rxmesh/query.cuh" +#include "rxmesh/rxmesh_dynamic.h" + +// Priority Queue related includes +#include +#include + +#include +#include + +// Priority queue setup. Use 'pair_less' to prioritize smaller values. +template +struct pair_less +{ + __host__ __device__ bool operator()(const T& a, const T& b) const + { + return a.first < b.first; + } +}; +//using PriorityPair_t = cuco::pair; +//using PriorityPair_t = cuco::pair; + +using PriorityPair_t = cuco::pair; + +//using PriorityPair_t = cuco::pair; +//using PriorityPair_t = cuco::pair; +using PriorityCompare = pair_less; +using PriorityQueue_t = cuco::priority_queue; +using PQView_t = PriorityQueue_t::device_mutable_view; + + +template +using Vec3 = glm::vec<3, T, glm::defaultp>; + +using EdgeStatus = int8_t; +enum : EdgeStatus +{ + UNSEEN = 0, // means we have not tested it before for e.g., split/flip/col + OKAY = 1, // means we have tested it and it is okay to skip + UPDATE = 2, // means we should update it i.e., we have tested it before + ADDED = 3, // means it has been added to during the split/flip/collapse +}; + +//#include "histogram.cuh" +#include "secp_kernels.cuh" + + +inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, + const uint32_t final_num_faces) +{ + //EXPECT_TRUE(rx.validate()); +//return; + using namespace rxmesh; + constexpr uint32_t blockThreads = 32; + + + auto coords = rx.get_input_vertex_coordinates(); + + auto edge_status = rx.add_edge_attribute("EdgeStatus", 1); + + LaunchBox launch_box; + + float total_time = 0; + float app_time = 0; + float slice_time = 0; + float cleanup_time = 0; + + const int num_bins = 256; + +// CostHistogram histo(num_bins); + PriorityQueue_t pq(rx.get_num_edges()); + + auto e_attr = rx.add_edge_attribute("eMark", 1); + + +#if USE_POLYSCOPE + rx.render_vertex_patch(); + rx.render_edge_patch(); + rx.render_face_patch(); + // polyscope::show(); +#endif + + bool validate = false; + + int* d_num_cavities = nullptr; + CUDA_ERROR(cudaMalloc((void**)&d_num_cavities, 2 * sizeof(int))); + CUDA_ERROR(cudaMemset(d_num_cavities, 0, 2 * sizeof(int))); + + float reduce_ratio = 0.05; + + // Priority Queue Setup + // + + // + // Priority Queue Setup End + + CUDA_ERROR(cudaProfilerStart()); + GPUTimer timer; + timer.start(); + while (rx.get_num_faces() > final_num_faces) { + + rx.prepare_launch_box({Op::EV}, + launch_box, + (void*)compute_edge_priorities, + false, + false, + false, + false, + [&](uint32_t v, uint32_t e, uint32_t f){ + // Allocate enough additional memory + // for the priority queue. + // SDP: not enough info here to + // determine the memory the + // priority queue needs. Probably don't + // want to use this. + // See below. + //return 0;//e*sizeof(PriorityPair_t); + return pq.get_shmem_size(blockThreads) + (e*sizeof(PriorityPair_t)); + }); + //launch_box.smem_bytes_dyn += pq.get_shmem_size(blockThreads); + + RXMESH_TRACE("hello serban 11111111\n"); + RXMESH_TRACE("pair_alignment(){}", cuco::detail::pair_alignment()); + RXMESH_TRACE("pair_alignment(){}", cuco::detail::pair_alignment()); + RXMESH_TRACE("pair_alignment(){}", cuco::detail::pair_alignment()); + RXMESH_TRACE("pair_alignment(){}", cuco::detail::pair_alignment()); + RXMESH_TRACE("pair_alignment(){}", cuco::detail::pair_alignment()); + RXMESH_TRACE("sizeof(PriorityPair_t){}", sizeof(PriorityPair_t)); + compute_edge_priorities + <<>>(rx.get_context(), *coords, pq.get_mutable_device_view(), pq.get_shmem_size(blockThreads)); + + cudaDeviceSynchronize(); + RXMESH_TRACE("hello serban 2222222\n"); + RXMESH_TRACE("launch_box.smem_bytes_dyn = {}", launch_box.smem_bytes_dyn); + RXMESH_TRACE("pq.get_shmem_size = {}", pq.get_shmem_size(blockThreads)); + + // now pop all the elements + // + thrust::device_vector d_popped(rx.get_num_edges()); + pq.pop(d_popped.begin(), d_popped.end()); + cudaDeviceSynchronize(); + const thrust::host_vector h_popped(d_popped); + for(size_t i = 0; i < h_popped.size(); i++) + { + std::cout << i << "\t" << h_popped[i].first + << "\t" << h_popped[i].second << "\n"; + } + return; + // compute max-min histogram + //histo.init(); + + //rx.prepare_launch_box({Op::EV}, + // launch_box, + // (void*)compute_min_max_cost, + // false); + //compute_min_max_cost + // <<>>(rx.get_context(), *coords, histo); + + //// compute histogram bins + //rx.prepare_launch_box({Op::EV}, + // launch_box, + // (void*)populate_histogram, + // false); + //populate_histogram + // <<>>(rx.get_context(), *coords, histo); + + //histo.scan(); + + + // how much we can reduce the number of edge at each iterations + reduce_ratio = reduce_ratio + 0.05; + + // loop over the mesh, and try to collapse + const int reduce_threshold = + std::max(1, int(reduce_ratio * float(rx.get_num_edges()))); + + // reset edge status + edge_status->reset(UNSEEN, DEVICE); + + rx.reset_scheduler(); + while (!rx.is_queue_empty() && rx.get_num_faces() > final_num_faces) { + RXMESH_INFO(" Queue size = {}", + rx.get_context().m_patch_scheduler.size()); + + rx.prepare_launch_box( + {Op::EV}, + launch_box, + (void*)secp, + true, + false, + false, + false, + [&](uint32_t v, uint32_t e, uint32_t f) { + return detail::mask_num_bytes(e) + + 2 * detail::mask_num_bytes(v) + + 3 * ShmemAllocator::default_alignment; + }); + + e_attr->reset(0, DEVICE); + + GPUTimer app_timer; + app_timer.start(); + secp + <<>>(rx.get_context(), + *coords, + //histo, + reduce_threshold, + *edge_status, + *e_attr, + d_num_cavities); + + app_timer.stop(); + + GPUTimer cleanup_timer; + cleanup_timer.start(); + rx.cleanup(); + cleanup_timer.stop(); + + GPUTimer slice_timer; + slice_timer.start(); + rx.slice_patches(*coords, *edge_status, *e_attr); + slice_timer.stop(); + + GPUTimer cleanup_timer2; + cleanup_timer2.start(); + rx.cleanup(); + cleanup_timer2.stop(); + + + CUDA_ERROR(cudaDeviceSynchronize()); + CUDA_ERROR(cudaGetLastError()); + + app_time += app_timer.elapsed_millis(); + slice_time += slice_timer.elapsed_millis(); + cleanup_time += cleanup_timer.elapsed_millis(); + cleanup_time += cleanup_timer2.elapsed_millis(); + + + if (validate) { + rx.update_host(); + EXPECT_TRUE(rx.validate()); + RXMESH_INFO(" num_vertices = {}, num_edges= {}, num_faces= {}", + rx.get_num_vertices(), + rx.get_num_edges(), + rx.get_num_faces()); + } + } + + { + int h_num_cavities[2]; + CUDA_ERROR(cudaMemcpy(&h_num_cavities, + d_num_cavities, + 2 * sizeof(int), + cudaMemcpyDeviceToHost)); + RXMESH_INFO(" Requested cavities = {}, executed cavities = {}", + h_num_cavities[1], + h_num_cavities[0]); + + coords->move(DEVICE, HOST); + e_attr->move(DEVICE, HOST); + rx.update_host(); + rx.update_polyscope(); + auto ps_mesh = rx.get_polyscope_mesh(); + ps_mesh->updateVertexPositions(*coords); + ps_mesh->setEnabled(false); + ps_mesh->addEdgeScalarQuantity("eMark", *e_attr); + rx.render_vertex_patch(); + rx.render_edge_patch(); + rx.render_face_patch(); + polyscope::show(); + } + } + timer.stop(); + total_time += timer.elapsed_millis(); + CUDA_ERROR(cudaProfilerStop()); + + RXMESH_INFO("secp_rxmesh() RXMesh simplification took {} (ms)", total_time); + RXMESH_INFO("secp_rxmesh() App time {} (ms)", app_time); + RXMESH_INFO("secp_rxmesh() Slice timer {} (ms)", slice_time); + RXMESH_INFO("secp_rxmesh() Cleanup timer {} (ms)", cleanup_time); + + if (!validate) { + rx.update_host(); + } + coords->move(DEVICE, HOST); + +#if USE_POLYSCOPE + rx.update_polyscope(); + + auto ps_mesh = rx.get_polyscope_mesh(); + ps_mesh->updateVertexPositions(*coords); + ps_mesh->setEnabled(false); + + rx.render_vertex_patch(); + rx.render_edge_patch(); + rx.render_face_patch(); + polyscope::show(); +#endif + +// histo.free(); +} \ No newline at end of file From c889d45f9b1034a157c70dc8836622bd1b76084e Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Tue, 7 May 2024 01:34:04 -0700 Subject: [PATCH 07/18] Encode patch and local into 32 bits, clean up some dead code --- apps/SECPriority/secp_kernels.cuh | 113 ++++-------------------------- apps/SECPriority/secp_rxmesh.cuh | 82 +++++++++++++--------- 2 files changed, 62 insertions(+), 133 deletions(-) diff --git a/apps/SECPriority/secp_kernels.cuh b/apps/SECPriority/secp_kernels.cuh index 1fc2ca39..2fef5992 100644 --- a/apps/SECPriority/secp_kernels.cuh +++ b/apps/SECPriority/secp_kernels.cuh @@ -248,18 +248,6 @@ __global__ static void compute_edge_priorities( size_t pq_num_bytes) { using namespace rxmesh; - - // shared mem variable - // Pair_t my_local_pair_array - // index into above - // sdp: needs to be the number of edges in the batch, - // which is not known at compile time - //__shared__ PriorityPair_t intermediatePairs[blockThreads]; - -// __shared__ int my_pair_count; - // some thread needs to initialize the above to 0 - // sync - //extern __shared__ int shmem[]; namespace cg = cooperative_groups; cg::thread_block g = cg::this_thread_block(); ShmemAllocator shrd_alloc; @@ -269,12 +257,6 @@ __global__ static void compute_edge_priorities( __shared__ int pair_counter; pair_counter = 0; -/* 05/06 - char * pq_shrd_mem = shrd_alloc.alloc(pq_num_bytes); - printf("blockThreads:%u\n", blockThreads); - printf("pq_shrd_mem:%p\t\n", (void*)pq_shrd_mem); - */ - auto edge_len = [&](const EdgeHandle& eh, const VertexIterator& iter) { const VertexHandle v0 = iter[0]; const VertexHandle v1 = iter[1]; @@ -284,97 +266,28 @@ __global__ static void compute_edge_priorities( T len2 = glm::distance2(p0, p1); - //PriorityPair_t p{(double)len2, (double)len2}; - //PriorityPair_t p{len2, len2}; auto p_e = rxmesh::detail::unpack(eh.unique_id()); + //printf("p_id:%u\te_id:%hu\n", p_e.first, p_e.second); //printf("e_id:%llu\t, len:%f\n", eh.unique_id(), len2); - PriorityPair_t p{len2, p_e.second}; + + // repack the EdgeHandle into smaller 32 bits for + // use with priority queue. Need to check elsewhere + // that there are less than 2^16 patches. + auto id32 = unique_id32(p_e.second, (uint16_t)p_e.first); + //auto p_e_32 = unpack32(id32); + //printf("32bit p_id:%hu\te_id:%hu\n", p_e_32.first, p_e_32.second); + + PriorityPair_t p{len2, id32}; //PriorityPair_t p{len2, eh}; auto val_counter = atomicAdd(&pair_counter, 1); intermediatePairs[val_counter] = p; - - //PriorityPair_t p{len2, eh}; - //PriorityPair_t p{len2, eh.patch_id()}; - //PriorityPair_t p{len2, eh.m_handle}; //same as just eh - //alignas(64) PriorityPair_t arr_p[1] = {p}; - //PriorityPair_t p{len2, eh}; - //PriorityPair_t p{len2, eh.patch_id()}; - //PriorityPair_t p{len2, eh.m_handle}; //same as just eh - //alignas(64) PriorityPair_t arr_p[1] = {p}; -/* 05/06 - alignas(16) PriorityPair_t arr_p[1] = {p}; - //pq_view.push(g, arr_p, *(&arr_p + 1), shmem); - pq_view.push(g, arr_p, arr_p + 1, pq_shrd_mem); - */ - //SDP: looks like the push might need to happen outside of this - //lambda. I think I need a static array where a thread?/EdgeHandle - //can be stored. Then, after the query.dispatch below, - //I can do a pq_view.push(cg, &array[0], &array[size], shmem) - - //Alternatively, this kernel computes the edge_length for EdgeHandle - //and saves to a thrust::device_vector d_pairs. Given an EdgeHandle, - //what's my index into the device_vector? - //Then, call pq_view.push(d_pairs.begin(), d_pairs.end()) from host side. -//SDP remove for build auto my_pair = {len2, eh}; -//SDP remove for build until can revisit pq_view.push(cg, &my_pair[0], &my_pair[0]+1, ); - -//SDP remove for build pq_view.push(); - //atomicMin(histo.min_value(), len2); - //atomicMax(histo.max_value(), len2); -// int old_value = atomicAdd(&my_pair_count, 1); }; auto block = cooperative_groups::this_thread_block(); - - //ShmemAllocator shrd_alloc; - - // need to account for this when the kernel is launched -//SDP remove for build PriorityPair_t* my_local_array = shrd_alloc.alloc(query.patch_info().m_edge_capacity[0]); - // allocate memory for the priority_queue and keep that pointer to pass query.dispatch(block, shrd_alloc, edge_len); - // sync just in case for now - // actually do the push to the queue block.sync(); - //if(block.thread_rank() == 0) { - // printf("iterators: begin = %p\t end = %p\n", (void*)intermediatePairs, (void*)(intermediatePairs + blockThreads)); - - char * pq_shrd_mem = shrd_alloc.alloc(pq_num_bytes); - // printf("blockThreads:%u\n", blockThreads); - // printf("pq_shrd_mem:%p\t\n", (void*)pq_shrd_mem); - // for(size_t i = 0; i < blockThreads; i++) { - // printf("imp:%d\t%f\t%u\t%p\n", i, intermediatePairs[i].first, intermediatePairs[i].second, &(intermediatePairs[i])); - // } - pq_view.push(block, intermediatePairs, intermediatePairs + pair_counter, pq_shrd_mem); - //shrd_alloc.dealloc(pq_num_bytes); - //} -} -/* -template -__global__ static void populate_histogram( - rxmesh::Context context, - const rxmesh::VertexAttribute coords, - CostHistogram histo) -{ - using namespace rxmesh; - - auto edge_len = [&](const EdgeHandle& eh, const VertexIterator& iter) { - const VertexHandle v0 = iter[0]; - const VertexHandle v1 = iter[1]; - - const Vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const Vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); - - T len2 = glm::distance2(p0, p1); - - histo.insert(len2); - }; - - auto block = cooperative_groups::this_thread_block(); - - Query query(context); - ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, edge_len); -} -*/ \ No newline at end of file + char * pq_shrd_mem = shrd_alloc.alloc(pq_num_bytes); + pq_view.push(block, intermediatePairs, intermediatePairs + pair_counter, pq_shrd_mem); +} \ No newline at end of file diff --git a/apps/SECPriority/secp_rxmesh.cuh b/apps/SECPriority/secp_rxmesh.cuh index c1d7eee3..163db23a 100644 --- a/apps/SECPriority/secp_rxmesh.cuh +++ b/apps/SECPriority/secp_rxmesh.cuh @@ -15,6 +15,38 @@ #include #include +/** + * @brief Return unique index of the local mesh element composed by the + * patch id and the local index + * + * @param local_id the local within-patch mesh element id + * @param patch_id the patch owning the mesh element + * @return + */ +constexpr __device__ __host__ __forceinline__ uint32_t +unique_id32(const uint16_t local_id, const uint16_t patch_id) +{ + uint32_t ret = patch_id; + ret = (ret << 16); + ret |= local_id; + return ret; +} + +/** + * @brief unpack a 32 uint to its high and low 16 bits. + * This is used to convert the unique id to its local id (16 + * low bit) and patch id (high 16 bit) + * @param uid unique id + * @return a std::pair storing the patch id and local id + */ +constexpr __device__ __host__ __forceinline__ std::pair + unpack32(uint32_t uid) +{ + uint16_t local_id = uid & ((1 << 16) - 1); + uint16_t patch_id = uid >> 16; + return std::make_pair(patch_id, local_id); +} + // Priority queue setup. Use 'pair_less' to prioritize smaller values. template struct pair_less @@ -24,13 +56,8 @@ struct pair_less return a.first < b.first; } }; -//using PriorityPair_t = cuco::pair; -//using PriorityPair_t = cuco::pair; using PriorityPair_t = cuco::pair; - -//using PriorityPair_t = cuco::pair; -//using PriorityPair_t = cuco::pair; using PriorityCompare = pair_less; using PriorityQueue_t = cuco::priority_queue; using PQView_t = PriorityQueue_t::device_mutable_view; @@ -48,15 +75,14 @@ enum : EdgeStatus ADDED = 3, // means it has been added to during the split/flip/collapse }; -//#include "histogram.cuh" #include "secp_kernels.cuh" inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, const uint32_t final_num_faces) { - //EXPECT_TRUE(rx.validate()); -//return; + EXPECT_TRUE(rx.validate()); + using namespace rxmesh; constexpr uint32_t blockThreads = 32; @@ -74,7 +100,6 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, const int num_bins = 256; -// CostHistogram histo(num_bins); PriorityQueue_t pq(rx.get_num_edges()); auto e_attr = rx.add_edge_attribute("eMark", 1); @@ -95,12 +120,6 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, float reduce_ratio = 0.05; - // Priority Queue Setup - // - - // - // Priority Queue Setup End - CUDA_ERROR(cudaProfilerStart()); GPUTimer timer; timer.start(); @@ -115,23 +134,14 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, false, [&](uint32_t v, uint32_t e, uint32_t f){ // Allocate enough additional memory - // for the priority queue. - // SDP: not enough info here to - // determine the memory the - // priority queue needs. Probably don't - // want to use this. - // See below. - //return 0;//e*sizeof(PriorityPair_t); + // for the priority queue and the intermediate + // array of PriorityPait_t. return pq.get_shmem_size(blockThreads) + (e*sizeof(PriorityPair_t)); }); - //launch_box.smem_bytes_dyn += pq.get_shmem_size(blockThreads); - RXMESH_TRACE("hello serban 11111111\n"); RXMESH_TRACE("pair_alignment(){}", cuco::detail::pair_alignment()); RXMESH_TRACE("pair_alignment(){}", cuco::detail::pair_alignment()); RXMESH_TRACE("pair_alignment(){}", cuco::detail::pair_alignment()); - RXMESH_TRACE("pair_alignment(){}", cuco::detail::pair_alignment()); - RXMESH_TRACE("pair_alignment(){}", cuco::detail::pair_alignment()); RXMESH_TRACE("sizeof(PriorityPair_t){}", sizeof(PriorityPair_t)); compute_edge_priorities <<>>(rx.get_context(), *coords, pq.get_mutable_device_view(), pq.get_shmem_size(blockThreads)); cudaDeviceSynchronize(); - RXMESH_TRACE("hello serban 2222222\n"); RXMESH_TRACE("launch_box.smem_bytes_dyn = {}", launch_box.smem_bytes_dyn); RXMESH_TRACE("pq.get_shmem_size = {}", pq.get_shmem_size(blockThreads)); - // now pop all the elements + // next kernel needs to pop some percentage of the top + // elements in the priority queue and store popped elements + // to be used by the next kernel that actually does the collapses + // + // mark some sort of + // associated edge attribute + + // now pop all the elements to ouput on host // thrust::device_vector d_popped(rx.get_num_edges()); pq.pop(d_popped.begin(), d_popped.end()); cudaDeviceSynchronize(); const thrust::host_vector h_popped(d_popped); - for(size_t i = 0; i < h_popped.size(); i++) - { - std::cout << i << "\t" << h_popped[i].first - << "\t" << h_popped[i].second << "\n"; - } + // for(size_t i = 0; i < h_popped.size(); i++) + // { + // std::cout << i << "\t" << h_popped[i].first + // << "\t" << h_popped[i].second << "\n"; + // } return; // compute max-min histogram //histo.init(); From 65eb418ce7febef380910953f02af4442e43c3b8 Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Tue, 7 May 2024 22:04:27 -0700 Subject: [PATCH 08/18] Add kernel to pop and mark edges to be collapsed --- apps/SECPriority/secp_kernels.cuh | 37 +++++++++++++++++++++++++++- apps/SECPriority/secp_rxmesh.cuh | 41 +++++++++++++++++++------------ 2 files changed, 61 insertions(+), 17 deletions(-) diff --git a/apps/SECPriority/secp_kernels.cuh b/apps/SECPriority/secp_kernels.cuh index 2fef5992..7ca64516 100644 --- a/apps/SECPriority/secp_kernels.cuh +++ b/apps/SECPriority/secp_kernels.cuh @@ -290,4 +290,39 @@ __global__ static void compute_edge_priorities( char * pq_shrd_mem = shrd_alloc.alloc(pq_num_bytes); pq_view.push(block, intermediatePairs, intermediatePairs + pair_counter, pq_shrd_mem); -} \ No newline at end of file +} + +template +__global__ static void pop_and_mark_edges_to_collapse( + PQView_t pq_view, + rxmesh::EdgeAttribute marked_edges, + uint32_t pop_num_edges) +{ + // setup shared memory array to store the popped pairs + // + // device api pop pairs + namespace cg = cooperative_groups; + using namespace rxmesh; + ShmemAllocator shrd_alloc; + + auto intermediatePairs = shrd_alloc.alloc(blockThreads); + char * pq_shrd_mem = shrd_alloc.alloc(pq_view.get_shmem_size(blockThreads)); + cg::thread_block g = cg::this_thread_block(); + pq_view.pop(g, intermediatePairs, intermediatePairs + blockThreads, pq_shrd_mem); + + int tid = blockIdx.x * blockDim.x + threadIdx.x; + int local_tid = threadIdx.x; + + // Make sure the index is within bounds + if(tid < pop_num_edges) + { + //printf("tid: %d\n", tid); + // unpack the uid to get the patch and edge ids + auto p_e = unpack32(intermediatePairs[local_tid].second); + //printf("32bit p_id:%hu\te_id:%hu\n", p_e.first, p_e.second); + rxmesh::EdgeHandle eh(p_e.first, rxmesh::LocalEdgeT(p_e.second)); + + //use the eh to index into a passed in edge attribute + marked_edges(eh) = true; + } +} diff --git a/apps/SECPriority/secp_rxmesh.cuh b/apps/SECPriority/secp_rxmesh.cuh index 163db23a..7faeafc2 100644 --- a/apps/SECPriority/secp_rxmesh.cuh +++ b/apps/SECPriority/secp_rxmesh.cuh @@ -84,8 +84,7 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, EXPECT_TRUE(rx.validate()); using namespace rxmesh; - constexpr uint32_t blockThreads = 32; - + constexpr uint32_t blockThreads = 256; auto coords = rx.get_input_vertex_coordinates(); @@ -103,6 +102,7 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, PriorityQueue_t pq(rx.get_num_edges()); auto e_attr = rx.add_edge_attribute("eMark", 1); + auto e_pop_attr = rx.add_edge_attribute("ePop", false); #if USE_POLYSCOPE @@ -155,21 +155,30 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, // next kernel needs to pop some percentage of the top // elements in the priority queue and store popped elements // to be used by the next kernel that actually does the collapses - // - // mark some sort of - // associated edge attribute - - // now pop all the elements to ouput on host - // - thrust::device_vector d_popped(rx.get_num_edges()); - pq.pop(d_popped.begin(), d_popped.end()); + + float reduce_ratio = 0.1f; + + // Mark the edge attributes to be flipped + uint32_t pop_num_edges = reduce_ratio * rx.get_num_edges(); + RXMESH_TRACE("pop_num_edges: {}", pop_num_edges); + + constexpr uint32_t threads_per_block = 1024; + uint32_t number_of_blocks = (pop_num_edges + threads_per_block - 1) / threads_per_block; + int shared_mem_bytes = pq.get_shmem_size(threads_per_block) + + (threads_per_block * sizeof(PriorityPair_t)); + RXMESH_TRACE("threads_per_block: {}", threads_per_block); + RXMESH_TRACE("number_of_blocks: {}", number_of_blocks); + RXMESH_TRACE("shared_mem_bytes: {}", shared_mem_bytes); + + pop_and_mark_edges_to_collapse + <<>> + (pq.get_mutable_device_view(), + *e_pop_attr, + pop_num_edges); + cudaDeviceSynchronize(); - const thrust::host_vector h_popped(d_popped); - // for(size_t i = 0; i < h_popped.size(); i++) - // { - // std::cout << i << "\t" << h_popped[i].first - // << "\t" << h_popped[i].second << "\n"; - // } + RXMESH_TRACE("Made it past cudaDeviceSynchronize()"); + return; // compute max-min histogram //histo.init(); From 39053442c0bb67c62691636ffdfdbcbc82b68d48 Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Wed, 8 May 2024 12:21:56 -0700 Subject: [PATCH 09/18] Remove dead code --- apps/SECPriority/secp_rxmesh.cuh | 191 +------------------------------ 1 file changed, 1 insertion(+), 190 deletions(-) diff --git a/apps/SECPriority/secp_rxmesh.cuh b/apps/SECPriority/secp_rxmesh.cuh index 7faeafc2..ebe0d4fb 100644 --- a/apps/SECPriority/secp_rxmesh.cuh +++ b/apps/SECPriority/secp_rxmesh.cuh @@ -85,26 +85,13 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, using namespace rxmesh; constexpr uint32_t blockThreads = 256; - auto coords = rx.get_input_vertex_coordinates(); - - auto edge_status = rx.add_edge_attribute("EdgeStatus", 1); - LaunchBox launch_box; - float total_time = 0; - float app_time = 0; - float slice_time = 0; - float cleanup_time = 0; - - const int num_bins = 256; - PriorityQueue_t pq(rx.get_num_edges()); - auto e_attr = rx.add_edge_attribute("eMark", 1); auto e_pop_attr = rx.add_edge_attribute("ePop", false); - #if USE_POLYSCOPE rx.render_vertex_patch(); rx.render_edge_patch(); @@ -112,19 +99,6 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, // polyscope::show(); #endif - bool validate = false; - - int* d_num_cavities = nullptr; - CUDA_ERROR(cudaMalloc((void**)&d_num_cavities, 2 * sizeof(int))); - CUDA_ERROR(cudaMemset(d_num_cavities, 0, 2 * sizeof(int))); - - float reduce_ratio = 0.05; - - CUDA_ERROR(cudaProfilerStart()); - GPUTimer timer; - timer.start(); - while (rx.get_num_faces() > final_num_faces) { - rx.prepare_launch_box({Op::EV}, launch_box, (void*)compute_edge_priorities, @@ -139,10 +113,6 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, return pq.get_shmem_size(blockThreads) + (e*sizeof(PriorityPair_t)); }); - RXMESH_TRACE("pair_alignment(){}", cuco::detail::pair_alignment()); - RXMESH_TRACE("pair_alignment(){}", cuco::detail::pair_alignment()); - RXMESH_TRACE("pair_alignment(){}", cuco::detail::pair_alignment()); - RXMESH_TRACE("sizeof(PriorityPair_t){}", sizeof(PriorityPair_t)); compute_edge_priorities <<, - // false); - //compute_min_max_cost - // <<>>(rx.get_context(), *coords, histo); - - //// compute histogram bins - //rx.prepare_launch_box({Op::EV}, - // launch_box, - // (void*)populate_histogram, - // false); - //populate_histogram - // <<>>(rx.get_context(), *coords, histo); - - //histo.scan(); - - - // how much we can reduce the number of edge at each iterations - reduce_ratio = reduce_ratio + 0.05; - - // loop over the mesh, and try to collapse - const int reduce_threshold = - std::max(1, int(reduce_ratio * float(rx.get_num_edges()))); - - // reset edge status - edge_status->reset(UNSEEN, DEVICE); - - rx.reset_scheduler(); - while (!rx.is_queue_empty() && rx.get_num_faces() > final_num_faces) { - RXMESH_INFO(" Queue size = {}", - rx.get_context().m_patch_scheduler.size()); - - rx.prepare_launch_box( - {Op::EV}, - launch_box, - (void*)secp, - true, - false, - false, - false, - [&](uint32_t v, uint32_t e, uint32_t f) { - return detail::mask_num_bytes(e) + - 2 * detail::mask_num_bytes(v) + - 3 * ShmemAllocator::default_alignment; - }); - - e_attr->reset(0, DEVICE); - - GPUTimer app_timer; - app_timer.start(); - secp - <<>>(rx.get_context(), - *coords, - //histo, - reduce_threshold, - *edge_status, - *e_attr, - d_num_cavities); - - app_timer.stop(); - - GPUTimer cleanup_timer; - cleanup_timer.start(); - rx.cleanup(); - cleanup_timer.stop(); - - GPUTimer slice_timer; - slice_timer.start(); - rx.slice_patches(*coords, *edge_status, *e_attr); - slice_timer.stop(); - - GPUTimer cleanup_timer2; - cleanup_timer2.start(); - rx.cleanup(); - cleanup_timer2.stop(); - - - CUDA_ERROR(cudaDeviceSynchronize()); - CUDA_ERROR(cudaGetLastError()); - - app_time += app_timer.elapsed_millis(); - slice_time += slice_timer.elapsed_millis(); - cleanup_time += cleanup_timer.elapsed_millis(); - cleanup_time += cleanup_timer2.elapsed_millis(); - - - if (validate) { - rx.update_host(); - EXPECT_TRUE(rx.validate()); - RXMESH_INFO(" num_vertices = {}, num_edges= {}, num_faces= {}", - rx.get_num_vertices(), - rx.get_num_edges(), - rx.get_num_faces()); - } - } - - { - int h_num_cavities[2]; - CUDA_ERROR(cudaMemcpy(&h_num_cavities, - d_num_cavities, - 2 * sizeof(int), - cudaMemcpyDeviceToHost)); - RXMESH_INFO(" Requested cavities = {}, executed cavities = {}", - h_num_cavities[1], - h_num_cavities[0]); - - coords->move(DEVICE, HOST); - e_attr->move(DEVICE, HOST); - rx.update_host(); - rx.update_polyscope(); - auto ps_mesh = rx.get_polyscope_mesh(); - ps_mesh->updateVertexPositions(*coords); - ps_mesh->setEnabled(false); - ps_mesh->addEdgeScalarQuantity("eMark", *e_attr); - rx.render_vertex_patch(); - rx.render_edge_patch(); - rx.render_face_patch(); - polyscope::show(); - } - } - timer.stop(); - total_time += timer.elapsed_millis(); - CUDA_ERROR(cudaProfilerStop()); - - RXMESH_INFO("secp_rxmesh() RXMesh simplification took {} (ms)", total_time); - RXMESH_INFO("secp_rxmesh() App time {} (ms)", app_time); - RXMESH_INFO("secp_rxmesh() Slice timer {} (ms)", slice_time); - RXMESH_INFO("secp_rxmesh() Cleanup timer {} (ms)", cleanup_time); - - if (!validate) { - rx.update_host(); - } - coords->move(DEVICE, HOST); - -#if USE_POLYSCOPE - rx.update_polyscope(); - - auto ps_mesh = rx.get_polyscope_mesh(); - ps_mesh->updateVertexPositions(*coords); - ps_mesh->setEnabled(false); - - rx.render_vertex_patch(); - rx.render_edge_patch(); - rx.render_face_patch(); - polyscope::show(); -#endif - -// histo.free(); } \ No newline at end of file From 352c8e78766a324f43eb0b75c225242ef6e5f262 Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Wed, 8 May 2024 17:04:36 -0700 Subject: [PATCH 10/18] Use priority queue to actually simplify --- apps/SECPriority/secp.cu | 23 +-- apps/SECPriority/secp_kernels.cuh | 125 +++------------ apps/SECPriority/secp_rxmesh.cuh | 246 +++++++++++++++++++++++------- 3 files changed, 225 insertions(+), 169 deletions(-) diff --git a/apps/SECPriority/secp.cu b/apps/SECPriority/secp.cu index 1cdd2b2d..cedc1a64 100644 --- a/apps/SECPriority/secp.cu +++ b/apps/SECPriority/secp.cu @@ -5,11 +5,13 @@ #include "rxmesh/rxmesh_dynamic.h" +#include + struct arg { std::string obj_file_name = STRINGIFY(INPUT_DIR) "dragon.obj"; std::string output_folder = STRINGIFY(OUTPUT_DIR); - uint32_t target = 0; + float target = 0.1; uint32_t device_id = 0; char** argv; int argc; @@ -24,18 +26,21 @@ TEST(Apps, SECPriority) // Select device cuda_query(Arg.device_id); - RXMeshDynamic rx(Arg.obj_file_name); - rx.save(STRINGIFY(OUTPUT_DIR) + extract_file_name(Arg.obj_file_name) + - "_patches"); + // RXMeshDynamic rx(Arg.obj_file_name); - // RXMeshDynamic rx(Arg.obj_file_name, - // STRINGIFY(OUTPUT_DIR) + - // extract_file_name(Arg.obj_file_name) + "_patches", - // true); + const std::string p_file = STRINGIFY(OUTPUT_DIR) + + extract_file_name(Arg.obj_file_name) + + "_patches"; + RXMeshDynamic rx(Arg.obj_file_name, p_file); + if (!std::filesystem::exists(p_file)) { + rx.save(p_file); + } ASSERT_TRUE(rx.is_edge_manifold()); - secp_rxmesh(rx, Arg.target); + uint32_t final_num_vertices = Arg.target * rx.get_num_vertices(); + + secp_rxmesh(rx, final_num_vertices); } diff --git a/apps/SECPriority/secp_kernels.cuh b/apps/SECPriority/secp_kernels.cuh index 7ca64516..5807c085 100644 --- a/apps/SECPriority/secp_kernels.cuh +++ b/apps/SECPriority/secp_kernels.cuh @@ -1,5 +1,6 @@ #pragma once #include "rxmesh/cavity_manager.cuh" +#include "../ShortestEdgeCollapse/link_condition.cuh" #include #include @@ -7,11 +8,9 @@ template __global__ static void secp(rxmesh::Context context, rxmesh::VertexAttribute coords, - // const CostHistogram histo, const int reduce_threshold, rxmesh::EdgeAttribute edge_status, - rxmesh::EdgeAttribute e_attr, - int* d_num_cavities) + rxmesh::EdgeAttribute e_pop_attr) { using namespace rxmesh; auto block = cooperative_groups::this_thread_block(); @@ -29,6 +28,7 @@ __global__ static void secp(rxmesh::Context context, // filter them). Then after cavity.prologue, we reuse this bitmask to mark // the newly added edges Bitmask edge_mask(cavity.patch_info().edges_capacity[0], shrd_alloc); + edge_mask.reset(block); // we use this bitmask to mark the other end of to-be-collapse edge during // checking for the link condition @@ -41,100 +41,26 @@ __global__ static void secp(rxmesh::Context context, ev_query.prologue(block, shrd_alloc); block.sync(); - // 1) mark edge we want to collapse + // 1a) mark edge we want to collapse given e_pop_attr for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { assert(eh.local_id() < cavity.patch_info().num_edges[0]); - if (edge_status(eh) != UNSEEN) { + if (edge_status(eh) != UNSEEN) + { return; } - const VertexIterator iter = - ev_query.template get_iterator(eh.local_id()); - const VertexHandle v0 = iter[0]; - const VertexHandle v1 = iter[1]; - - const Vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const Vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); - - T len2 = glm::distance2(p0, p1); + if(true == e_pop_attr(eh)) + { + edge_mask.set(eh.local_id(), true); + } - /*if (histo.get_bin(len2) <= reduce_threshold) { - //::atomicAdd(d_num_cavities + 1, 1); - // cavity.create(eh); - edge_mask.set(eh.local_id(), true); - }*/ }); block.sync(); - - // 2) check edge link condition. Here, for each edge marked in edge_mask, - // all threads in the block collaborate to check the edge link condition of - // this edge - __shared__ int s_num_shared_one_ring; - for (uint16_t e = 0; e < edge_mask.size(); ++e) { - - if (edge_mask(e)) { - // the edge two end vertices - const VertexIterator iter = - ev_query.template get_iterator(e); - - const uint16_t v0 = iter.local(0); - const uint16_t v1 = iter.local(1); - - if (threadIdx.x == 0) { - s_num_shared_one_ring = 0; - } - - v0_mask.reset(block); - v1_mask.reset(block); - block.sync(); - - // each thread will be assigned to an edge (including not-owned one) - // and mark in v0_mask/v1_mask if one of its two ends are v0/v1 - for_each_edge( - cavity.patch_info(), - [&](EdgeHandle eh) { - if (eh.local_id() == e) { - return; - } - const VertexIterator v_iter = - ev_query.template get_iterator( - eh.local_id()); - - const uint16_t vv0 = v_iter.local(0); - const uint16_t vv1 = v_iter.local(1); - - - if (vv0 == v0) { - v0_mask.set(vv1, true); - } - if (vv0 == v1) { - v1_mask.set(vv1, true); - } - - if (vv1 == v0) { - v0_mask.set(vv0, true); - } - if (vv1 == v1) { - v1_mask.set(vv0, true); - } - }, - true); - block.sync(); - - for (int v = threadIdx.x; v < v0_mask.size(); v += blockThreads) { - if (v0_mask(v) && v1_mask(v)) { - ::atomicAdd(&s_num_shared_one_ring, 1); - } - } - - block.sync(); - if (s_num_shared_one_ring > 2) { - edge_mask.reset(e, true); - } - } - } + // 2a) check edge link condition. + link_condition(block, cavity.patch_info(), ev_query, + edge_mask, v0_mask, v1_mask); block.sync(); for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { @@ -149,19 +75,7 @@ __global__ static void secp(rxmesh::Context context, ev_query.epilogue(block, shrd_alloc); - // create the cavity - if (cavity.prologue(block, shrd_alloc, coords, edge_status, e_attr)) { - - // if (threadIdx.x == 0) { - // uint16_t num_actual_cavities = 0; - // for (int i = 0; i < cavity.m_s_active_cavity_bitmask.size(); ++i) - // { - // if (cavity.m_s_active_cavity_bitmask(i)) { - // num_actual_cavities++; - // } - // } - // ::atomicAdd(d_num_cavities, num_actual_cavities); - // } + if (cavity.prologue(block, shrd_alloc, coords, edge_status)) { edge_mask.reset(block); block.sync(); @@ -179,16 +93,14 @@ __global__ static void secp(rxmesh::Context context, if (new_v.is_valid()) { - coords(new_v, 0) = (coords(v0, 0) + coords(v1, 0)) * 0.5; - coords(new_v, 1) = (coords(v0, 1) + coords(v1, 1)) * 0.5; - coords(new_v, 2) = (coords(v0, 2) + coords(v1, 2)) * 0.5; + coords(new_v, 0) = (coords(v0, 0) + coords(v1, 0)) * T(0.5); + coords(new_v, 1) = (coords(v0, 1) + coords(v1, 1)) * T(0.5); + coords(new_v, 2) = (coords(v0, 2) + coords(v1, 2)) * T(0.5); DEdgeHandle e0 = cavity.add_edge(new_v, cavity.get_cavity_vertex(c, 0)); - e_attr(e0.get_edge_handle())++; - if (e0.is_valid()) { edge_mask.set(e0.local_id(), true); @@ -226,7 +138,6 @@ __global__ static void secp(rxmesh::Context context, }); } - cavity.epilogue(block); block.sync(); @@ -317,7 +228,7 @@ __global__ static void pop_and_mark_edges_to_collapse( if(tid < pop_num_edges) { //printf("tid: %d\n", tid); - // unpack the uid to get the patch and edge ids + //unpack the uid to get the patch and edge ids auto p_e = unpack32(intermediatePairs[local_tid].second); //printf("32bit p_id:%hu\te_id:%hu\n", p_e.first, p_e.second); rxmesh::EdgeHandle eh(p_e.first, rxmesh::LocalEdgeT(p_e.second)); diff --git a/apps/SECPriority/secp_rxmesh.cuh b/apps/SECPriority/secp_rxmesh.cuh index ebe0d4fb..c5fad43c 100644 --- a/apps/SECPriority/secp_rxmesh.cuh +++ b/apps/SECPriority/secp_rxmesh.cuh @@ -79,18 +79,23 @@ enum : EdgeStatus inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, - const uint32_t final_num_faces) + const uint32_t final_num_vertices) { EXPECT_TRUE(rx.validate()); using namespace rxmesh; constexpr uint32_t blockThreads = 256; auto coords = rx.get_input_vertex_coordinates(); + auto edge_status = rx.add_edge_attribute("EdgeStatus", 1); LaunchBox launch_box; - PriorityQueue_t pq(rx.get_num_edges()); + float total_time = 0; + float app_time = 0; + float slice_time = 0; + float cleanup_time = 0; - auto e_pop_attr = rx.add_edge_attribute("ePop", false); + + auto e_pop_attr = rx.add_edge_attribute("ePop", 1); #if USE_POLYSCOPE rx.render_vertex_patch(); @@ -99,54 +104,189 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, // polyscope::show(); #endif - rx.prepare_launch_box({Op::EV}, - launch_box, - (void*)compute_edge_priorities, - false, - false, - false, - false, - [&](uint32_t v, uint32_t e, uint32_t f){ - // Allocate enough additional memory - // for the priority queue and the intermediate - // array of PriorityPait_t. - return pq.get_shmem_size(blockThreads) + (e*sizeof(PriorityPair_t)); - }); - - compute_edge_priorities - <<>>(rx.get_context(), *coords, pq.get_mutable_device_view(), pq.get_shmem_size(blockThreads)); - - cudaDeviceSynchronize(); - RXMESH_TRACE("launch_box.smem_bytes_dyn = {}", launch_box.smem_bytes_dyn); - RXMESH_TRACE("pq.get_shmem_size = {}", pq.get_shmem_size(blockThreads)); - - // next kernel needs to pop some percentage of the top - // elements in the priority queue and store popped elements - // to be used by the next kernel that actually does the collapses - - float reduce_ratio = 0.1f; - - // Mark the edge attributes to be collapsed - uint32_t pop_num_edges = reduce_ratio * rx.get_num_edges(); - RXMESH_TRACE("pop_num_edges: {}", pop_num_edges); - - constexpr uint32_t threads_per_block = 1024; - uint32_t number_of_blocks = (pop_num_edges + threads_per_block - 1) / threads_per_block; - int shared_mem_bytes = pq.get_shmem_size(threads_per_block) + - (threads_per_block * sizeof(PriorityPair_t)); - RXMESH_TRACE("threads_per_block: {}", threads_per_block); - RXMESH_TRACE("number_of_blocks: {}", number_of_blocks); - RXMESH_TRACE("shared_mem_bytes: {}", shared_mem_bytes); - - pop_and_mark_edges_to_collapse - <<>> - (pq.get_mutable_device_view(), - *e_pop_attr, - pop_num_edges); - - cudaDeviceSynchronize(); - RXMESH_TRACE("Made it past cudaDeviceSynchronize()"); - + bool validate = false; + + CUDA_ERROR(cudaProfilerStart()); + GPUTimer timer; + timer.start(); + while(rx.get_num_vertices(true) > final_num_vertices) + { + // rebuild every round? + PriorityQueue_t pq(rx.get_num_edges()); + e_pop_attr->reset(DEVICE, false); + + //rx.prepare_launch_box( + rx.update_launch_box( + {Op::EV}, + launch_box, + (void*)compute_edge_priorities, + false, false, false, false, + [&](uint32_t v, uint32_t e, uint32_t f){ + // Allocate enough additional memory + // for the priority queue and the intermediate + // array of PriorityPair_t. + return pq.get_shmem_size(blockThreads) + (e*sizeof(PriorityPair_t)); + } + ); + + compute_edge_priorities + <<>>( rx.get_context(), *coords, pq.get_mutable_device_view(), pq.get_shmem_size(blockThreads)); + cudaDeviceSynchronize(); + //RXMESH_TRACE("launch_box.smem_bytes_dyn = {}", launch_box.smem_bytes_dyn); + //RXMESH_TRACE("pq.get_shmem_size = {}", pq.get_shmem_size(blockThreads)); + + // Next kernel needs to pop some percentage of the top + // elements in the priority queue and store popped elements + // to be used by the next kernel that actually does the collapses + + float reduce_ratio = 0.1f; + const int num_edges_before = int(rx.get_num_edges()); + const int reduce_threshold = + std::max(1, int(reduce_ratio * float(num_edges_before))); + // Mark the edge attributes to be collapsed + uint32_t pop_num_edges = reduce_threshold; //reduce_ratio * rx.get_num_edges(); + //RXMESH_TRACE("pop_num_edges: {}", pop_num_edges); + + constexpr uint32_t threads_per_block = 32; + uint32_t number_of_blocks = (pop_num_edges + threads_per_block - 1) / threads_per_block; + int shared_mem_bytes = pq.get_shmem_size(threads_per_block) + + (threads_per_block * sizeof(PriorityPair_t)); + //RXMESH_TRACE("threads_per_block: {}", threads_per_block); + //RXMESH_TRACE("number_of_blocks: {}", number_of_blocks); + //RXMESH_TRACE("shared_mem_bytes: {}", shared_mem_bytes); + + pop_and_mark_edges_to_collapse + <<>> + (pq.get_mutable_device_view(), + *e_pop_attr, + pop_num_edges); + + CUDA_ERROR(cudaDeviceSynchronize()); + CUDA_ERROR(cudaGetLastError()); + //RXMESH_TRACE("Made it past cudaDeviceSynchronize()"); + + // loop over the mesh, and try to collapse + // reset edge status + edge_status->reset(UNSEEN, DEVICE); + + rx.reset_scheduler(); + while(!rx.is_queue_empty() && + rx.get_num_vertices(true) > final_num_vertices) + { + + RXMESH_INFO(" Queue size = {}", + rx.get_context().m_patch_scheduler.size()); + + //rx.prepare_launch_box( + rx.update_launch_box( + {Op::EV}, + launch_box, + (void*)secp, + true, false, false, false, + [&](uint32_t v, uint32_t e, uint32_t f) { + return detail::mask_num_bytes(e) + + 2 * detail::mask_num_bytes(v) + + 3 * ShmemAllocator::default_alignment; + } + ); + + GPUTimer app_timer; + app_timer.start(); + secp + <<>>(rx.get_context(), + *coords, + reduce_threshold, + *edge_status, + *e_pop_attr); + // should we cudaDeviceSyn here? stopping timers too soon? + //CUDA_ERROR(cudaDeviceSynchronize()); + //CUDA_ERROR(cudaGetLastError()); + + app_timer.stop(); + + GPUTimer cleanup_timer; + cleanup_timer.start(); + rx.cleanup(); + cleanup_timer.stop(); + + GPUTimer slice_timer; + slice_timer.start(); + rx.slice_patches(*coords, *edge_status); + slice_timer.stop(); + + GPUTimer cleanup_timer2; + cleanup_timer2.start(); + rx.cleanup(); + cleanup_timer2.stop(); + + + CUDA_ERROR(cudaDeviceSynchronize()); + CUDA_ERROR(cudaGetLastError()); + + app_time += app_timer.elapsed_millis(); + slice_time += slice_timer.elapsed_millis(); + cleanup_time += cleanup_timer.elapsed_millis(); + cleanup_time += cleanup_timer2.elapsed_millis(); + + if (validate) { + rx.update_host(); + EXPECT_TRUE(rx.validate()); + } + } + + if (false) { + + RXMESH_INFO("#Vertices {}", rx.get_num_vertices(true)); + RXMESH_INFO("#Edges {}", rx.get_num_edges(true)); + RXMESH_INFO("#Faces {}", rx.get_num_faces(true)); + RXMESH_INFO("#Patches {}", rx.get_num_patches(true)); + RXMESH_INFO("request reduction = {}, achieved reduction= {}", + reduce_threshold, + num_edges_before - int(rx.get_num_edges(true))); + + if (false) { + rx.update_host(); + coords->move(DEVICE, HOST); + rx.update_polyscope(); + auto ps_mesh = rx.get_polyscope_mesh(); + ps_mesh->updateVertexPositions(*coords); + ps_mesh->setEnabled(false); + // rx.render_vertex_patch(); + // rx.render_edge_patch(); + // rx.render_face_patch(); + + polyscope::show(); + } + } + } + timer.stop(); + total_time += timer.elapsed_millis(); + CUDA_ERROR(cudaProfilerStop()); + + RXMESH_INFO("secp_rxmesh() RXMesh simplification took {} (ms)", total_time); + RXMESH_INFO("secp_rxmesh() App time {} (ms)", app_time); + RXMESH_INFO("secp_rxmesh() Slice timer {} (ms)", slice_time); + RXMESH_INFO("secp_rxmesh() Cleanup timer {} (ms)", cleanup_time); + + if (!validate) { + rx.update_host(); + } + coords->move(DEVICE, HOST); + +#if USE_POLYSCOPE + rx.update_polyscope(); + + auto ps_mesh = rx.get_polyscope_mesh(); + ps_mesh->updateVertexPositions(*coords); + ps_mesh->setEnabled(false); + + rx.render_vertex_patch(); + rx.render_edge_patch(); + rx.render_face_patch(); + polyscope::show(); +#endif } \ No newline at end of file From 35fea4417b6a59e5a161f5fa15813e5109eff373 Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Thu, 9 May 2024 00:19:09 -0700 Subject: [PATCH 11/18] Update to match histogram and add reporting --- apps/SECPriority/secp.cu | 8 +- apps/SECPriority/secp_kernels.cuh | 22 +---- apps/SECPriority/secp_rxmesh.cuh | 145 ++++++++++++++++++------------ 3 files changed, 98 insertions(+), 77 deletions(-) diff --git a/apps/SECPriority/secp.cu b/apps/SECPriority/secp.cu index cedc1a64..7a115746 100644 --- a/apps/SECPriority/secp.cu +++ b/apps/SECPriority/secp.cu @@ -3,8 +3,6 @@ #include "rxmesh/util/macros.h" #include "rxmesh/util/util.h" -#include "rxmesh/rxmesh_dynamic.h" - #include struct arg @@ -38,6 +36,8 @@ TEST(Apps, SECPriority) ASSERT_TRUE(rx.is_edge_manifold()); + ASSERT_TRUE(rx.is_closed()); + uint32_t final_num_vertices = Arg.target * rx.get_num_vertices(); secp_rxmesh(rx, final_num_vertices); @@ -62,7 +62,7 @@ int main(int argc, char** argv) " -input: Input file. Input file should be under the input/ subdirectory\n" " Default is {} \n" " Hint: Only accept OBJ files\n" - " -target: The final/target number of faces in the output mesh\n" + " -target: The fraction of output #vertices from the input\n" " -o: JSON file output folder. Default is {} \n" " -device_id: GPU device ID. Default is {}", Arg.obj_file_name, Arg.output_folder, Arg.device_id); @@ -83,7 +83,7 @@ int main(int argc, char** argv) atoi(get_cmd_option(argv, argv + argc, "-device_id")); } if (cmd_option_exists(argv, argc + argv, "-target")) { - Arg.target = false; + Arg.target = atof(get_cmd_option(argv, argv + argc, "-target")); } } diff --git a/apps/SECPriority/secp_kernels.cuh b/apps/SECPriority/secp_kernels.cuh index 5807c085..bd05b62d 100644 --- a/apps/SECPriority/secp_kernels.cuh +++ b/apps/SECPriority/secp_kernels.cuh @@ -9,7 +9,6 @@ template __global__ static void secp(rxmesh::Context context, rxmesh::VertexAttribute coords, const int reduce_threshold, - rxmesh::EdgeAttribute edge_status, rxmesh::EdgeAttribute e_pop_attr) { using namespace rxmesh; @@ -45,11 +44,7 @@ __global__ static void secp(rxmesh::Context context, for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { assert(eh.local_id() < cavity.patch_info().num_edges[0]); - if (edge_status(eh) != UNSEEN) - { - return; - } - + //edge_mask.set(eh.local_id(), e_pop_attr(eh)); if(true == e_pop_attr(eh)) { edge_mask.set(eh.local_id(), true); @@ -67,15 +62,14 @@ __global__ static void secp(rxmesh::Context context, assert(eh.local_id() < cavity.patch_info().num_edges[0]); if (edge_mask(eh.local_id())) { cavity.create(eh); - } else { - edge_status(eh) = OKAY; - } + } }); block.sync(); ev_query.epilogue(block, shrd_alloc); - if (cavity.prologue(block, shrd_alloc, coords, edge_status)) { + // create the cavity + if (cavity.prologue(block, shrd_alloc, coords)) { edge_mask.reset(block); block.sync(); @@ -140,14 +134,6 @@ __global__ static void secp(rxmesh::Context context, cavity.epilogue(block); block.sync(); - - if (cavity.is_successful()) { - for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { - if (edge_mask(eh.local_id())) { - edge_status(eh) = ADDED; - } - }); - } } //template diff --git a/apps/SECPriority/secp_rxmesh.cuh b/apps/SECPriority/secp_rxmesh.cuh index c5fad43c..4b7cb523 100644 --- a/apps/SECPriority/secp_rxmesh.cuh +++ b/apps/SECPriority/secp_rxmesh.cuh @@ -66,17 +66,9 @@ using PQView_t = PriorityQueue_t::device_mutable_view; template using Vec3 = glm::vec<3, T, glm::defaultp>; -using EdgeStatus = int8_t; -enum : EdgeStatus -{ - UNSEEN = 0, // means we have not tested it before for e.g., split/flip/col - OKAY = 1, // means we have tested it and it is okay to skip - UPDATE = 2, // means we should update it i.e., we have tested it before - ADDED = 3, // means it has been added to during the split/flip/collapse -}; - #include "secp_kernels.cuh" +#include "rxmesh/util/report.h" inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, const uint32_t final_num_vertices) @@ -85,18 +77,39 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, using namespace rxmesh; constexpr uint32_t blockThreads = 256; + + rxmesh::Report report("SECP_RXMesh"); + report.command_line(Arg.argc, Arg.argv); + report.device(); + report.system(); + report.model_data(Arg.obj_file_name + "_before", rx, "model_before"); + report.add_member("method", std::string("RXMesh")); + report.add_member("blockThreads", blockThreads); + auto coords = rx.get_input_vertex_coordinates(); - auto edge_status = rx.add_edge_attribute("EdgeStatus", 1); + LaunchBox launch_box; float total_time = 0; float app_time = 0; float slice_time = 0; float cleanup_time = 0; - + float pq_time = 0; + float pop_mark_time = 0; + float e_priority_time = 0; auto e_pop_attr = rx.add_edge_attribute("ePop", 1); + RXMESH_INFO("#Vertices {}", rx.get_num_vertices()); + RXMESH_INFO("#Edges {}", rx.get_num_edges()); + RXMESH_INFO("#Faces {}", rx.get_num_faces()); + RXMESH_INFO("#Patches {}", rx.get_num_patches()); + + size_t max_smem_bytes_dyn = 0; + size_t max_smem_bytes_static = 0; + uint32_t max_num_registers_per_thread = 0; + uint32_t max_num_blocks = 0; + #if USE_POLYSCOPE rx.render_vertex_patch(); rx.render_edge_patch(); @@ -106,12 +119,19 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, bool validate = false; + int num_passes = 0; + CUDA_ERROR(cudaProfilerStart()); GPUTimer timer; timer.start(); while(rx.get_num_vertices(true) > final_num_vertices) { - // rebuild every round? + ++num_passes; + + GPUTimer pq_timer; + pq_timer.start(); + + // rebuild every round? Not necessarily a great way to use a pq. PriorityQueue_t pq(rx.get_num_edges()); e_pop_attr->reset(DEVICE, false); @@ -129,11 +149,15 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, } ); + GPUTimer edge_priorities_timer; + edge_priorities_timer.start(); compute_edge_priorities <<>>( rx.get_context(), *coords, pq.get_mutable_device_view(), pq.get_shmem_size(blockThreads)); - cudaDeviceSynchronize(); + edge_priorities_timer.stop(); + e_priority_time += edge_priorities_timer.elapsed_millis(); + //cudaDeviceSynchronize(); //RXMESH_TRACE("launch_box.smem_bytes_dyn = {}", launch_box.smem_bytes_dyn); //RXMESH_TRACE("pq.get_shmem_size = {}", pq.get_shmem_size(blockThreads)); @@ -149,7 +173,7 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, uint32_t pop_num_edges = reduce_threshold; //reduce_ratio * rx.get_num_edges(); //RXMESH_TRACE("pop_num_edges: {}", pop_num_edges); - constexpr uint32_t threads_per_block = 32; + constexpr uint32_t threads_per_block = 256; uint32_t number_of_blocks = (pop_num_edges + threads_per_block - 1) / threads_per_block; int shared_mem_bytes = pq.get_shmem_size(threads_per_block) + (threads_per_block * sizeof(PriorityPair_t)); @@ -157,6 +181,8 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, //RXMESH_TRACE("number_of_blocks: {}", number_of_blocks); //RXMESH_TRACE("shared_mem_bytes: {}", shared_mem_bytes); + GPUTimer pop_mark_timer; + pop_mark_timer.start(); pop_and_mark_edges_to_collapse <<>> (pq.get_mutable_device_view(), @@ -165,19 +191,22 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, CUDA_ERROR(cudaDeviceSynchronize()); CUDA_ERROR(cudaGetLastError()); - //RXMESH_TRACE("Made it past cudaDeviceSynchronize()"); + pop_mark_timer.stop(); + pop_mark_time += pop_mark_timer.elapsed_millis(); + + pq_timer.stop(); + + pq_time += pq_timer.elapsed_millis(); // loop over the mesh, and try to collapse - // reset edge status - edge_status->reset(UNSEEN, DEVICE); rx.reset_scheduler(); while(!rx.is_queue_empty() && rx.get_num_vertices(true) > final_num_vertices) { - RXMESH_INFO(" Queue size = {}", - rx.get_context().m_patch_scheduler.size()); + //RXMESH_INFO(" Queue size = {}", + // rx.get_context().m_patch_scheduler.size()); //rx.prepare_launch_box( rx.update_launch_box( @@ -192,7 +221,17 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, } ); + max_smem_bytes_dyn = + std::max(max_smem_bytes_dyn, launch_box.smem_bytes_dyn); + max_smem_bytes_static = + std::max(max_smem_bytes_static, launch_box.smem_bytes_static); + max_num_registers_per_thread = + std::max(max_num_registers_per_thread, + launch_box.num_registers_per_thread); + max_num_blocks = + std::max(max_num_blocks, DIVIDE_UP(launch_box.blocks, 8)); GPUTimer app_timer; + app_timer.start(); secp <<>>(rx.get_context(), *coords, reduce_threshold, - *edge_status, *e_pop_attr); // should we cudaDeviceSyn here? stopping timers too soon? //CUDA_ERROR(cudaDeviceSynchronize()); @@ -215,7 +253,7 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, GPUTimer slice_timer; slice_timer.start(); - rx.slice_patches(*coords, *edge_status); + rx.slice_patches(*coords); slice_timer.stop(); GPUTimer cleanup_timer2; @@ -231,52 +269,46 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, slice_time += slice_timer.elapsed_millis(); cleanup_time += cleanup_timer.elapsed_millis(); cleanup_time += cleanup_timer2.elapsed_millis(); - - if (validate) { - rx.update_host(); - EXPECT_TRUE(rx.validate()); - } - } - - if (false) { - - RXMESH_INFO("#Vertices {}", rx.get_num_vertices(true)); - RXMESH_INFO("#Edges {}", rx.get_num_edges(true)); - RXMESH_INFO("#Faces {}", rx.get_num_faces(true)); - RXMESH_INFO("#Patches {}", rx.get_num_patches(true)); - RXMESH_INFO("request reduction = {}, achieved reduction= {}", - reduce_threshold, - num_edges_before - int(rx.get_num_edges(true))); - - if (false) { - rx.update_host(); - coords->move(DEVICE, HOST); - rx.update_polyscope(); - auto ps_mesh = rx.get_polyscope_mesh(); - ps_mesh->updateVertexPositions(*coords); - ps_mesh->setEnabled(false); - // rx.render_vertex_patch(); - // rx.render_edge_patch(); - // rx.render_face_patch(); - - polyscope::show(); - } } } timer.stop(); total_time += timer.elapsed_millis(); CUDA_ERROR(cudaProfilerStop()); - RXMESH_INFO("secp_rxmesh() RXMesh simplification took {} (ms)", total_time); + RXMESH_INFO("secp_rxmesh() RXMesh SEC took {} (ms), num_passes= {}", + total_time, + num_passes); + RXMESH_INFO("secp_rxmesh() PriorityQ time {} (ms)", pq_time); + RXMESH_INFO("secp_rxmesh() |-Edge priorities time {} (ms)", e_priority_time); + RXMESH_INFO("secp_rxmesh() |-Pop and Mark time {} (ms)", pop_mark_time); RXMESH_INFO("secp_rxmesh() App time {} (ms)", app_time); RXMESH_INFO("secp_rxmesh() Slice timer {} (ms)", slice_time); RXMESH_INFO("secp_rxmesh() Cleanup timer {} (ms)", cleanup_time); - if (!validate) { - rx.update_host(); - } + RXMESH_INFO("#Vertices {}", rx.get_num_vertices(true)); + RXMESH_INFO("#Edges {}", rx.get_num_edges(true)); + RXMESH_INFO("#Faces {}", rx.get_num_faces(true)); + RXMESH_INFO("#Patches {}", rx.get_num_patches(true)); + + + rx.update_host(); + coords->move(DEVICE, HOST); + report.add_member("num_passes", num_passes); + report.add_member("max_smem_bytes_dyn", max_smem_bytes_dyn); + report.add_member("max_smem_bytes_static", max_smem_bytes_static); + report.add_member("max_num_registers_per_thread", + max_num_registers_per_thread); + report.add_member("max_num_blocks", max_num_blocks); + report.add_member("secs_remesh_time", total_time); + report.add_member("priority_queue_time", pq_time); + report.add_member("app_time", app_time); + report.add_member("slice_time", slice_time); + report.add_member("cleanup_time", cleanup_time); + report.add_member("attributes_memory_mg", coords->get_memory_mg()); + report.model_data(Arg.obj_file_name + "_after", rx, "model_after"); + #if USE_POLYSCOPE rx.update_polyscope(); @@ -289,4 +321,7 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, rx.render_face_patch(); polyscope::show(); #endif + + report.write(Arg.output_folder + "/rxmesh_secp", + "SECP_RXMesh_" + extract_file_name(Arg.obj_file_name)); } \ No newline at end of file From 7a0bf73f0a5c784311ce4b4f80d324390116c7ea Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Tue, 14 May 2024 13:34:06 -0700 Subject: [PATCH 12/18] Use DIVIDE_UP for number of kernel blocks --- apps/SECPriority/secp_rxmesh.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/SECPriority/secp_rxmesh.cuh b/apps/SECPriority/secp_rxmesh.cuh index 4b7cb523..551e09c2 100644 --- a/apps/SECPriority/secp_rxmesh.cuh +++ b/apps/SECPriority/secp_rxmesh.cuh @@ -234,7 +234,7 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, app_timer.start(); secp - <<>>(rx.get_context(), *coords, From 18925fc36c47a9c7afbab22286f9188af86acf17 Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Tue, 14 May 2024 16:17:12 -0700 Subject: [PATCH 13/18] Fix edge attr reset bug that swapped value with device --- apps/SECPriority/secp_rxmesh.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/SECPriority/secp_rxmesh.cuh b/apps/SECPriority/secp_rxmesh.cuh index 551e09c2..9540cd62 100644 --- a/apps/SECPriority/secp_rxmesh.cuh +++ b/apps/SECPriority/secp_rxmesh.cuh @@ -133,7 +133,7 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, // rebuild every round? Not necessarily a great way to use a pq. PriorityQueue_t pq(rx.get_num_edges()); - e_pop_attr->reset(DEVICE, false); + e_pop_attr->reset(false, DEVICE); //rx.prepare_launch_box( rx.update_launch_box( From 3cae5d650e5f44ca8296e70ae1f188634b95f023 Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Tue, 14 May 2024 17:51:09 -0700 Subject: [PATCH 14/18] Add debug function to view edges to be collaped --- apps/SECPriority/secp_rxmesh.cuh | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/apps/SECPriority/secp_rxmesh.cuh b/apps/SECPriority/secp_rxmesh.cuh index 9540cd62..2dd09059 100644 --- a/apps/SECPriority/secp_rxmesh.cuh +++ b/apps/SECPriority/secp_rxmesh.cuh @@ -70,6 +70,34 @@ using Vec3 = glm::vec<3, T, glm::defaultp>; #include "rxmesh/util/report.h" +template +void render_edge_attr(rxmesh::RXMeshDynamic& rx, + const std::shared_ptr>& edge_attr) +{ + using namespace rxmesh; + //make sure the attribute is on the HOST + edge_attr->move(DEVICE, HOST); + + std::vector edgeColors(rx.get_num_edges()); + rx.for_each_edge(HOST, + [&](EdgeHandle eh) { + if(true == (*edge_attr)(eh)) + { + //save a red color + edgeColors[rx.linear_id(eh)] = 1.0f; + } + else + { + //save a black color + edgeColors[rx.linear_id(eh)] = 0.0f; + } + }); + + auto ps_mesh = rx.get_polyscope_mesh(); + auto edge_colors = ps_mesh->addEdgeScalarQuantity("Edges to Collapse", edgeColors); + edge_colors->setEnabled(true); +} + inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, const uint32_t final_num_vertices) { @@ -189,6 +217,10 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, *e_pop_attr, pop_num_edges); + // if(num_passes == 1) + // { + // render_edge_attr(rx, e_pop_attr); + // } CUDA_ERROR(cudaDeviceSynchronize()); CUDA_ERROR(cudaGetLastError()); pop_mark_timer.stop(); From 23145d84fecfcd3977b9acf22efeff6aa6d1b888 Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Wed, 15 May 2024 11:29:28 -0700 Subject: [PATCH 15/18] Add a bash script to test a range of target vertex counts --- sweep_test.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100755 sweep_test.sh diff --git a/sweep_test.sh b/sweep_test.sh new file mode 100755 index 00000000..6d7ee81e --- /dev/null +++ b/sweep_test.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Start value +start=0.001 +# End value +end=0.9 +# Step value +step=0.031 + +# Command path +command="./build/bin/SECPriority" +# Input file +input_file="./input/rocker-arm.obj" + +# Loop through the range +for target in $(seq $start $step $end) +do + echo "Running with target = $target" + $command -input $input_file -target $target +done + From 452bb39f6b83fa44430184fcf8e66ce81e8ed222 Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Thu, 16 May 2024 10:22:09 -0700 Subject: [PATCH 16/18] Render edges to collapse with patch boundaries --- apps/SECPriority/secp_rxmesh.cuh | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/apps/SECPriority/secp_rxmesh.cuh b/apps/SECPriority/secp_rxmesh.cuh index 2dd09059..98e48ca4 100644 --- a/apps/SECPriority/secp_rxmesh.cuh +++ b/apps/SECPriority/secp_rxmesh.cuh @@ -83,13 +83,11 @@ void render_edge_attr(rxmesh::RXMeshDynamic& rx, [&](EdgeHandle eh) { if(true == (*edge_attr)(eh)) { - //save a red color - edgeColors[rx.linear_id(eh)] = 1.0f; + edgeColors[rx.linear_id(eh)] = 200.0f; } else { - //save a black color - edgeColors[rx.linear_id(eh)] = 0.0f; + edgeColors[rx.linear_id(eh)] = eh.patch_id(); } }); @@ -333,7 +331,7 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, report.add_member("max_num_registers_per_thread", max_num_registers_per_thread); report.add_member("max_num_blocks", max_num_blocks); - report.add_member("secs_remesh_time", total_time); + report.add_member("secp_remesh_time", total_time); report.add_member("priority_queue_time", pq_time); report.add_member("app_time", app_time); report.add_member("slice_time", slice_time); From b6e4cb16fefa66abead58035e1dc329ba2aaba18 Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Thu, 16 May 2024 11:58:16 -0700 Subject: [PATCH 17/18] Add edgefrac(tion) to collapse per round --- apps/SECPriority/secp.cu | 8 +++++++- apps/SECPriority/secp_rxmesh.cuh | 5 +++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/apps/SECPriority/secp.cu b/apps/SECPriority/secp.cu index 7a115746..8da2cfe8 100644 --- a/apps/SECPriority/secp.cu +++ b/apps/SECPriority/secp.cu @@ -10,6 +10,7 @@ struct arg std::string obj_file_name = STRINGIFY(INPUT_DIR) "dragon.obj"; std::string output_folder = STRINGIFY(OUTPUT_DIR); float target = 0.1; + float edgefrac = 0.1; uint32_t device_id = 0; char** argv; int argc; @@ -40,7 +41,7 @@ TEST(Apps, SECPriority) uint32_t final_num_vertices = Arg.target * rx.get_num_vertices(); - secp_rxmesh(rx, final_num_vertices); + secp_rxmesh(rx, final_num_vertices, Arg.edgefrac); } @@ -63,6 +64,7 @@ int main(int argc, char** argv) " Default is {} \n" " Hint: Only accept OBJ files\n" " -target: The fraction of output #vertices from the input\n" + " -edgefrac: The fraction of edges to collapse in a round\n" " -o: JSON file output folder. Default is {} \n" " -device_id: GPU device ID. Default is {}", Arg.obj_file_name, Arg.output_folder, Arg.device_id); @@ -85,12 +87,16 @@ int main(int argc, char** argv) if (cmd_option_exists(argv, argc + argv, "-target")) { Arg.target = atof(get_cmd_option(argv, argv + argc, "-target")); } + if (cmd_option_exists(argv, argc + argv, "-edgefrac")) { + Arg.edgefrac = atof(get_cmd_option(argv, argv + argc, "-edgefrac")); + } } RXMESH_TRACE("input= {}", Arg.obj_file_name); RXMESH_TRACE("output_folder= {}", Arg.output_folder); RXMESH_TRACE("device_id= {}", Arg.device_id); RXMESH_TRACE("target= {}", Arg.target); + RXMESH_TRACE("edgefrac= {}", Arg.edgefrac); return RUN_ALL_TESTS(); } \ No newline at end of file diff --git a/apps/SECPriority/secp_rxmesh.cuh b/apps/SECPriority/secp_rxmesh.cuh index 98e48ca4..76270d97 100644 --- a/apps/SECPriority/secp_rxmesh.cuh +++ b/apps/SECPriority/secp_rxmesh.cuh @@ -97,7 +97,8 @@ void render_edge_attr(rxmesh::RXMeshDynamic& rx, } inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, - const uint32_t final_num_vertices) + const uint32_t final_num_vertices, + const float edge_reduce_ratio) { EXPECT_TRUE(rx.validate()); @@ -191,7 +192,7 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, // elements in the priority queue and store popped elements // to be used by the next kernel that actually does the collapses - float reduce_ratio = 0.1f; + float reduce_ratio = edge_reduce_ratio; const int num_edges_before = int(rx.get_num_edges()); const int reduce_threshold = std::max(1, int(reduce_ratio * float(num_edges_before))); From 024a11bd15e0fa7025b442000665c8c51b57d448 Mon Sep 17 00:00:00 2001 From: "Serban D. Porumbescu" Date: Thu, 16 May 2024 11:58:40 -0700 Subject: [PATCH 18/18] Add bash script to sweep edgefrac(tion) --- sweep_edgefrac_test.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100755 sweep_edgefrac_test.sh diff --git a/sweep_edgefrac_test.sh b/sweep_edgefrac_test.sh new file mode 100755 index 00000000..9ffb5195 --- /dev/null +++ b/sweep_edgefrac_test.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Start value +start=0.001 +# End value +end=0.9 +# Step value +step=0.031 + +# Command path +command="./build/bin/SECPriority" +# Input file +input_file="./input/rocker-arm.obj" + +# Loop through the range +for target in $(seq $start $step $end) +do + echo "Running with edgefrac = $target" + $command -input $input_file -edgefrac $target +done +