From e92a24bbe8a63e2a27a51bc83d6b12a6bba8a223 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 26 May 2022 13:52:38 -0400 Subject: [PATCH 1/6] Update cuco git tag --- cpp/cmake/thirdparty/get_cucollections.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 332b0d9dc96..7cb4f02248a 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -22,7 +22,7 @@ function(find_and_configure_cucollections) GLOBAL_TARGETS cuco::cuco BUILD_EXPORT_SET cudf-exports CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections - GIT_TAG 8b15f06f38d034e815bc72045ca3403787f75e07 + GIT_TAG 917f1e5aea1e748e4fd54b18355e4dafd2e723d6 EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS} OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" ) From cc2dfb952b8e0c5368d93dd91e129e2a8f86bfd6 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 26 May 2022 15:51:58 -0400 Subject: [PATCH 2/6] Use sentinel wrapper --- cpp/src/io/parquet/chunk_dict.cu | 15 ++++++++++----- cpp/src/join/mixed_join_semi.cu | 8 ++++---- cpp/src/join/semi_join.cu | 4 ++-- cpp/src/stream_compaction/distinct.cu | 4 ++-- cpp/src/stream_compaction/distinct_count.cu | 4 ++-- cpp/src/text/subword/load_merges_file.cu | 6 +++--- 6 files changed, 23 insertions(+), 18 deletions(-) diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu index 93e76a6ac23..3b66d4bcd0e 100644 --- a/cpp/src/io/parquet/chunk_dict.cu +++ b/cpp/src/io/parquet/chunk_dict.cu @@ -125,8 +125,11 @@ __global__ void __launch_bounds__(block_size) column_device_view const& data_col = *col->leaf_column; // Make a view of the hash map - auto hash_map_mutable = map_type::device_mutable_view( - chunk->dict_map_slots, chunk->dict_map_size, KEY_SENTINEL, VALUE_SENTINEL); + auto hash_map_mutable = + map_type::device_mutable_view(chunk->dict_map_slots, + chunk->dict_map_size, + cuco::sentinel::empty_key{KEY_SENTINEL}, + cuco::sentinel::empty_value{VALUE_SENTINEL}); __shared__ size_type total_num_dict_entries; size_type val_idx = s_start_value_idx + t; @@ -184,9 +187,11 @@ __global__ void __launch_bounds__(block_size) auto& chunk = chunks[blockIdx.x]; if (not chunk.use_dictionary) { return; } - auto t = threadIdx.x; - auto map = - map_type::device_view(chunk.dict_map_slots, chunk.dict_map_size, KEY_SENTINEL, VALUE_SENTINEL); + auto t = threadIdx.x; + auto map = map_type::device_view(chunk.dict_map_slots, + chunk.dict_map_size, + cuco::sentinel::empty_key{KEY_SENTINEL}, + cuco::sentinel::empty_value{VALUE_SENTINEL}); __shared__ cuda::atomic counter; using cuda::std::memory_order_relaxed; diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu index 13a1f1a0ce2..fbaabd08684 100644 --- a/cpp/src/join/mixed_join_semi.cu +++ b/cpp/src/join/mixed_join_semi.cu @@ -159,8 +159,8 @@ std::unique_ptr> mixed_join_semi( cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls}; semi_map_type hash_table{compute_hash_table_size(build.num_rows()), - std::numeric_limits::max(), - cudf::detail::JoinNoneValue, + cuco::sentinel::empty_key{std::numeric_limits::max()}, + cuco::sentinel::empty_value{cudf::detail::JoinNoneValue}, detail::hash_table_allocator_type{default_allocator{}, stream}, stream.value()}; @@ -397,8 +397,8 @@ compute_mixed_join_output_size_semi(table_view const& left_equality, cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls}; semi_map_type hash_table{compute_hash_table_size(build.num_rows()), - std::numeric_limits::max(), - cudf::detail::JoinNoneValue, + cuco::sentinel::empty_key{std::numeric_limits::max()}, + cuco::sentinel::empty_value{cudf::detail::JoinNoneValue}, detail::hash_table_allocator_type{default_allocator{}, stream}, stream.value()}; diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu index b7b33000707..cf29f486112 100644 --- a/cpp/src/join/semi_join.cu +++ b/cpp/src/join/semi_join.cu @@ -91,8 +91,8 @@ std::unique_ptr> left_semi_anti_join( // Create hash table. semi_map_type hash_table{compute_hash_table_size(right_num_rows), - std::numeric_limits::max(), - cudf::detail::JoinNoneValue, + cuco::sentinel::empty_key{std::numeric_limits::max()}, + cuco::sentinel::empty_value{cudf::detail::JoinNoneValue}, hash_table_allocator_type{default_allocator{}, stream}, stream.value()}; diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index d698c547a61..b815c0559b8 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -63,8 +63,8 @@ std::unique_ptr distinct(table_view const& input, auto const num_rows{keys_view.num_rows()}; hash_map_type key_map{compute_hash_table_size(num_rows), - COMPACTION_EMPTY_KEY_SENTINEL, - COMPACTION_EMPTY_VALUE_SENTINEL, + cuco::sentinel::empty_key{COMPACTION_EMPTY_KEY_SENTINEL}, + cuco::sentinel::empty_value{COMPACTION_EMPTY_VALUE_SENTINEL}, detail::hash_table_allocator_type{default_allocator{}, stream}, stream.value()}; diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 7ccc61f304b..96fcd8b53fc 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -130,8 +130,8 @@ cudf::size_type distinct_count(table_view const& keys, auto const has_null = nullate::DYNAMIC{cudf::has_nulls(keys)}; hash_map_type key_map{compute_hash_table_size(num_rows), - COMPACTION_EMPTY_KEY_SENTINEL, - COMPACTION_EMPTY_VALUE_SENTINEL, + cuco::sentinel::empty_key{COMPACTION_EMPTY_KEY_SENTINEL}, + cuco::sentinel::empty_value{COMPACTION_EMPTY_VALUE_SENTINEL}, detail::hash_table_allocator_type{default_allocator{}, stream}, stream.value()}; diff --git a/cpp/src/text/subword/load_merges_file.cu b/cpp/src/text/subword/load_merges_file.cu index 1e0c9c81fcd..68c4b9faf6e 100644 --- a/cpp/src/text/subword/load_merges_file.cu +++ b/cpp/src/text/subword/load_merges_file.cu @@ -106,9 +106,9 @@ std::unique_ptr initialize_merge_pairs_map( // Ensure capacity is at least (size/0.7) as documented here: // https://github.com/NVIDIA/cuCollections/blob/6ec8b6dcdeceea07ab4456d32461a05c18864411/include/cuco/static_map.cuh#L179-L182 auto merge_pairs_map = std::make_unique( - static_cast(input.size() * 2), // capacity is 2x; - std::numeric_limits::max(), // empty key; - -1, // empty value is not used + static_cast(input.size() * 2), // capacity is 2x; + cuco::sentinel::empty_key{std::numeric_limits::max()}, + cuco::sentinel::empty_value{-1}, // empty value is not used hash_table_allocator_type{default_allocator{}, stream}, stream.value()); From a8539a8dcbcf579e0b4682171d647e43af098d0d Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 26 May 2022 15:53:13 -0400 Subject: [PATCH 3/6] [test only] --- cpp/cmake/thirdparty/get_cucollections.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 7cb4f02248a..2c41f2bee7e 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -21,8 +21,8 @@ function(find_and_configure_cucollections) cuco 0.0.1 GLOBAL_TARGETS cuco::cuco BUILD_EXPORT_SET cudf-exports - CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections - GIT_TAG 917f1e5aea1e748e4fd54b18355e4dafd2e723d6 + CPM_ARGS GITHUB_REPOSITORY PointKernel/cuCollections + GIT_TAG add-view-ctors EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS} OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" ) From c1fbdd1b035220b9340b97dd27e8d93331823364 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 3 Jun 2022 12:04:49 -0400 Subject: [PATCH 4/6] Update cuco git tag --- cpp/cmake/thirdparty/get_cucollections.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 2c41f2bee7e..fa9744ccb38 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -21,8 +21,8 @@ function(find_and_configure_cucollections) cuco 0.0.1 GLOBAL_TARGETS cuco::cuco BUILD_EXPORT_SET cudf-exports - CPM_ARGS GITHUB_REPOSITORY PointKernel/cuCollections - GIT_TAG add-view-ctors + CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections + GIT_TAG 5a76729d8149b20e2bbf0ec9882fe446952a7cbe EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS} OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" ) From 00820d9ffb3bc3c0794d1a02f8f3a3cedd6cd1ee Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 3 Jun 2022 14:44:51 -0400 Subject: [PATCH 5/6] Use sentinel namespace --- cpp/src/io/parquet/chunk_dict.cu | 6 ++++-- cpp/src/join/hash_join.cu | 4 ++-- cpp/src/join/mixed_join.cu | 8 ++++---- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu index 3b66d4bcd0e..daf5c0757af 100644 --- a/cpp/src/io/parquet/chunk_dict.cu +++ b/cpp/src/io/parquet/chunk_dict.cu @@ -238,8 +238,10 @@ __global__ void __launch_bounds__(block_size) column_device_view const& data_col = *col->leaf_column; - auto map = map_type::device_view( - chunk->dict_map_slots, chunk->dict_map_size, KEY_SENTINEL, VALUE_SENTINEL); + auto map = map_type::device_view(chunk->dict_map_slots, + chunk->dict_map_size, + cuco::sentinel::empty_key{KEY_SENTINEL}, + cuco::sentinel::empty_value{VALUE_SENTINEL}); auto val_idx = s_start_value_idx + t; while (val_idx < end_value_idx) { diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 07995ba2785..3e198fc7681 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -286,8 +286,8 @@ hash_join::hash_join(cudf::table_view const& build, _composite_bitmask{cudf::detail::bitmask_and(build, stream).first}, _nulls_equal{compare_nulls}, _hash_table{compute_hash_table_size(build.num_rows()), - std::numeric_limits::max(), - cudf::detail::JoinNoneValue, + cuco::sentinel::empty_key{std::numeric_limits::max()}, + cuco::sentinel::empty_value{cudf::detail::JoinNoneValue}, stream.value(), detail::hash_table_allocator_type{default_allocator{}, stream}} { diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index 11553858e5f..e19dc1d02fb 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -127,8 +127,8 @@ mixed_join( // Don't use multimap_type because we want a CG size of 1. mixed_multimap_type hash_table{ compute_hash_table_size(build.num_rows()), - std::numeric_limits::max(), - cudf::detail::JoinNoneValue, + cuco::sentinel::empty_key{std::numeric_limits::max()}, + cuco::sentinel::empty_value{cudf::detail::JoinNoneValue}, stream.value(), detail::hash_table_allocator_type{default_allocator{}, stream}}; @@ -375,8 +375,8 @@ compute_mixed_join_output_size(table_view const& left_equality, // Don't use multimap_type because we want a CG size of 1. mixed_multimap_type hash_table{ compute_hash_table_size(build.num_rows()), - std::numeric_limits::max(), - cudf::detail::JoinNoneValue, + cuco::sentinel::empty_key{std::numeric_limits::max()}, + cuco::sentinel::empty_value{cudf::detail::JoinNoneValue}, stream.value(), detail::hash_table_allocator_type{default_allocator{}, stream}}; From c6e69c019e4171c1f635d47c97e5e96580fd50cc Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 6 Jun 2022 13:01:18 -0400 Subject: [PATCH 6/6] Update cuco git tag --- cpp/cmake/thirdparty/get_cucollections.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index fa9744ccb38..1f2c592888f 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -22,7 +22,7 @@ function(find_and_configure_cucollections) GLOBAL_TARGETS cuco::cuco BUILD_EXPORT_SET cudf-exports CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections - GIT_TAG 5a76729d8149b20e2bbf0ec9882fe446952a7cbe + GIT_TAG 55029034c3f82bca36148c9be29941b37492394d EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS} OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" )