diff --git a/BUILD b/BUILD index d4d693db..8e672de7 100644 --- a/BUILD +++ b/BUILD @@ -82,6 +82,7 @@ cc_library( "include/phtree/distance.h", "include/phtree/filter.h", "include/phtree/phtree.h", + "include/phtree/phtree_grid_index.h", "include/phtree/phtree_multimap.h", ], includes = [ diff --git a/TODO.txt b/TODO.txt index 5d168dc9..fd3184f0 100644 --- a/TODO.txt +++ b/TODO.txt @@ -8,7 +8,12 @@ Ideas that didn't work Counting showed that PQ would go 3-5 nodes deep (100K:3, 10M: 5) but that had no effect. Lesson: Look at WQ initialization, it may be too expensive. Why is WQ traversal so slow??? - +#XX Grid-index: This works reasonably well but needs more testing: + - as one can expect, update() works less well on highly clustered data, but for_each() works 2x-3x better! + - It appears to work better for large N, but that is probably heavily dependent on the chosen grid size. + - TODO KNN Requires a distance function API that has "Entry" as parameter + Lesson: Overall GOOD! + -> Look further into this! Fix const-ness ============== diff --git a/benchmark/query_mm_d_benchmark.cc b/benchmark/query_mm_d_benchmark.cc index ab0b4054..2ef963dd 100644 --- a/benchmark/query_mm_d_benchmark.cc +++ b/benchmark/query_mm_d_benchmark.cc @@ -16,6 +16,7 @@ #include "benchmark_util.h" #include "logging.h" #include "phtree/phtree.h" +#include "phtree/phtree_grid_index.h" #include "phtree/phtree_multimap.h" #include #include @@ -32,11 +33,12 @@ namespace { const double GLOBAL_MAX = 10000; -enum Scenario { TREE_WITH_MAP, MULTI_MAP, MULTI_MAP_STD }; +enum Scenario { TREE_WITH_MAP, MULTI_MAP, MULTI_MAP_STD, GRID_INDEX }; using TestPoint = PhPointD<3>; using QueryBox = PhBoxD<3>; -using payload_t = TestPoint; +// using payload_t = TestPoint; // TODO!?!? 
+using payload_t = size_t; using BucketType = std::set; struct Query { @@ -53,7 +55,29 @@ using TestMap = typename std::conditional_t< typename std::conditional_t< SCENARIO == MULTI_MAP, PhTreeMultiMapD, b_plus_tree_hash_set>, - PhTreeMultiMapD, std::unordered_set>>>; + typename std::conditional_t< + SCENARIO == GRID_INDEX, + PhTreeGridIndex, + PhTreeMultiMapD< + DIM, + payload_t, + CONVERTER, + std::unordered_set>>>>; + +template +TestMap CreateTree( + size_t n, typename std::enable_if_t* dummy = 0) { + (void)dummy; + auto edge_len = GLOBAL_MAX * pow(10. / (double)n, 1. / (double)DIM); + return TestMap(edge_len); +} + +template +TestMap CreateTree( + size_t, typename std::enable_if_t* dummy = 0) { + (void)dummy; + return TestMap(); +} template class IndexBenchmark { @@ -86,7 +110,7 @@ IndexBenchmark::IndexBenchmark(benchmark::State& state, double av : data_type_{static_cast(state.range(1))} , num_entities_(state.range(0)) , avg_query_result_size_(avg_query_result_size) -, tree_{} +, tree_{CreateTree(num_entities_)} , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} , points_(state.range(0)) { @@ -121,6 +145,12 @@ void InsertEntry( tree.emplace(point, data); } +template +void InsertEntry( + TestMap& tree, const PhPointD& point, payload_t data) { + tree.emplace(point, data); +} + template void InsertEntry( TestMap& tree, @@ -154,6 +184,13 @@ typename std::enable_if::type Count return counter.n_; } +template +size_t CountEntries(TestMap& tree, const Query& query) { + CounterMultiMap counter{0}; + tree.for_each(query.box, counter); + return counter.n_; +} + template size_t CountEntries(TestMap& tree, const Query& query) { CounterMultiMap counter{0}; @@ -174,7 +211,7 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { // create data with about 10% duplicate coordinates CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX, 0.1); for (size_t i = 0; i < num_entities_; ++i) { - InsertEntry(tree_, points_[i], points_[i]); + InsertEntry(tree_, 
points_[i], i); // points_[i]); } state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); @@ -218,6 +255,12 @@ void PhTreeMultiMapStd3D(benchmark::State& state, Arguments&&... arguments) { benchmark.Benchmark(state); } +template +void PhTreeGI3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::GRID_INDEX> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + template void PhTree3D(benchmark::State& state, Arguments&&... arguments) { IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; @@ -231,6 +274,11 @@ BENCHMARK_CAPTURE(PhTreeMultiMap3D, WQ_100, 100.0) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); +BENCHMARK_CAPTURE(PhTreeGI3D, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + // PhTreeMultiMap BENCHMARK_CAPTURE(PhTreeMultiMapStd3D, WQ_100, 100.0) ->RangeMultiplier(10) diff --git a/benchmark/update_mm_d_benchmark.cc b/benchmark/update_mm_d_benchmark.cc index 6c5cfa57..c59e6a40 100644 --- a/benchmark/update_mm_d_benchmark.cc +++ b/benchmark/update_mm_d_benchmark.cc @@ -16,6 +16,7 @@ #include "benchmark_util.h" #include "logging.h" #include "phtree/phtree.h" +#include "phtree/phtree_grid_index.h" #include "phtree/phtree_multimap.h" #include #include @@ -34,7 +35,13 @@ std::vector MOVE_DISTANCE = {0, 1.0, 10}; const double GLOBAL_MAX = 10000; -enum Scenario { ERASE_EMPLACE, MM_BPT_RELOCATE, MM_SET_RELOCATE, MM_SET_RELOCATE_IF }; +enum Scenario { + ERASE_EMPLACE, + MM_BPT_RELOCATE, + GI_BPT_RELOCATE, + MM_SET_RELOCATE, + MM_SET_RELOCATE_IF +}; using payload_t = scalar_64_t; @@ -53,7 +60,10 @@ using TestMap = typename std::conditional_t< typename std::conditional_t< SCENARIO == MM_BPT_RELOCATE, PhTreeMultiMapD, b_plus_tree_hash_set>, - PhTreeMultiMapD, std::set>>>; + typename std::conditional_t< + 
SCENARIO == GI_BPT_RELOCATE, + PhTreeGridIndexD, + PhTreeMultiMapD, std::set>>>>; template struct UpdateOp { @@ -62,6 +72,21 @@ struct UpdateOp { PointType new_; }; +template +TestMap CreateTree( + size_t n, typename std::enable_if_t* dummy = 0) { + (void)dummy; + auto edge_len = GLOBAL_MAX * pow(10. / (double)n, 1. / (double)DIM); + return TestMap(edge_len); +} + +template +TestMap CreateTree( + size_t, typename std::enable_if_t* dummy = 0) { + (void)dummy; + return TestMap(); +} + template class IndexBenchmark { public: @@ -96,6 +121,7 @@ IndexBenchmark::IndexBenchmark( , num_entities_(state.range(0)) , updates_per_round_(updates_per_round) , move_distance_(std::move(move_distance)) +, tree_{CreateTree(num_entities_)} , points_(num_entities_) , updates_(updates_per_round) , random_engine_{0} @@ -128,6 +154,12 @@ void InsertEntry( tree.emplace(point, data); } +template +void InsertEntry( + TestMap& tree, const PointType& point, payload_t data) { + tree.emplace(point, data); +} + template void InsertEntry( TestMap& tree, const PointType& point, payload_t data) { @@ -161,7 +193,8 @@ typename std::enable_if::type Updat template typename std::enable_if< - SCENARIO == Scenario::MM_BPT_RELOCATE || SCENARIO == Scenario::MM_SET_RELOCATE, + SCENARIO == Scenario::MM_BPT_RELOCATE || SCENARIO == Scenario::MM_SET_RELOCATE || + SCENARIO == Scenario::GI_BPT_RELOCATE, size_t>::type UpdateEntry(TestMap& tree, std::vector>& updates) { size_t n = 0; @@ -246,6 +279,12 @@ void PhTreeMMRelocateBpt3D(benchmark::State& state, Arguments&&... arguments) { benchmark.Benchmark(state); } +template +void PhTreeGIRelocateBpt3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::GI_BPT_RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + template void PhTreeMMRelocateStdSet3D(benchmark::State& state, Arguments&&... 
arguments) { IndexBenchmark<3, Scenario::MM_SET_RELOCATE> benchmark{state, arguments...}; @@ -271,6 +310,11 @@ BENCHMARK_CAPTURE(PhTreeMMRelocateBpt3D, UPDATE_1000, UPDATES_PER_ROUND) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); +BENCHMARK_CAPTURE(PhTreeGIRelocateBpt3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + // PhTreeMultiMap with std::set BENCHMARK_CAPTURE(PhTreeMMRelocateStdSet3D, UPDATE_1000, UPDATES_PER_ROUND) ->RangeMultiplier(10) diff --git a/include/phtree/phtree_grid_index.h b/include/phtree/phtree_grid_index.h new file mode 100644 index 00000000..362b1399 --- /dev/null +++ b/include/phtree/phtree_grid_index.h @@ -0,0 +1,943 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_PHTREE_GRID_INDEX_H +#define PHTREE_PHTREE_GRID_INDEX_H + +#include "common/b_plus_tree_hash_map.h" +#include "common/common.h" +#include "phtree_multimap.h" +#include + +namespace improbable::phtree { + +/* + * PH-Tree grid-index main class. + * + * The PhTreeGridIndex is a wrapper around a normal PH-tree multi-map. + * The grid-index has much faster relocate() operations: In case of small movements the + * cost is O(1) (basically O(0)!). 
The tree can see whether an entry would stay in the same bin, + * if it does, the tree is not traversed, the cost of the operation is mainly comparing the old + * and new key plus some maths. + * + * Internally, the grid index just rounds the coordinates to a configurable grid. That's it. + * + * The API follows mostly the std::unordered_multimap, exceptions are pointed out. + * Differences to PhTree + * - This is a multi-map and hence follows the std::unordered_multimap rather than std::map + * - erase() returns an iterator instead of a pairs {iterator, bool) + * - similar to the normal PH-Tree, emplace() returns a reference to the value instead of an + * iterator + * + * For more information please refer to the README of this project. + */ + +namespace { + +template +class ConverterGridIndex : public ConverterPointBase { + using BASE = ConverterPointBase; + + public: + using Point = typename BASE::KeyExternal; + using PointInternal = typename BASE::KeyInternal; + using QueryBox = typename BASE::QueryBoxExternal; + using QueryBoxInternal = typename BASE::QueryBoxInternal; + using ScalarExternal = typename BASE::ScalarExternal; + using ScalarInternal = typename BASE::ScalarInternal; + + public: + explicit ConverterGridIndex(double cell_edge_length) + : post_{cell_edge_length}, pre_{1. 
/ cell_edge_length} {} + + ConverterGridIndex(const ConverterGridIndex& other) = default; + ConverterGridIndex& operator=(const ConverterGridIndex& other) = default; + ConverterGridIndex(ConverterGridIndex&& other) noexcept = default; + ConverterGridIndex& operator=(ConverterGridIndex&& other) noexcept = default; + ~ConverterGridIndex() noexcept = default; + + [[nodiscard]] PointInternal pre(const Point& point) const { + PointInternal p{}; + for (dimension_t d = 0; d < DIM; ++d) { + p[d] = static_cast(point[d] * pre_); + } + return p; + } + + [[nodiscard]] Point post(const PointInternal& in) const { + Point p{}; + for (dimension_t d = 0; d < DIM; ++d) { + p[d] = static_cast(in[d] * post_); + } + return p; + } + + [[nodiscard]] QueryBoxInternal pre_query(const QueryBox& box) const { + return {pre(box.min()), pre(box.max())}; + } + + private: + const double post_; + const double pre_; +}; + +/* + * Base class for the internal PH-Tree multi-map iterators. + * + * This base class must be distinct from the other Iterator classes because it must be agnostic of + * the types of the fields that hold iterators. If it knew about these types then we would need + * to provide them for the ==/!= operators, which would then make it impossible to compare + * the generic end() iterator with any specialized iterator. + */ +// TODO merge with "Normal" ?!?!? 
+template +class IteratorBaseGI { + friend PHTREE; + using T = typename PHTREE::ValueType; + + public: + explicit IteratorBaseGI() noexcept : current_value_ptr_{nullptr} {} + + T& operator*() const noexcept { + assert(current_value_ptr_); + return const_cast(*current_value_ptr_); + } + + T* operator->() const noexcept { + assert(current_value_ptr_); + return const_cast(current_value_ptr_); + } + + friend bool operator==( + const IteratorBaseGI& left, const IteratorBaseGI& right) noexcept { + return left.current_value_ptr_ == right.current_value_ptr_; + } + + friend bool operator!=( + const IteratorBaseGI& left, const IteratorBaseGI& right) noexcept { + return left.current_value_ptr_ != right.current_value_ptr_; + } + + protected: + void SetFinished() noexcept { + current_value_ptr_ = nullptr; + } + + void SetCurrentValue(const T* current_value_ptr) noexcept { + current_value_ptr_ = current_value_ptr; + } + + private: + const T* current_value_ptr_; +}; + +template +class IteratorNormalGI : public IteratorBaseGI { + friend PHTREE; + + public: + explicit IteratorNormalGI() noexcept : IteratorBaseGI(), iter_ph_{} {} + + template + IteratorNormalGI(ITER_PH&& iter_ph, FILT&& filter) noexcept + : IteratorBaseGI() + , iter_ph_{std::forward(iter_ph)} + , filter_{std::forward(filter)} { + FindNextElement(); + } + + IteratorNormalGI& operator++() noexcept { + ++iter_ph_; + FindNextElement(); + return *this; + } + + IteratorNormalGI operator++(int) noexcept { + IteratorNormalGI iterator(this->iter_ph_, filter_); // TODO ... ? + ++(*this); + return iterator; + } + + /* + * Returns the external key (the 'first' part of the key/value pair). 
+ */ + auto first() const { + return iter_ph_.first(); + } + + protected: + auto& GetIteratorOfPhTree() const noexcept { + return iter_ph_; + } + + private: + void FindNextElement() { + while (!iter_ph_.__is_end()) { + // We filter only entries here, nodes are filtered elsewhere + auto& entry = *iter_ph_; + // TODO filter + if (filter_(entry.first)) { + this->SetCurrentValue(&(entry.second)); + return; + } + ++iter_ph_; + } + // finished + this->SetFinished(); + } + + ITERATOR_PH iter_ph_; + FILTER filter_; +}; + +template +class IteratorKnnGI : public IteratorNormalGI { + public: + template + IteratorKnnGI(ITER_PH&& iter_ph, FILT&& filter) noexcept + : IteratorNormalGI( + std::forward(iter_ph), std::forward(filter)) {} + + [[nodiscard]] double distance() const noexcept { + return this->GetIteratorOfPhTree().distance(); + } +}; + +} // namespace + +template +using PhTreeGridIndexEntry = std::pair; +} // namespace improbable::phtree + +namespace std { +// template <> +template +struct hash> { + size_t operator()(const typename improbable::phtree::PhTreeGridIndexEntry& x) const { + return std::hash{}(x.second); + } +}; +}; // namespace std + +namespace improbable::phtree { +/* + * The PhTreeGridIndex class. 
+ */ +template < + dimension_t DIM, + typename T, + // typename CONVERTER = ConverterNoOp, + typename CONVERTER = ConverterGridIndex, + typename BUCKET = b_plus_tree_hash_set, + bool POINT_KEYS = true, + typename DEFAULT_QUERY_TYPE = QueryPoint> +class PhTreeGridIndex { + using KeyInternal = typename CONVERTER::KeyInternal; + using Key = typename CONVERTER::KeyExternal; + static constexpr dimension_t DimInternal = CONVERTER::DimInternal; + using PHTREE = PhTreeGridIndex; + using ValueType = T; + using EndType = decltype(std::declval, + CONVERTER, + BUCKET, + POINT_KEYS, + DEFAULT_QUERY_TYPE>>() + .end()); + + friend PhTreeDebugHelper; + friend IteratorBaseGI; + + public: + using QueryBox = typename CONVERTER::QueryBoxExternal; + using EntryT = PhTreeGridIndexEntry; + + private: + using BUCKET_Internal = b_plus_tree_hash_set; + + public: + explicit PhTreeGridIndex(double cell_edge_length = 100) : tree_{CONVERTER{cell_edge_length}} {} + + explicit PhTreeGridIndex(CONVERTER converter) : tree_{converter} {} + + PhTreeGridIndex(const PhTreeGridIndex& other) = delete; + PhTreeGridIndex& operator=(const PhTreeGridIndex& other) = delete; + PhTreeGridIndex(PhTreeGridIndex&& other) noexcept = default; + PhTreeGridIndex& operator=(PhTreeGridIndex&& other) noexcept = default; + ~PhTreeGridIndex() noexcept = default; + + /* + * Attempts to build and insert a key and a value into the tree. + * + * @param key The key for the new entry. + * + * @param args Arguments used to generate a new value. + * + * @return A pair, whose first element points to the possibly inserted pair, + * and whose second element is a bool that is true if the pair was actually inserted. + * + * This function attempts to build and insert a (key, value) pair into the tree. The PH-Tree is + * effectively a multi-set, so if an entry with the same key/value was already in the tree, it + * returns that entry instead of inserting a new one. + */ + template + std::pair emplace(const Key& key, Args&&... 
args) { + auto result = tree_.try_emplace(key, EntryT{key, std::forward(args)...}); + return {const_cast(result.first.second), result.second}; + } + + /* + * The emplace_hint() method uses an iterator as hint for insertion. + * The hint is ignored if it is not useful or is equal to end(). + * + * Iterators should normally not be used after the tree has been modified. As an exception to + * this rule, an iterator can be used as hint if it was previously used with at most one call + * to erase() and if no other modifications occurred. + * The following is valid: + * + * // Move value from key1 to key2 (if you don't want to use relocate() ). + * auto iter = tree.find(key1); + * auto value = iter.second(); // The value may become invalid in erase() + * erase(iter); + * emplace_hint(iter, key2, value); // the iterator can still be used as hint here + */ + template + std::pair emplace_hint(const ITERATOR& iterator, const Key& key, Args&&... args) { + auto result = tree_.emplace_hint( + iterator.GetIteratorOfPhTree(), key, EntryT{key, std::forward(args)...}); + return {const_cast(result.first.second), result.second}; + } + + /* + * See std::unordered_multimap::insert(). + * + * @return a pair consisting of the inserted value (or the value that prevented the + * insertion if the key/value already existed) and a bool denoting whether the insertion + * took place. + */ + std::pair insert(const Key& key, const T& value) { + return emplace(key, value); + } + + /* + * See emplace(). + */ + template + std::pair try_emplace(const Key& key, Args&&... args) { + return emplace(key, std::forward(args)...); + } + + /* + * See emplace_hint(). + */ + template + std::pair try_emplace(const ITERATOR& iterator, const Key& key, Args&&... args) { + return emplace_hint(iterator, key, std::forward(args)...); + } + + /* + * @return the number of entries whose key is equal to the provided key ('0' if there are none). 
+ */ + size_t count(const Key& key) const { + auto iter = tree_.find(key); + size_t n = 0; + while (iter != tree_.end()) { + n += (key == iter->first); + ++iter; + } + return n; + } + + /* + * Estimates the result count of a rectangular window query by counting the sizes of all buckets + * that overlap with the query box. This estimate function should be much faster than a normal + * query, especially in trees with many entries per bucket. + * + * @param query_box The query window. + * @param query_type The type of query, such as QueryIntersect or QueryInclude + */ + template + size_t estimate_count(QueryBox query_box, QUERY_TYPE query_type = QUERY_TYPE()) const { + return tree_.estimate_count(query_box, query_type); + // // TODO... use box filter + // size_t n = 0; + // auto counter_lambda = [&](const Key&, const EntryT& bucket) { ++n; }; + // // auto filter = [&](const Key&, const BUCKET& bucket) { n += bucket.size(); }; + // tree_.for_each(query_box, counter_lambda, FilterNoOp{}, query_type); + // return n; + } + + /* + * See std::unordered_multimap::find(). + * + * @param key the key to look up + * @return an iterator that points either to the first value associated with the key or + * to {@code end()} if no value was found + */ + auto find(const Key& key) const { + auto filter = [key = key](const Key& key2) noexcept { return key == key2; }; + return CreateIterator(tree_.find(key), std::move(filter)); + } + + /* + * See std::unordered_multimap::find(). + * + * @param key the key to look up + * @param value the value to look up + * @return an iterator that points either to the associated value of the key/value pair + * or to {@code end()} if the key/value pair was not found + */ + auto find(const Key& key, const T& value) const { + auto filter = [key = key](const Key& key2) noexcept { return key == key2; }; + return CreateIterator(tree_.find(key, create(key, value)), std::move(filter)); + } + + /* + * See std::unordered_multimap::erase(). 
Removes the provided key/value pair if it exists. + * + * @return '1' if the key/value pair was found, otherwise '0'. + */ + size_t erase(const Key& key, const T& value) { + return tree_.erase(key, create(key, value)); + } + + /* + * See std::map::erase(). Removes any entry located at the provided iterator. + * + * This function uses the iterator to directly erase the entry, so it is usually faster than + * erase(key, value). + * + * @return '1' if a value was found, otherwise '0'. + */ + template + size_t erase(const ITERATOR& iterator) { + return tree_.erase(iterator.GetIteratorOfPhTree()); + } + + /* + * This function attempts to remove the 'value' from 'old_key' and reinsert it for 'new_key'. + * + * The relocate function will report _success_ in the following cases: + * - the value was removed from the old position and reinserted at the new position + * - the old position and new position are identical. + * + * The relocate function will report _failure_ in the following cases: + * - The value was already present in the new position + * - The value was not present in the old position + * + * In case of _failure_, this function guarantees that the tree remains unchanged + * or is returned to its original state (i.e. before the function was called). + * + * @param old_key The old position + * @param new_key The new position + * @param value The value that needs to be relocated. The relocate() method uses the value's + * '==' operator to identify the entry that should be moved. + * @param verify_exists This setting toggles whether a relocate() between two identical keys + * should verify whether the key actually exists before returning '1'. + * If set to 'false', this function will return '1' if the keys are identical, + * without checking whether the keys actually exist. Avoiding this check can + * considerably speed up relocate() calls, especially when using a + * ConverterMultiply. + * NOTE(review): the current implementation does not read this argument, it always + * passes 'true' to the underlying relocate_if() - confirm whether this is intended. + * + * @return '1' if a value was found and reinserted, otherwise '0'. 
+ */ + template + size_t relocate( + const Key& old_key, const Key& new_key, T2&& value, bool verify_exists = false) { + // TODO document verify_exists, + // TODO do we need to check coordinates? Document this!! + // TODO update old/new key? With verify=false we can ignore updating the key!! + // return tree_.relocate(old_key, new_key, create(old_key, value), verify_exists); + + auto update_fn = [&value, &old_key, &new_key](const EntryT& e) -> size_t { + if (e.second == value && e.first == old_key) { + const_cast(e.first) = new_key; + return true; + } + return false; + }; + return tree_.relocate_if(old_key, new_key, std::move(update_fn), true); + + // auto fn = [&value](BUCKET& src, BUCKET& dst) -> size_t { + // auto it = src.find(value); + // if (it != src.end() && dst.emplace(std::move(*it)).second) { + // src.erase(it); + // return 1; + // } + // return 0; + // }; + // auto count_fn = [&value](BUCKET& src) -> size_t { return src.find(value) != + // src.end(); }; return tree_._relocate_mm( + // converter_.pre(old_key), converter_.pre(new_key), verify_exists, fn, + // count_fn); + } + + /* + * This function attempts to remove the 'value' from 'old_key' and reinsert it for 'new_key'. + * + * The relocate function will report _success_ in the following cases: + * - the value was removed from the old position and reinserted at the new position + * - the old position and new position are identical. + * + * The relocate function will report _failure_ in the following cases: + * - The value was already present in the new position + * - The value was not present in the old position + * + * In case of _failure_, this function guarantees that the tree remains unchanged + * or is returned to its original state (i.e. before the function was called). + * + * @param old_key The old position + * @param new_key The new position + * @param predicate The predicate that is used for every value at position old_key to evaluate + * whether it should be relocated to new_key. 
+ * @param verify_exists This setting toggles whether a relocate() between two identical keys + * should verify whether the key actually exist before return '1'. + * If set to 'false', this function will return '1' if the keys are identical, + * without checking whether the keys actually exist. Avoiding this check can + * considerably speed up relocate() calls, especially when using a + * ConverterMultiply. + * + * @return the number of values that were relocated. + */ + template + size_t relocate_if( + const Key& old_key, const Key& new_key, PREDICATE&& pred_fn, bool verify_exists = false) { + // TODO document verify_exists, + // TODO do we need to check coordinates? Document this!! + return tree_.relocate_if(old_key, new_key, std::forward(pred_fn), verify_exists); + // auto fn = [&pred_fn](BUCKET& src, BUCKET& dst) -> size_t { + // size_t result = 0; + // auto iter_src = src.begin(); + // while (iter_src != src.end()) { + // if (pred_fn(*iter_src) && dst.emplace(std::move(*iter_src)).second) { + // iter_src = src.erase(iter_src); + // ++result; + // } else { + // ++iter_src; + // } + // } + // return result; + // }; + // auto count_fn = [&pred_fn](BUCKET& src) -> size_t { + // size_t result = 0; + // auto iter_src = src.begin(); + // while (iter_src != src.end()) { + // if (pred_fn(*iter_src)) { + // ++result; + // } + // ++iter_src; + // } + // return result; + // }; + // return tree_._relocate_mm( + // converter_.pre(old_key), converter_.pre(new_key), verify_exists, fn, + // count_fn); + } + + /* + * Relocates all values from one coordinate to another. + * Returns an iterator pointing to the relocated data (or end(), if the relocation failed). + */ + auto relocate_all(const Key& old_key, const Key& new_key) { + return tree_.relocate(old_key, new_key); + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default, all entries are returned. 
Filter + * functions must implement the same signature as the default 'FilterNoOp'. + * + * @param callback The callback function to be called for every entry that matches the filter. + * The callback requires the following signature: callback(const PhPointD &, const T &) + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are passed to the callback or traversed. Any filter function must + * follow the signature of the default 'FilterNoOp`. + * The default 'FilterNoOp` filter matches all entries. + */ + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) const { + tree_.for_each( + NoOpCallback{}, + WrapCallbackFilter{ + std::forward(callback), std::forward(filter), converter()}); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum respectively the maximum keys in every dimension. + * @param query_box The query window. + * @param callback The callback function to be called for every entry that matches the query + * and filter. + * The callback requires the following signature: callback(const PhPointD &, const T &) + * @param query_type The type of query, such as QueryIntersect or QueryInclude + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp`. + * The default 'FilterNoOp` filter matches all entries. 
+ */ + template < + typename CALLBACK, + typename FILTER = FilterNoOp, + typename QUERY_TYPE = DEFAULT_QUERY_TYPE> + void for_each( + QueryBox query_box, + CALLBACK&& callback, + FILTER&& filter = FILTER(), + QUERY_TYPE query_type = QUERY_TYPE()) const { + tree_.template for_each>( + query_box, + {}, + {std::forward(callback), + std::forward(filter), + converter(), + query_box}, + query_type); + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default, all entries are returned. Filter + * functions must implement the same signature as the default 'FilterNoOp'. + * + * @return an iterator over all (filtered) entries in the tree, + */ + template + auto begin(FILTER&& filter = FILTER()) const { + return CreateIterator(tree_.begin(WrapFilter(std::forward(filter)))); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum respectively the maximum keys in every dimension. + * @param query_box The query window. + * @param query_type The type of query, such as QueryIntersect or QueryInclude + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp`. + * @return Result iterator. 
+ */ + template + auto begin_query( + const QueryBox& query_box, + FILTER&& filter = FILTER(), + QUERY_TYPE&& query_type = QUERY_TYPE()) const { + auto key_filter = [query_box = query_box](const Key& key) noexcept { + auto& min = query_box.min(); + auto& max = query_box.max(); + for (dimension_t d = 0; d < DIM; ++d) { + if (key[d] < min[d] || key[d] > max[d]) { + return false; + } + } + return true; + }; + return CreateIterator( + tree_.begin_query( + query_box, WrapFilter(std::forward(filter)), query_type), + std::move(key_filter)); + } + + /* + * Locate nearest neighbors for a given point in space. + * + * NOTE: This method is not (currently) available for box keys. + * + * @param min_results number of entries to be returned. More entries may or may not be returned + * when several entries have the same distance. + * @param center center point + * @param distance_function optional distance function, defaults to euclidean distance + * @param filter optional filter predicate that excludes nodes/entries before their distance is + * calculated. + * @return Result iterator. + */ + template < + typename DISTANCE, + typename FILTER = FilterNoOp, + // Some magic to disable this in case of box keys + bool DUMMY = POINT_KEYS, + typename std::enable_if::type = 0> + auto begin_knn_query( + size_t min_results, + const Key& center, + DISTANCE&& distance_function = DISTANCE(), + FILTER&& filter = FILTER()) const { + // We use pre() instead of pre_query() here because, strictly speaking, we want to + // find the nearest neighbors of a (fictional) key, which may as well be a box. + // TODO filter + return CreateIteratorKnn(tree_.begin_knn_query( + min_results, + center, + std::forward(distance_function), + WrapFilter(std::forward(filter)))); + } + + /* + * @return An iterator representing the tree's 'end'. + */ + auto end() const { + return IteratorNormalGI{}; + } + + /* + * Remove all entries from the tree. 
+ */ + void clear() { + tree_.clear(); + } + + /* + * @return the number of entries (key/value pairs) in the tree. + */ + [[nodiscard]] size_t size() const { + return tree_.size(); + } + + /* + * @return 'true' if the tree is empty, otherwise 'false'. + */ + [[nodiscard]] bool empty() const { + return tree_.empty(); + } + + /* + * @return the converter associated with this tree. + */ + [[nodiscard]] const CONVERTER& converter() const { + return tree_.converter(); + } + + private: + // This is used by PhTreeDebugHelper + const auto& GetInternalTree() const { + return tree_.GetInternalTree(); + } + + void CheckConsistencyExternal() const { + tree_.CheckConsistencyExternal(); + } + + template + EntryT create(const Key& key, Args&&... args) const { + return std::make_pair(key, std::forward(args)...); + } + + struct NoOpCallback { + constexpr void operator()(const Key&, const EntryT&) const noexcept {} + }; + + struct NoOpFilterGI { + constexpr bool operator()(const Key&) const noexcept { + return true; + } + }; + + template + auto CreateIterator(OUTER_ITER&& outer_iter, KEY_FILTER&& filter = KEY_FILTER()) const { + return IteratorNormalGI( + std::forward(outer_iter), std::forward(filter)); + } + + template + auto CreateIteratorKnn(OUTER_ITER&& outer_iter, KEY_FILTER&& filter = KEY_FILTER()) const { + return IteratorKnnGI( + std::forward(outer_iter), std::forward(filter)); + } + + template + class WrapFilter { + public: + template + WrapFilter(F&& filter) : filter_{std::forward(filter)} {} + + template + [[nodiscard]] constexpr bool IsEntryValid(const KeyInternal&, const BucketT& e) const { + return true; // filter_.IsEntryValid(e.first, e.second); + } + [[nodiscard]] constexpr bool IsNodeValid(const KeyInternal&, int) const { + // TODO? Remove filter methods for grid ?!?! 
+ return true; + } + [[nodiscard]] constexpr bool IsBucketEntryValid( + const KeyInternal& k, const EntryT& e) const { + // TODO avoid using key-internal + return filter_.IsBucketEntryValid(k, e.second); + } + + private: + FILTER filter_; + }; + + /* + * This wrapper wraps the Filter and Callback such that the callback is called for every + * entry in any bucket that matches the user defined IsEntryValid(). + */ + template + class WrapCallbackFilter { + public: + /* + * We always have two iterators, one that traverses the PH-Tree and returns 'buckets', the + * other iterator traverses the returned buckets. + * The wrapper ensures that the callback is called for every entry in a bucket.. + */ + template + WrapCallbackFilter(CB&& callback, F&& filter, const CONVERTER& converter) + : callback_{std::forward(callback)} + , filter_{std::forward(filter)} + , converter_{converter} {} + + [[nodiscard]] inline bool IsEntryValid( + const KeyInternal& internal_key, const BUCKET& bucket) { + // TODO??? + // We can roughly filter the bucket by key, but we need to traverse all + // entries anyway to get the correct key. + // Problem: we cannot easily map the type of the internal bucket to the external + // bucket because of the different Entry type. + // However we can simply forward the modified bucket type, it is easy to use, + // even if it does not comply with normal signature.... + + // if (filter_.IsEntryValid(internal_key, bucket)) { + // auto key = converter_.post(internal_key); + // for (auto& entry : bucket) { + // if (filter_.IsBucketEntryValid(internal_key, entry)) { + // callback_(key, entry); + // } + // } + // } + // // Return false. We already called the callback. 
+ // return false; + return true; + } + + template + [[nodiscard]] inline bool IsBucketEntryValid( + const KeyInternal& internal_key, const ValueT& entry) const noexcept { + if (filter_.IsBucketEntryValid(internal_key, entry.second)) { + callback_(entry.first, entry.second); + return true; + } + // Return false. We already called the callback. + return false; + } + + [[nodiscard]] inline bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) { + // TODO document this?!? We cannot check the nodes..... + // TODO disable filters all together? + return true; + // return filter_.IsNodeValid(prefix, bits_to_ignore); + } + + private: + CALLBACK callback_; + FILTER filter_; + const CONVERTER& converter_; + }; + + template + class WrapCallbackFilterQuery { + public: + /* + * We always have two iterators, one that traverses the PH-Tree and returns 'buckets', the + * other iterator traverses the returned buckets. + * The wrapper ensures that the callback is called for every entry in a bucket.. + */ + template + WrapCallbackFilterQuery( + CB&& callback, F&& filter, const CONVERTER& converter, const QueryBox& query) + : callback_{std::forward(callback)} + , filter_{std::forward(filter)} + , converter_{converter} + , query_{query} {} + + [[nodiscard]] inline bool IsEntryValid(const KeyInternal&, const BUCKET_Internal&) { + return true; + } + + template + [[nodiscard]] inline bool IsBucketEntryValid( + const KeyInternal& internal_key, const ValueT& entry) const noexcept { + auto& min = query_.min(); + auto& max = query_.max(); + auto& key = entry.first; + for (dimension_t d = 0; d < DIM; ++d) { + if (key[d] < min[d] || key[d] > max[d]) { + return false; + } + } + + if (filter_.IsBucketEntryValid(internal_key, entry.second)) { + callback_(entry.first, entry.second); + return true; + } + // Return false. We already called the callback. + return false; + } + + [[nodiscard]] inline bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) { + // TODO document this?!? 
We cannot check the nodes..... + // TODO disable filters all together? + return true; + // return filter_.IsNodeValid(prefix, bits_to_ignore); + } + + private: + CALLBACK callback_; + FILTER filter_; + const CONVERTER& converter_; + QueryBox query_; + }; + + PhTreeMultiMap tree_; +}; + +/** + * A PH-Tree multi-map that uses (axis aligned) points as keys. + * The points are defined with 64bit 'double' floating point coordinates. + * + * See 'PhTreeD' for details. + */ +template < + dimension_t DIM, + typename T, + typename CONVERTER = ConverterGridIndex, + // TODO !!!!!!!!!!!!!!!!11 + typename BUCKET = b_plus_tree_hash_set, T>>> +using PhTreeGridIndexD = PhTreeGridIndex; + +template < + dimension_t DIM, + typename T, + typename CONVERTER_BOX, + typename BUCKET = b_plus_tree_hash_set, T>>> +using PhTreeGridIndexBox = PhTreeGridIndex; + +/** + * A PH-Tree multi-map that uses (axis aligned) boxes as keys. + * The boxes are defined with 64bit 'double' floating point coordinates. + * + * See 'PhTreeD' for details. 
+ */ +template < + dimension_t DIM, + typename T, + typename CONVERTER_BOX = ConverterBoxIEEE, + typename BUCKET = b_plus_tree_hash_set, T>>> +using PhTreeGridIndexBoxD = PhTreeGridIndexBox; + +} // namespace improbable::phtree + +#endif // PHTREE_PHTREE_GRID_INDEX_H diff --git a/include/phtree/phtree_multimap.h b/include/phtree/phtree_multimap.h index af6ae0ec..0a24f011 100644 --- a/include/phtree/phtree_multimap.h +++ b/include/phtree/phtree_multimap.h @@ -84,6 +84,10 @@ class IteratorBase { return left.current_value_ptr_ != right.current_value_ptr_; } + bool __is_end() { + return current_value_ptr_ == nullptr; + } + protected: void SetFinished() noexcept { current_value_ptr_ = nullptr; @@ -730,7 +734,7 @@ class PhTreeMultiMap { return converter_; } - private: + public: // This is used by PhTreeDebugHelper const auto& GetInternalTree() const { return tree_; @@ -745,6 +749,7 @@ class PhTreeMultiMap { assert(n == size_); } + private: template auto CreateIteratorFind(OUTER_ITER&& outer_iter, const T& value) const { auto bucket_iter = diff --git a/test/BUILD b/test/BUILD index 0d8d0d7f..f8ebaad5 100644 --- a/test/BUILD +++ b/test/BUILD @@ -104,6 +104,19 @@ cc_test( ], ) +cc_test( + name = "phtree_grid_index_d", + timeout = "long", + srcs = [ + "phtree_grid_d_test.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + cc_test( name = "phtree_d_test", timeout = "long", diff --git a/test/phtree_grid_d_test.cc b/test/phtree_grid_d_test.cc new file mode 100644 index 00000000..04dc9a96 --- /dev/null +++ b/test/phtree_grid_d_test.cc @@ -0,0 +1,1308 @@ +/* + * Copyright 2023 Tilmann Zaeschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree_grid_index.h" +#include +#include + +using namespace improbable::phtree; + +namespace phtree_multimap_d_test { + +// Number of entries that have the same coordinate +static const size_t NUM_DUPL = 4; +static const double WORLD_MIN = -1000; +static const double WORLD_MAX = 1000; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeGridIndexD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i{i}, data_{0} {} + explicit Id(const size_t i) : _i{static_cast(i)}, data_{0} {} + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + int _i; + int data_; +}; +} + +namespace std { +template <> +struct hash { + size_t operator()(const phtree_multimap_d_test::Id& x) const { + return std::hash{}(x._i); + } +}; +}; // namespace std + +namespace phtree_multimap_d_test { + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(static_cast(id)) {} + + double _distance; + int _id; +}; + +bool comparePointDistanceAndId(PointDistance& i1, PointDistance& i2) { + return (i1._distance != i2._distance) ? 
(i1._distance < i2._distance) : (i1._id < i2._id); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = p1[i] - p2[i]; + sum2 += d * d; + } + return sqrt(sum2); +} + +template +double distanceL1(const TestPoint& p1, const TestPoint& p2) { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(p1[i] - p2[i]); + } + return sum; +} + +template +void generateCube(std::vector>& points, size_t N) { + assert(N % NUM_DUPL == 0); + DoubleRng rng(WORLD_MIN, WORLD_MAX); + auto reference_set = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N / NUM_DUPL; i++) { + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional + // duplicates. + TestPoint key{}; + for (dimension_t d = 0; d < DIM; ++d) { + key[d] = rng.next(); + } + if (reference_set.count(key) != 0) { + i--; + continue; + } + reference_set.emplace(key, i); + for (size_t dupl = 0; dupl < NUM_DUPL; dupl++) { + auto point = TestPoint(key); + points.push_back(point); + } + } + ASSERT_EQ(reference_set.size(), N / NUM_DUPL); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_LE(tree.count(p), i % NUM_DUPL); + if (i % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + + Id id(i); + if (i % 4 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else if (i % 4 == 1) { + ASSERT_TRUE(tree.insert(p, id).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, id).second); + } + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try adding it again + 
ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, (*q)._i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_NE(tree.find(p), tree.end()); + size_t expected_remaining = (N - i - 1) % NUM_DUPL + 1; + ASSERT_EQ(tree.count(p), expected_remaining); + ASSERT_EQ(i, tree.find(p, id)->_i); + if (i % 3 == 0) { + ASSERT_EQ(1, tree.erase(p, id)); + } else { + auto iter = tree.find(p, id); + ASSERT_EQ(1, tree.erase(iter)); + } + + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p, id)); + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeMMDTest, SmokeTestBasicOps) { + SmokeTestBasicOps<1>(10000); + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(10000); + SmokeTestBasicOps<20>(1000); + SmokeTestBasicOps<63>(100); +} + +TEST(PhTreeMMDTest, TestDebug) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + using Debug = PhTreeDebugHelper; + ASSERT_LE(10, 
Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_TRUE(tree.insert(p, id).second); + } + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_LE(N * 3, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(N / NUM_DUPL, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + tree.clear(); + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); +} + +TEST(PhTreeMMDTest, TestInsert) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.insert(p, id).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + + // try add again + ASSERT_EQ(false, tree.insert(p, id).second); + ASSERT_EQ(i, tree.insert(p, id).first._i); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, (*q)._i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = 
points.at(i); + ASSERT_EQ(tree.count(p), NUM_DUPL); + Id id(i); + ASSERT_EQ(i, tree.find(p, id)->_i); + ASSERT_EQ(i / NUM_DUPL, tree.find(p)->_i / NUM_DUPL); + } +} + +TEST(PhTreeMMDTest, TestEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (int i = 0; i < (int)N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again (same `identity`), this should NOT replace the existing value + Id id2(i); + id2.data_ = 42; + ASSERT_EQ(false, tree.emplace(p, id2).second); + ASSERT_EQ(i, tree.find(p, id2)->_i); + ASSERT_EQ(0, tree.find(p, id2)->data_); + ASSERT_EQ(i, tree.emplace(p, id).first._i); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + + // Check that the returned value is a reference + tree.emplace(p, id2).first.data_++; + ASSERT_EQ(1, tree.find(p, id)->data_); + tree.emplace(p, id2).first.data_ = 0; + ASSERT_EQ(0, tree.emplace(p, id).first.data_); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, (*q)._i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(tree.count(p), NUM_DUPL); + ASSERT_EQ(i, tree.find(p, id)->_i); + } +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], 
(int)i).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeMMDTest, TestClear) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 100; + std::vector> points; + + ASSERT_TRUE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); + + // try again + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); +} + +TEST(PhTreeMMDTest, TestFind) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + // test commutativity + Id id(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_NE(tree.find(p, id), tree.end()); + ASSERT_NE(tree.end(), tree.find(p, id)); + ASSERT_EQ(tree.find(p, id)->_i, i); + auto iterN = tree.find(points[0]); + size_t n = 0; + while (iterN != tree.end()) { + ++iterN; + ++n; + } + ASSERT_EQ(n, NUM_DUPL); + i++; + } + + TestPoint p{1, 1, 10000000}; + auto result = tree.find(p); + ASSERT_EQ(result, tree.end()); + ASSERT_EQ(tree.end(), result); + + auto iter1 = tree.find(points[0]); + auto iter2 = tree.find(points[0]); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + +TEST(PhTreeMMDTest, TestUpdateWithEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + double delta = 20; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + size_t count_new = tree.count(pNew); + size_t count_old = tree.count(pOld); + size_t n = tree.erase(pOld, Id(i)); + ASSERT_EQ(1U, n); + tree.emplace(pNew, Id(i)); + ASSERT_EQ(count_new + 1, tree.count(pNew)); + ASSERT_EQ(count_old - 1, tree.count(pOld)); + p = pNew; + ++i; + } + + ASSERT_EQ(N, tree.size()); + 
tree.clear(); +} + +TEST(PhTreeMMDTest, TestUpdateWithEmplaceHint) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 0.1, 1, 10}; + std::vector> points; + populate(tree, points, N); + + int i = 0; + size_t d_n = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + double delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + auto iter = tree.find(pOld, Id(i)); + size_t n = tree.erase(iter); + ASSERT_EQ(1U, n); + ASSERT_TRUE(tree.emplace_hint(iter, pNew, Id(i)).second); + ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); + auto iterNew = tree.find(pNew, Id(i)); + ASSERT_FALSE(tree.emplace_hint(iterNew, pNew, Id(i)).second); + p = pNew; + ++i; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + tree.emplace_hint(tree.end(), {11, 21, 31}, 421); + tree.emplace_hint(tree.begin(), {1, 2, 3}, 42); + ASSERT_EQ(2, tree.size()); +} + +void TestUpdateWithRelocate(bool relocate_to_existing_coordinate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 0.1, 1, 10}; + std::vector> points; + populate(tree, points, N); + + for (auto delta : deltas) { + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + TestPoint pNew; + if (relocate_to_existing_coordinate) { + pNew = delta > 0.0 ? 
points[(i + 17) % N] : pOld; + } else { + pNew = {pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + } + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + if (pOld != pNew) { + // second time fails because value has already been moved + ASSERT_EQ(0u, tree.relocate(pOld, pNew, Id(i))); + ASSERT_EQ(tree.end(), tree.find(pOld, Id(i))); + } else { + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + } + ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); + p = pNew; + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocateDelta) { + TestUpdateWithRelocate(false); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocateToExisting) { + TestUpdateWithRelocate(true); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocateCornerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{1, 2, 3}; + TestPoint point1{4, 5, 6}; + + // Check that empty tree works + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(42))); + + // Check that small tree works + tree.emplace(point0, Id(1)); + ASSERT_EQ(1u, tree.relocate(point0, point1, Id(1))); + ASSERT_EQ(tree.end(), tree.find(point0, Id(1))); + ASSERT_EQ(1, tree.find(point1, Id(1))->_i); + ASSERT_EQ(1u, tree.size()); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that existing destination fails + // TODO why is this not allowed in a multimap???? 
+// tree.emplace(point0, Id(1)); +// tree.emplace(point1, Id(1)); + //ASSERT_EQ(0u, tree.relocate(point0, point1, Id(1))); + + // tree.emplace(point0, Id(0)); + // tree.emplace(point1, Id(1)); + //ASSERT_EQ(0u, tree.relocate(point0, point1, Id(0))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source bucket fails + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(0))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket exists) + tree.emplace(point0, Id(0)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket missing) + tree.emplace(point0, Id(0)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeMMDTest, TestEraseByIterator) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + ASSERT_NE(tree.end(), tree.find(p)); + auto iter = tree.find(p, Id(i)); + ASSERT_NE(tree.end(), iter); + size_t count = tree.erase(iter); + ASSERT_EQ(1U, count); + ASSERT_EQ(tree.end(), tree.find(p, Id(i))); + if (tree.size() % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + i++; + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + +TEST(PhTreeMMDTest, TestEraseByIteratorQuery) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + for (size_t i = 0; i < N; ++i) { + auto iter = tree.begin(); + ASSERT_NE(tree.end(), iter); + size_t count = tree.erase(iter); + ASSERT_EQ(1U, count); + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + +TEST(PhTreeMMDTest, TestExtent) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> 
points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(); + while (qE != tree.end()) { + qE++; + num_e++; + } + ASSERT_EQ(N, num_e); + + auto iter1 = tree.begin(); + auto iter2 = tree.begin(); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + +template +struct FilterEvenId { + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const BucketT&) const { + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) const { + return true; + } + [[nodiscard]] constexpr bool IsBucketEntryValid(const PhPoint&, const T& value) const { + return value._i % 2 == 0; + } +}; + +TEST(PhTreeMMDTest, TestExtentFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(FilterEvenId()); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e * 2); +} + +TEST(PhTreeMMDTest, TestExtentForEachFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + struct Counter { + void operator()(const TestPoint key, const Id& t) { + ++n_; + ASSERT_EQ(points_[t._i], key); + ASSERT_TRUE(t._i % 2 == 0); + } + std::vector>& points_; + size_t n_ = 0; + }; + Counter callback{points, 0}; + tree.for_each(callback, FilterEvenId()); + ASSERT_EQ(N, callback.n_ * 2); +} + +TEST(PhTreeMMDTest, TestRangeBasedForLoop) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t num_e1 = 0; + for (auto& x : tree) { + x.data_ = 42; + num_e1++; + } + ASSERT_EQ(N, num_e1); + + // Check that we really had references and that data_ was changed + size_t num_e2 = 0; + for (auto& x : tree) { + ASSERT_EQ(42, x.data_); + num_e2++; + } + ASSERT_EQ(N, num_e2); +} + +TEST(PhTreeMMDTest, TestEstimateCountIntersect) { + const dimension_t dim = 3; + 
TestTree tree; + size_t N = 1000; + std::vector> points; + populate(tree, points, N); + + // Test small + for (auto& p : points) { + size_t n = tree.estimate_count({p, p}); + ASSERT_LE(NUM_DUPL, n); + // arbitrary upper limit: 10*NUM_DUPL + ASSERT_GE(10, NUM_DUPL); + } + + // Test medium (1/8 of volume), allow variation of 20% 0.8 / 2.0 + double min_2 = WORLD_MIN / 2; + double max_2 = WORLD_MAX / 2; + size_t n_medium = tree.estimate_count({{min_2, min_2, min_2}, {max_2, max_2, max_2}}); + ASSERT_LE(N / 8. * 0.8, n_medium); + ASSERT_GE(N / 8. * 2.0, n_medium); + + // Test all + size_t n_all = + tree.estimate_count({{WORLD_MIN, WORLD_MIN, WORLD_MIN}, {WORLD_MAX, WORLD_MAX, WORLD_MAX}}); + ASSERT_EQ(N, n_all); +} + +template +void referenceQuery( + std::vector>& points, + TestPoint& min, + TestPoint& max, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool match = true; + for (dimension_t d = 0; d < DIM; d++) { + match &= p[d] >= min[d] && p[d] <= max[d]; + } + if (match) { + result.insert(i); + } + } +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. 
+template +void testQuery(TestPoint& min, TestPoint& max, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + result = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x._i, 0); + ASSERT_EQ(referenceResult.count(x._i), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +TEST(PhTreeMMDTest, TestWindowQuery0) { + const dimension_t dim = 3; + TestPoint p{-10000, -10000, -10000}; + int n = 0; + testQuery(p, p, 10000, n); + ASSERT_EQ(0, n); +} + +TEST(PhTreeMMDTest, TestWindowQuery1) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + auto& x = *q; + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, x._i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + n++; + } + ASSERT_EQ(N, n); +} + +TEST(PhTreeMMDTest, TestWindowQueryMany) { + const dimension_t dim = 3; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + int n = 0; + testQuery(min, max, 10000, n); + ASSERT_LE(3, n); + ASSERT_GE(100, n); +} + +TEST(PhTreeMMDTest, TestWindowQueryAll) { + const dimension_t dim = 3; + const size_t N = 10000; + TestPoint min{-10000, -10000, -10000}; + TestPoint max{10000, 10000, 10000}; + int n = 0; + testQuery(min, max, N, n); + ASSERT_EQ(N, n); +} + +TEST(PhTreeMMDTest, TestWindowQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + double query_length = 200; + size_t nn = 0; + for (int i = -120; i < 120; i++) { + TestPoint min{i * 10., i * 9., i * 11.}; + TestPoint max{i * 10. + query_length, i * 9. + query_length, i * 11. 
+ query_length}; + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { + auto& x = *it; + ASSERT_EQ(referenceResult.count(x._i), 1); + n++; + nn++; + } + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + ASSERT_GE(100, n); + } + ASSERT_LE(500, nn); + ASSERT_GE(5000, nn); +} + +TEST(PhTreeMMDTest, TestWindowForEachQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + double query_length = 200; + size_t nn = 0; + for (int i = -120; i < 120; i++) { + TestPoint min{i * 10., i * 9., i * 11.}; + TestPoint max{i * 10. + query_length, i * 9. + query_length, i * 11. + query_length}; + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + struct Counter { + void operator()(const TestPoint&, const Id& t) { + ++n_; + ASSERT_EQ(referenceResult.count(t._i), 1); + } + std::set& referenceResult; + size_t n_ = 0; + }; + + size_t n = 0; + Counter callback{referenceResult, 0}; + tree.for_each({min, max}, callback); + n += callback.n_; + nn += callback.n_; + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + ASSERT_GE(100, n); + } + ASSERT_LE(500, nn); + ASSERT_GE(5000, nn); +} + +TEST(PhTreeMMDTest, TestWindowQueryIterators) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q1 = tree.begin_query({p, p}); + auto q2 = tree.begin_query({p, p}); + ASSERT_NE(q1, tree.end()); + ASSERT_NE(q2, tree.end()); + ASSERT_EQ(q1, q2); + q1++; + ASSERT_NE(q1, q2); + q2++; + n++; + } + ASSERT_EQ(N, n); +} + +TEST(PhTreeMMDTest, TestWindowQueryFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + 
populate(tree, points, N); + + int num_e = 0; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + auto qE = tree.begin_query({min, max}, FilterEvenId()); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + ASSERT_TRUE(qE->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_LE(2, num_e); + ASSERT_GE(50, num_e); +} + +TEST(PhTreeMMDTest, TestKnnQuery) { + // deliberately allowing outside of main points range + DoubleRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 1000; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + for (size_t round = 0; round < 100; round++) { + TestPoint center{rng.next(), rng.next(), rng.next()}; + + // sort points manually + std::vector sorted_data; + for (size_t i = 0; i < points.size(); i++) { + double dist = distance(center, points[i]); + sorted_data.emplace_back(dist, i); + } + std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistanceAndId); + + size_t n = 0; + double prevDist = -1; + auto q = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); + while (q != tree.end()) { + // just read the entry + auto& e = *q; + ASSERT_EQ(sorted_data[n]._distance, q.distance()); + ASSERT_EQ(sorted_data[n]._id / NUM_DUPL, e._i / NUM_DUPL); + ASSERT_EQ(points[sorted_data[n]._id], q.first()); + ASSERT_EQ(sorted_data[n]._id / NUM_DUPL, q->_i / NUM_DUPL); + ASSERT_GE(q.distance(), prevDist); + prevDist = q.distance(); + q++; + n++; + } + ASSERT_EQ(Nq * NUM_DUPL, n); + } +} + +template +struct PhDistanceLongL1 { + double operator()(const TestPoint& v1, const TestPoint& v2) const { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(v1[i] - v2[i]); + } + return sum; + }; +}; + +TEST(PhTreeMMDTest, TestKnnQueryFilterAndDistanceL1) { + // deliberately allowing outside of main points range + DoubleRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 100; + const size_t Nq = 10; + + TestTree tree; + std::vector> 
points; + populate(tree, points, N); + + for (size_t round = 0; round < 100; round++) { + TestPoint center{rng.next(), rng.next(), rng.next()}; + + // sort points manually by L1; skip every 2nd point + std::vector sorted_data; + for (size_t i = 0; i < points.size(); i += 2) { + double dist = distanceL1(center, points[i]); + sorted_data.emplace_back(dist, i); + } + std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistanceAndId); + + std::vector sorted_results; + size_t n = 0; + double prevDist = -1; + auto q = tree.begin_knn_query(Nq, center, PhDistanceLongL1(), FilterEvenId()); + while (q != tree.end()) { + // just read the entry + auto& e = *q; + sorted_results.emplace_back(q.distance(), e._i); + if (sorted_data[n]._id == e._i) { + ASSERT_EQ(points[sorted_data[n]._id], q.first()); + ASSERT_EQ(sorted_data[n]._id, q->_i); + } + + ASSERT_GE(q.distance(), prevDist); + prevDist = q.distance(); + q++; + n++; + } + std::sort(sorted_results.begin(), sorted_results.end(), comparePointDistanceAndId); /* results are re-sorted with the reference comparator before the element-wise comparison - presumably because entries at equal distance may be returned in any order; TODO confirm */ + + for (size_t i = 0; i < n; ++i) { + auto& r = sorted_results[i]; + ASSERT_EQ(sorted_data[i]._distance, r._distance); + ASSERT_EQ(sorted_data[i]._id, r._id); + } + ASSERT_EQ(Nq * NUM_DUPL / 2, n); + } +} + +TEST(PhTreeMMDTest, TestKnnQueryIterator) { /* Two kNN iterators created with identical arguments must compare equal while advanced in lockstep and unequal while only q1 has been advanced; the loop must run Nq * NUM_DUPL times. */ + // deliberately allowing outside of main points range + DoubleRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 1000; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + TestPoint center{rng.next(), rng.next(), rng.next()}; + size_t n = 0; + auto q1 = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); + auto q2 = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); + while (q1 != tree.end()) { + ASSERT_NE(q1, tree.end()); + ASSERT_NE(q2, tree.end()); + ASSERT_EQ(q1, q2); + q1++; + ASSERT_NE(q1, q2); + q2++; + n++; + } + ASSERT_EQ(Nq * NUM_DUPL, n); +} + +TEST(PhTreeMMDTest, SmokeTestPoint0) { + // Test edge case: empty tree + 
TestPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree; /* nothing is inserted: find(), the window query, begin() and the kNN query must all equal end(), and erase() must return 0 */ + ASSERT_EQ(tree.size(), 0); + ASSERT_EQ(tree.find(p), tree.end()); + + auto q_window = tree.begin_query({p, p}); + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(0, tree.erase(p, Id(-1))); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeMMDTest, SmokeTestPointInfinity) { + // Test infinity: points whose coordinates are all positive or all negative infinity. + double positive_infinity = std::numeric_limits::infinity(); + double negative_infinity = -positive_infinity; + PhPointD<3> p_pos{positive_infinity, positive_infinity, positive_infinity}; + PhPointD<3> p_neg{negative_infinity, negative_infinity, negative_infinity}; + PhPointD<3> p{1, 2, 3}; + TestTree<3, Id> tree; + tree.emplace(p, Id{1}); + tree.emplace(p_pos, Id{10}); + tree.emplace(p_neg, Id{-10}); + ASSERT_EQ(tree.size(), 3); + ASSERT_EQ(tree.find(p_neg, Id(-10))->_i, -10); + ASSERT_EQ(tree.find(p, Id(1))->_i, 1); + ASSERT_EQ(tree.find(p_pos, Id(10))->_i, 10); + + ASSERT_EQ(positive_infinity, positive_infinity); + ASSERT_EQ(negative_infinity, negative_infinity); + ASSERT_GT(positive_infinity, negative_infinity); + + // Note that the tree returns result in z-order, however, since the z-order is based on + // the (unsigned) bit representation, negative values come _after_ positive values. 
+ auto q_window = tree.begin_query({p_neg, p_pos}); + std::set s; + s.emplace(q_window->_i); + ++q_window; + s.emplace(q_window->_i); + ++q_window; + FAIL(); /* NOTE(review): FAIL() raises a fatal failure and returns from the test body, so this test always fails here and no statement below it is executed */ + // TODO this fails because of integer overflow in the converter + s.emplace(q_window->_i); + ++q_window; +// ASSERT_EQ(1, q_window->_i); +// ++q_window; +// ASSERT_EQ(10, q_window->_i); +// ++q_window; +// ASSERT_EQ(-10, q_window->_i); +// ++q_window; + ASSERT_EQ(q_window, tree.end()); + ASSERT_TRUE(s.count(1)); + ASSERT_TRUE(s.count(10)); + ASSERT_TRUE(s.count(-10)); + + auto q_extent = tree.begin(); + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(10, q_extent->_i); + ++q_extent; + ASSERT_EQ(-10, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_NE(q_knn, tree.end()); + ++q_knn; + ASSERT_NE(q_knn, tree.end()); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree.erase(p_neg, Id(-10))); + ASSERT_EQ(1, tree.erase(p, Id(1))); + ASSERT_EQ(1, tree.erase(p_pos, Id(10))); + ASSERT_EQ(0, tree.size()); + ASSERT_EQ(0, tree.erase(p_neg, Id(-10))); + ASSERT_EQ(0, tree.erase(p_pos, Id(10))); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeMMDTest, SmokeTestTreeAPI) { /* Smoke test with raw-pointer values (Id*): emplaces one entry, clear()s the tree, then deletes the Id in the test itself. mapPtr is declared but never used. */ + std::map mapPtr; + PhTreeGridIndexD<3, Id*> treePtr; + Id* idPtr = new Id(1); + treePtr.emplace(PhPointD<3>{1, 2, 3}, idPtr); + treePtr.clear(); + delete idPtr; +} + +template +void test_tree(TREE& tree) { /* Shared checks for the move tests. The size()/find() assertions below only hold if tree already contains Id{1} at point (1, 2, 3) on entry; the helper adds Id{2} and Id{3}, iterates via window query, begin() and kNN query, and finally erases all three entries. */ + PhPointD<3> p{1, 2, 3}; + + // test various operations + tree.emplace(p, Id{2}); + Id id3{3}; + tree.insert(p, id3); + ASSERT_EQ(tree.size(), 3); + ASSERT_EQ(tree.count(p), 3); + ASSERT_EQ(tree.find(p, Id(1))->_i, 1); + ASSERT_EQ(tree.find(p, Id(2))->_i, 2); + ASSERT_EQ(tree.find(p, Id(3))->_i, 3); + + auto q_window = tree.begin_query({p, p}); + std::set wq_result; + wq_result.emplace(q_window->_i); + ++q_window; + wq_result.emplace(q_window->_i); + ++q_window; + 
wq_result.emplace(q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + ASSERT_EQ(3, wq_result.size()); /* ids are collected in a std::set, so a size of 3 means three distinct ids were returned, independent of their order */ + + auto q_extent = tree.begin(); + std::set eq_result; + eq_result.emplace(q_extent->_i); + ++q_extent; + eq_result.emplace(q_extent->_i); + ++q_extent; + eq_result.emplace(q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + ASSERT_EQ(3, eq_result.size()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + std::set knn_result; + knn_result.emplace(q_knn->_i); + ++q_knn; + knn_result.emplace(q_knn->_i); + ++q_knn; + knn_result.emplace(q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + ASSERT_EQ(3, knn_result.size()); + + ASSERT_EQ(1, tree.erase(p, Id{1})); + ASSERT_EQ(2, tree.size()); + ASSERT_EQ(0, tree.erase(p, Id{1})); + ASSERT_EQ(2, tree.size()); + ASSERT_EQ(1, tree.erase(p, Id{2})); + ASSERT_EQ(1, tree.erase(p, Id{3})); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeMMDTest, TestMoveConstruct) { /* Move-constructs tree from tree1, which holds the single entry Id{1} at p, and runs the shared test_tree() checks on the new tree. */ + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + PhTreeGridIndexD<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{std::move(tree1)}; + test_tree(tree); + tree.~PhTreeGridIndex(); /* NOTE(review): tree is a local object, so its destructor also runs at the end of this scope after this explicit call - confirm this is intended */ +} + +TEST(PhTreeMMDTest, TestMoveAssign) { /* Intended to cover move assignment; currently disabled, see the TODO and FAIL() below. */ + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + PhTreeGridIndexD<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{}; + // TODO!?!?! + // tree = std::move(tree1); + FAIL(); /* NOTE(review): with the move assignment commented out, this unconditional FAIL() makes the test always fail; the two statements below are not executed */ + test_tree(tree); + tree.~PhTreeGridIndex(); +} + +TEST(PhTreeMMDTest, TestMovableIterators) { /* Asserts std::is_move_constructible_v / std::is_move_assignable_v for several types; some of the move-assignable checks are commented out, see the notes next to them. */ + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.begin(), tree.end()); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v); + // TODO ? 
-> FILTERS (lambdas) are not movable +// ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.find(p), tree.end()); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterMultiMapAABB filter(p, p, tree.converter()); /* filter built from the point p twice plus the tree's converter - presumably p serves as both min and max corner of the box; TODO confirm */ + ASSERT_TRUE(std::is_move_constructible_v); + // Not move-assignable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v()))>); + // Not move-assignable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v()))>); +} + +} // namespace phtree_multimap_d_test