From 5d4b09b9811514085d51a46c81b9c69183a0628b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tilmann=20Z=C3=A4schke?= Date: Tue, 23 Mar 2021 14:59:48 +0000 Subject: [PATCH] v1.0.0 (#6) --- .bazelrc | 13 +- .github/workflows/bazel.yml | 31 + .github/workflows/cmake.yml | 40 + .gitignore | 4 +- .travis.yml | 56 - BUILD | 10 + CHANGELOG.md | 27 + CMakeLists.txt | 4 +- README.md | 369 +++++- WORKSPACE | 10 +- ci/includes/bazel.sh | 2 +- examples/example.cc | 4 +- phtree/BUILD | 120 +- phtree/benchmark/BUILD | 77 ++ phtree/benchmark/benchmark_util.h | 10 +- phtree/benchmark/count_mm_d_benchmark.cc | 215 ++++ phtree/benchmark/erase_benchmark.cc | 13 +- phtree/benchmark/erase_d_benchmark.cc | 15 +- phtree/benchmark/extent_benchmark.cc | 13 +- phtree/benchmark/extent_benchmark_weird.cc | 77 +- phtree/benchmark/find_benchmark.cc | 13 +- phtree/benchmark/insert_benchmark.cc | 13 +- phtree/benchmark/insert_box_d_benchmark.cc | 15 +- phtree/benchmark/insert_d_benchmark.cc | 15 +- phtree/benchmark/knn_d_benchmark.cc | 19 +- phtree/benchmark/logging.cc | 33 + phtree/benchmark/logging.h | 117 ++ phtree/benchmark/query_benchmark.cc | 37 +- phtree/benchmark/query_box_d_benchmark.cc | 158 ++- phtree/benchmark/query_d_benchmark.cc | 159 ++- phtree/benchmark/query_mm_box_d_benchmark.cc | 222 ++++ phtree/benchmark/query_mm_d_benchmark.cc | 230 ++++ phtree/benchmark/update_box_d_benchmark.cc | 47 +- phtree/benchmark/update_d_benchmark.cc | 223 +++- phtree/benchmark/update_mm_box_d_benchmark.cc | 248 ++++ phtree/benchmark/update_mm_d_benchmark.cc | 242 ++++ phtree/common/BUILD | 36 +- phtree/common/CMakeLists.txt | 20 +- phtree/common/base_types.h | 171 +++ phtree/common/base_types_test.cc | 86 ++ phtree/common/{ph_bits.h => bits.h} | 83 +- .../common/{ph_bits_test.cc => bits_test.cc} | 25 +- phtree/common/{ph_common.h => common.h} | 71 +- .../{ph_common_test.cc => common_test.cc} | 12 +- phtree/common/converter.h | 363 ++++++ phtree/common/converter_test.cc | 49 + ...{ph_tree_debug_helper.h 
=> debug_helper.h} | 8 +- phtree/common/{ph_distance.h => distance.h} | 73 +- .../{ph_distance_test.cc => distance_test.cc} | 19 +- phtree/common/{ph_filter.h => filter.h} | 86 +- .../{ph_filter_test.cc => filter_test.cc} | 10 +- .../{ph_flat_array_map.h => flat_array_map.h} | 81 +- ...ray_map_test.cc => flat_array_map_test.cc} | 42 +- ...ph_flat_sparse_map.h => flat_sparse_map.h} | 33 +- ...se_map_test.cc => flat_sparse_map_test.cc} | 39 +- phtree/common/ph_base_types.h | 162 --- phtree/common/ph_base_types_test.cc | 38 - phtree/common/ph_preprocessor.h | 115 -- phtree/common/ph_preprocessor_test.cc | 34 - .../common/{ph_tree_stats.h => tree_stats.h} | 27 +- phtree/phtree.h | 208 +++- phtree/phtree_box_d.h | 220 ---- phtree/phtree_box_d_test.cc | 236 +++- phtree/phtree_box_d_test_query_types.cc | 62 + phtree/phtree_box_f_test.cc | 760 ++++++++++++ phtree/phtree_d.h | 204 --- phtree/phtree_d_test.cc | 209 +++- phtree/phtree_d_test_custom_key.cc | 216 ++++ phtree/phtree_d_test_filter.cc | 70 ++ phtree/phtree_d_test_preprocessor.cc | 27 +- phtree/phtree_f_test.cc | 1005 +++++++++++++++ phtree/phtree_multimap.h | 739 +++++++++++ phtree/phtree_multimap_box_d_test.cc | 908 ++++++++++++++ phtree/phtree_multimap_d_test.cc | 1104 +++++++++++++++++ phtree/phtree_test.cc | 269 +++- phtree/phtree_test_const_values.cc | 65 +- phtree/phtree_test_ptr_values.cc | 55 +- phtree/phtree_test_unique_ptr_values.cc | 184 +++ phtree/testing/gtest_main/BUILD | 1 - phtree/testing/gtest_main/gtest_main.cc | 8 - phtree/v16/BUILD | 16 +- phtree/v16/CMakeLists.txt | 14 +- .../{debug_helper.h => debug_helper_v16.h} | 30 +- phtree/v16/{ph_entry.h => entry.h} | 63 +- phtree/v16/for_each.h | 74 ++ phtree/v16/for_each_hc.h | 189 +++ phtree/v16/iterator_base.h | 156 +++ .../{ph_iterator_full.h => iterator_full.h} | 52 +- .../v16/{ph_iterator_hc.h => iterator_hc.h} | 104 +- ...ph_iterator_knn_hs.h => iterator_knn_hs.h} | 86 +- phtree/v16/iterator_simple.h | 68 + phtree/v16/node.h | 133 +- 
phtree/v16/ph_iterator_base.h | 111 -- phtree/v16/ph_iterator_simple.h | 64 - phtree/v16/phtree_v16.h | 236 +++- tools/bazel | 10 +- tools/build_rules/http.bzl | 111 +- tools/build_rules/utils.bzl | 322 +++++ 98 files changed, 10709 insertions(+), 2264 deletions(-) create mode 100644 .github/workflows/bazel.yml create mode 100644 .github/workflows/cmake.yml delete mode 100644 .travis.yml create mode 100644 phtree/benchmark/count_mm_d_benchmark.cc create mode 100644 phtree/benchmark/logging.cc create mode 100644 phtree/benchmark/logging.h create mode 100644 phtree/benchmark/query_mm_box_d_benchmark.cc create mode 100644 phtree/benchmark/query_mm_d_benchmark.cc create mode 100644 phtree/benchmark/update_mm_box_d_benchmark.cc create mode 100644 phtree/benchmark/update_mm_d_benchmark.cc create mode 100644 phtree/common/base_types.h create mode 100644 phtree/common/base_types_test.cc rename phtree/common/{ph_bits.h => bits.h} (58%) rename phtree/common/{ph_bits_test.cc => bits_test.cc} (66%) rename phtree/common/{ph_common.h => common.h} (66%) rename phtree/common/{ph_common_test.cc => common_test.cc} (83%) create mode 100644 phtree/common/converter.h create mode 100644 phtree/common/converter_test.cc rename phtree/common/{ph_tree_debug_helper.h => debug_helper.h} (92%) rename phtree/common/{ph_distance.h => distance.h} (56%) rename phtree/common/{ph_distance_test.cc => distance_test.cc} (61%) rename phtree/common/{ph_filter.h => filter.h} (61%) rename phtree/common/{ph_filter_test.cc => filter_test.cc} (82%) rename phtree/common/{ph_flat_array_map.h => flat_array_map.h} (70%) rename phtree/common/{ph_flat_array_map_test.cc => flat_array_map_test.cc} (67%) rename phtree/common/{ph_flat_sparse_map.h => flat_sparse_map.h} (78%) rename phtree/common/{ph_flat_sparse_map_test.cc => flat_sparse_map_test.cc} (58%) delete mode 100644 phtree/common/ph_base_types.h delete mode 100644 phtree/common/ph_base_types_test.cc delete mode 100644 phtree/common/ph_preprocessor.h delete 
mode 100644 phtree/common/ph_preprocessor_test.cc rename phtree/common/{ph_tree_stats.h => tree_stats.h} (80%) delete mode 100644 phtree/phtree_box_d.h create mode 100644 phtree/phtree_box_d_test_query_types.cc create mode 100644 phtree/phtree_box_f_test.cc delete mode 100644 phtree/phtree_d.h create mode 100644 phtree/phtree_d_test_custom_key.cc create mode 100644 phtree/phtree_d_test_filter.cc create mode 100644 phtree/phtree_f_test.cc create mode 100644 phtree/phtree_multimap.h create mode 100644 phtree/phtree_multimap_box_d_test.cc create mode 100644 phtree/phtree_multimap_d_test.cc create mode 100644 phtree/phtree_test_unique_ptr_values.cc rename phtree/v16/{debug_helper.h => debug_helper_v16.h} (84%) rename phtree/v16/{ph_entry.h => entry.h} (52%) create mode 100644 phtree/v16/for_each.h create mode 100644 phtree/v16/for_each_hc.h create mode 100644 phtree/v16/iterator_base.h rename phtree/v16/{ph_iterator_full.h => iterator_full.h} (66%) rename phtree/v16/{ph_iterator_hc.h => iterator_hc.h} (75%) rename phtree/v16/{ph_iterator_knn_hs.h => iterator_knn_hs.h} (65%) create mode 100644 phtree/v16/iterator_simple.h delete mode 100644 phtree/v16/ph_iterator_base.h delete mode 100644 phtree/v16/ph_iterator_simple.h create mode 100644 tools/build_rules/utils.bzl diff --git a/.bazelrc b/.bazelrc index 54d31d36..d3f8f8b9 100644 --- a/.bazelrc +++ b/.bazelrc @@ -1,5 +1,6 @@ # general build options build --compilation_mode=dbg +build --host_compilation_mode=fastbuild build --verbose_failures build --experimental_strict_action_env build --experimental_guard_against_concurrent_changes @@ -18,6 +19,7 @@ build --enable_runfiles --build_runfile_links test --test_output=errors build:release --compilation_mode=opt +build:release --host_compilation_mode=opt build:ci --keep_going build:ci --announce_rc @@ -28,15 +30,17 @@ build:linux --copt="-fvisibility=hidden" build:linux --copt="-fno-omit-frame-pointer" # for friendlier stack traces build:linux --copt="-Wno-error" build:linux 
--copt="-mavx" +build:linux --copt="-Wsequence-point" +build:linux --copt="-Wsign-compare" build:linux --cxxopt="-std=c++17" -build:linux --linkopt="-lm" -build:linux --linkopt="-latomic" -build:linux --linkopt="-ldl" build:linux-release --config=release build:linux-release --config=linux build:linux-release --copt="-O3" +build:benchmark --config=linux-release +build:benchmark --copt="-g" # To get code references in vtune + build:macos --copt="-fvisibility=hidden" build:macos --copt="-Wno-error" build:macos --cxxopt="-std=c++17" @@ -49,13 +53,16 @@ build:windows --features=static_link_msvcrt # the remote cache to never be hit due to differing build graph hashes. build:windows --action_env TMP=C:/Windows/Temp build:windows --action_env TEMP=C:/Windows/Temp +build:windows --cxxopt="/DWIN32_LEAN_AND_MEAN" # Config for when tests are running in a "slow" environment such as Valgrind or TSan build:slow-tests --copt="-DIMPROBABLE_SLOW_TEST" # Valgrind config. +build:valgrind-memcheck --config=linux build:valgrind-memcheck --config=slow-tests test:valgrind-memcheck --run_under=//tools/runners/sanitizers/valgrind-memcheck +run:valgrind-memcheck --run_under=//tools/runners/sanitizers/valgrind-memcheck # Sanitizer configs; for an overview of the sanitizers, see https://github.com/google/sanitizers/wiki # For more specific information on sanitizers: diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml new file mode 100644 index 00000000..8a77cd5f --- /dev/null +++ b/.github/workflows/bazel.yml @@ -0,0 +1,31 @@ +name: Bazel build + +on: [push] + +jobs: + build: + name: Run bazel + runs-on: ubuntu-latest + + defaults: + run: + shell: bash + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Setup bazel + # install bazelisk to install the appropriate bazel version + run: | + export PATH=$PATH:$HOME/bin && mkdir -p $HOME/bin + wget https://github.com/bazelbuild/bazelisk/releases/download/v1.5.0/bazelisk-linux-amd64 && chmod +x 
bazelisk-linux-amd64 && mv bazelisk-linux-amd64 $HOME/bin/bazel + wget https://github.com/bazelbuild/buildtools/releases/download/0.22.0/buildifier && chmod +x buildifier && mv buildifier $HOME/bin/ + + - name: Build + shell: bash + run: bazel build ... + + - name: Test + shell: bash + run: bazel test ... diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml new file mode 100644 index 00000000..f5a52b4d --- /dev/null +++ b/.github/workflows/cmake.yml @@ -0,0 +1,40 @@ +name: CMake build + +on: [push] + +env: + BUILD_TYPE: Release + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: Create Build Environment + run: cmake -E make_directory ${{github.workspace}}/build + + - name: Configure CMake + # Use a bash shell so we can use the same syntax for environment variable + # access regardless of the host operating system + shell: bash + working-directory: ${{github.workspace}}/build + # Note the current convention is to use the -S and -B options here to specify source + # and build directories, but this is only available with CMake 3.13 and higher. + # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE + + - name: Build + working-directory: ${{github.workspace}}/build + shell: bash + # Execute the build. You can specify a specific target with "--target " + run: cmake --build . --config $BUILD_TYPE + + - name: Test + working-directory: ${{github.workspace}}/build + shell: bash + # Execute tests defined by the CMake configuration. + # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail + # TODO Currently tests are run via bazel only. 
+ run: ctest -C $BUILD_TYPE diff --git a/.gitignore b/.gitignore index d10e42f2..55098c94 100644 --- a/.gitignore +++ b/.gitignore @@ -3,8 +3,10 @@ !.bazelversion !.clang-format !.gitignore -!.travis.yml +!.github bazel-* +!bazel-*.sh compile_commands.json perf.data* build + diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 176c4612..00000000 --- a/.travis.yml +++ /dev/null @@ -1,56 +0,0 @@ -dist: bionic - -language: cpp - -compiler: - - gcc - -before_install: - ############################################################################ - # All the dependencies are installed in ${HOME}/deps/ - ############################################################################ - - DEPS_DIR="${HOME}/deps" - - mkdir -p ${DEPS_DIR} && cd ${DEPS_DIR} - - ############################################################################ - # Install CMake - ############################################################################ - - CMAKE_URL="https://cmake.org/files/v3.14/cmake-3.14.7-Linux-x86_64.tar.gz" - - mkdir cmake && travis_retry wget --no-check-certificate --quiet -O - ${CMAKE_URL} | tar --strip-components=1 -xz -C cmake - - export PATH=${DEPS_DIR}/cmake/bin:${PATH} - - cmake --version - - ############################################################################ - # Install Bazel - ############################################################################ - - export PATH=$PATH:$HOME/bin && mkdir -p $HOME/bin - # install bazelisk as bazel to install the appropriate bazel version - - wget https://github.com/bazelbuild/bazelisk/releases/download/v1.5.0/bazelisk-linux-amd64 && chmod +x bazelisk-linux-amd64 && mv bazelisk-linux-amd64 $HOME/bin/bazel - - wget https://github.com/bazelbuild/buildtools/releases/download/0.22.0/buildifier && chmod +x buildifier && mv buildifier $HOME/bin/ - -install: - ############################################################################ - # Set up CMake - 
############################################################################ - # Ensure we are in correct folder - - cd "${TRAVIS_BUILD_DIR}" - - # switch to build directory - - mkdir build - - cd build - # run cmake; the project's top-level CMakeLists.txt is located at '..' - - cmake .. - -script: - ############################################################################ - # Execute CMake - ############################################################################ - - cmake --build . - # run the example program - - ./examples/Example - # Back to root folder - - cd "${TRAVIS_BUILD_DIR}" - ############################################################################ - # Execute Bazel with tests - ############################################################################ - - bazel test ... diff --git a/BUILD b/BUILD index 91b986e5..0bf4e407 100644 --- a/BUILD +++ b/BUILD @@ -12,6 +12,11 @@ config_setting( constraint_values = ["@bazel_tools//platforms:osx"], ) +config_setting( + name = "macos_not_ios", + constraint_values = ["@bazel_tools//platforms:osx"], +) + config_setting( name = "windows", constraint_values = ["@bazel_tools//platforms:windows"], @@ -33,6 +38,11 @@ config_setting( }, ) +config_setting( + name = "windows-x86_64", + constraint_values = ["@bazel_tools//platforms:windows"], +) + # Buildifier sh_binary( diff --git a/CHANGELOG.md b/CHANGELOG.md index b864ed81..438859db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,33 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] +- Nothing yet. + + +## [1.0.0] - 2021-03-23 +### Added +- API: `MultiMap`: A wrapper that makes PH-Tree behave as a multi-map. 
+- API: `erase(iterator)` +- API: `emplace_hint(iterator, ...)` +- API for `PhTreeF` and `PhTreeBoxF`: 32bit floating point options +- Support for custom key classes + +### Changed +- BREAKING CHANGE: The query functions now require a query box as input (instead of a min/max point pair) +- BREAKING CHANGE: `phtree_box_d.h` has been removed, please use `phtree.h` instead. +- BREAKING CHANGE: `phtree_d.h` has been removed, please use `phtree.h` instead. +- BREAKING CHANGE: Data converters (IEEE, Multiply, etc) are now structs instead of functions/functors +- BREAKING CHANGE: `PhFilterNoOp` has been renamed to `FilterNoOp` +- BREAKING CHANGE: kNN queries now always require the distance function to be specified. +- BREAKING CHANGE: Preprocessors have been refactored and renamed to Converter/ScalarConverter +- Moved CI builds from Travis to GitHub actions + +### Removed +- Nothing. + +### Fixed +- GCC warnings from `-Wsign-compare` and `-Wsequence-point`. + ## 0.1.0 - 2020-07-02 ### Added diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b7b3f5b..15fcb1a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.14) # set the project name -project(PH_Tree_Main VERSION 0.1.0 +project(PH_Tree_Main VERSION 1.0.0 DESCRIPTION "PH-Tree C++" LANGUAGES CXX) @@ -12,7 +12,7 @@ endif() # specify the C++ standard set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED True) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -fpermissive") set(CMAKE_CXX_FLAGS_RELEASE "-O3") add_subdirectory(phtree) diff --git a/README.md b/README.md index 0366b904..fa62ffd4 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Build Status](https://travis-ci.org/improbable-eng/phtree-cpp.svg?branch=master)](https://travis-ci.org/improbable-eng/phtree-cpp) +[![Build status](TODO)](TODO) # PH-Tree C++ @@ -23,15 +23,77 @@ See also : More information about PH-Trees (including a Java implementation) is available 
[here](http://www.phtree.org). +---------------------------------- -## Usage +## User Guide + +### API Usage + +[Key Types](#key-types) + +[Basic operations](#basic-operations) + +[Queries](#queries) + + * [for_each](#for-each-example) + + * [Iterators](#iterator-examples) + + * [Filters](#filters) + + * [Distance Functions](#distance-functions) + +[Converters](#converters) + +[Custom Key Types](#custom-key-types) + +[Restrictions](#restrictions) + +[Troubleshooting / FAQ](#troubleshooting-faq) + +### Performance + +[When to use a PH-Tree](#when-to-use-a-ph-tree) + +[Optimising Performance](#optimising-performance) + +### Compiling / Building + +[Build system & dependencies](#build-system-and-dependencies) + +[bazel](#bazel) + +[cmake](#cmake) + +---------------------------------- + +## API Usage + + #### Key Types -The PH-Tree supports three types of keys: +The **PH-Tree Map** supports out of the box five types: - `PhTreeD` uses `PhPointD` keys, which are vectors/points of 64 bit `double`. +- `PhTreeF` uses `PhPointF` keys, which are vectors/points of 32 bit `float`. - `PhTreeBoxD` uses `PhBoxD` keys, which consist of two `PhPointD` that define an axis-aligned rectangle/box. -- `PhTree` uses `PhPoint` keys, which are vectors/points of `std::int64` +- `PhTreeBoxF` uses `PhBoxF` keys, which consist of two `PhPointF` that define an axis-aligned rectangle/box. +- `PhTree` uses `PhPoint` keys, which are vectors/points of `std::int64` + +The **PH-Tree MultiMap** supports out of the box three types: +- `PhTreeMultiMapD` uses `PhPointD` keys, which are vectors/points of 64 bit `double`. +- `PhTreeMultiMapBoxD` uses `PhBoxD` keys, which consist of two `PhPointD` that define an axis-aligned rectangle/box. +- `PhTreeMultiMap` uses `PhPoint` keys, which are vectors/points of `std::int64` + +Additional tree types can be defined easily analogous to the types above, please refer to the declaration of the tree types +for an example. 
+Support for custom key classes (points and boxes) as well as custom coordinate mappings can be implemented using custom `Converter` classes, see below. +The `PhTreeMultiMap` is by default backed by `std::unordered_set` but this can be changed via a template parameter. + +The `PhTree` and `PhTreeMultiMap` types are available from `phtree.h` and `phtree_multimap.h`. + + + #### Basic Operations ```C++ @@ -46,22 +108,53 @@ PhPointD<3> p{1.1, 1.0, 10.}; // Some operations tree.emplace(p, my_data); +tree.emplace_hint(hint, p, my_data); tree.insert(p, my_data); tree[p] = my_data; tree.count(p); tree.find(p); tree.erase(p); +tree.erase(iterator); tree.size(); tree.empty(); tree.clear(); + +// Multi-map only +tree.relocate(p_old, p_new, value); +tree.estimate_count(query); ``` + + #### Queries -* Iterator over all elements: `auto q = tree.begin();` -* Iterator for box shaped window queries: `auto q = tree.begin_query(min, max);` -* Iterator for _k_ nearest neighbor queries: `auto q = tree.begin_knn_query(k, center_point);` +* For-each over all elements: `tree.for_each(callback);` +* Iterator over all elements: `auto iterator = tree.begin();` +* For-each with box shaped window queries: `tree.for_each(PhBoxD(min, max), callback);` +* Iterator for box shaped window queries: `auto q = tree.begin_query(PhBoxD(min, max));` +* Iterator for _k_ nearest neighbor queries: `auto q = tree.begin_knn_query(k, center_point, distance_function);` + + +##### For-each example +```C++ +// Callback for counting entries +struct Counter { + void operator()(PhPointD<3> key, T& t) { + ++n_; + } + size_t n_ = 0; +}; + +// Count entries inside of an axis aligned box defined by the two points (1,1,1) and (3,3,3) +Counter callback; +tree.for_each({{1, 1, 1}, {3, 3, 3}}, callback); +// callback.n_ is now the number of entries in the box. 
+``` + + + +##### Iterator examples ```C++ // Iterate over all entries for (auto it : tree) { @@ -69,7 +162,7 @@ for (auto it : tree) { } // Iterate over all entries inside of an axis aligned box defined by the two points (1,1,1) and (3,3,3) -for (auto it = tree.begin_query({1, 1, 1}, {3, 3, 3}); it != tree.end(); ++it) { +for (auto it = tree.begin_query({{1, 1, 1}, {3, 3, 3}}); it != tree.end(); ++it) { ... } @@ -79,11 +172,14 @@ for (auto it = tree.begin_knn_query(5, {1, 1, 1}); it != tree.end(); ++it) { } ``` + + +##### Filters All queries allow specifying an additional filter. The filter is called for every key/value pair that the would normally be returned (subject to query constraints) and to every node in the tree that the query decides to traverse (also subject to query constraints). Returning `true` in the filter does not change query behaviour, returning `false` means that the current value or child node is not returned or traversed. -An example of a geometric filter can be found in `phtree/common/ph_filter.h` in `PhFilterAABB`. +An example of a geometric filter can be found in `phtree/common/filter.h` in `FilterAABB`. ```C++ template struct FilterByValueId { @@ -104,21 +200,26 @@ for (auto it = tree.begin_query({1, 1, 1}, {3, 3, 3}, FilterByValueId<3, T>())); } ``` + + +##### Distance function Nearest neighbor queries can also use custom distance metrics, such as L1 distance. Note that this returns a special iterator that provides a function to get the distance of the current entry: ```C++ -#include "phtree/phtree_d.h" +#include "phtree/phtree.h" // Find 5 nearest neighbors of (1,1,1) using L1 distance -for (auto it = tree.begin_knn_query(5, {1, 1, 1}, PhDistanceLongL1<3>())); it != tree.end(); ++it) { +for (auto it = tree.begin_knn_query(5, {1, 1, 1}, DistanceL1<3>())); it != tree.end(); ++it) { std::cout << "distance = " << it.distance() << std::endl; ... 
} ``` -#### Pre- & Post-Processors + + +#### Converters The PH-Tree can internally only process integer keys. In order to use floating point coordinates, the floating point coordinates must be converted to integer coordinates. The `PhTreeD` and `PhTreeBoxD` use by default the `PreprocessIEEE` & `PostProcessIEEE` functions. The `IEEE` processor is a loss-less converter (in term of numeric @@ -140,57 +241,248 @@ double resultung_float = ((double)my_int) / 1000000.; ``` It is obvious that this approach leads to a loss of numerical precision. Moreover, the loss of precision depends on the actual range of the double values and the constant. -The chosen constant should probably be as large as possible, but small enough such that converted -values do not exceed the 64bit limit of `std::int64_4`. +The chosen constant should probably be as large as possible but small enough such that converted +values do not exceed the 64bit limit of `std::int64_t`. +Note that the PH-Tree provides several `ConverterMultiply` implementations for point/box and double/float. ```C++ -static const double MY_MULTIPLIER = 1000000.; -static const double MY_DIVIDER = 1./MY_MULTIPLIER; - template -PhPoint PreprocessMultiply(const PhPointD& point) { - PhPoint out; - for (dimension_t i = 0; i < DIM; ++i) { - out[i] = point[i] * MY_MULTIPLIER; +struct MyConverterMultiply : public ConverterPointBase { + explicit MyConverterMultiply(double multiplier) + : multiplier_{multiplier}, divider_{1. 
/ multiplier} {} + + [[nodiscard]] PhPoint pre(const PhPointD& point) const { + PhPoint out; + for (dimension_t i = 0; i < DIM; ++i) { + out[i] = point[i] * multiplier_; + } + return out; } - return out; -} -template -PhPointD PostprocessMultiply(const PhPoint& in) { - PhPointD out; - for (dimension_t i = 0; i < DIM; ++i) { - out[i] = ((double)in[i]) * MY_DIVIDER; + [[nodiscard]] PhPointD post(const PhPoint& in) const { + PhPointD out; + for (dimension_t i = 0; i < DIM; ++i) { + out[i] = ((double)in[i]) * divider_; + } + return out; + } + + [[nodiscard]] auto pre_query(const PhBoxD& query_box) const { + return PhBox{pre(query_box.min()), pre(query_box.max())}; } - return out; -} + + const double multiplier_; + const double divider_; +}; template -using MyTree = PhTreeD, PreprocessMultiply, PostprocessMultiply>; +using MyTree = PhTreeD>; + +void test() { + MyConverterMultiply<3> converter{1000000}; + MyTree<3, MyData> tree(converter); + ... // use the tree +} ``` It is also worth trying out constants that are 1 or 2 orders of magnitude smaller or larger than this maximum value. -Experience shows that this may affect query performance by up to 10%. The reason for this is currently unknown. +Experience shows that this may affect query performance by up to 10%. This is due to a more compact structure + of the resulting index tree. + + + +##### Custom key types +With custom converters it is also possible to use your own custom classes as keys (instead of `PhPointD` or `PhBoxF`). 
+The following example defined custom `MyPoint` and `MyBox` types and a converter that allows using them with a `PhTree`: + +```c++ +struct MyPoint { + double x_; + double y_; + double z_; +}; + +using MyBox = std::pair; + +class MyConverterMultiply : public ConverterBase<3, 3, double, scalar_64_t, MyPoint, MyBox> { + using BASE = ConverterPointBase<3, double, scalar_64_t>; + using PointInternal = typename BASE::KeyInternal; + using QueryBoxInternal = typename BASE::QueryBoxInternal; + + public: + explicit MyConverterMultiply(double multiplier = 1000000) + : multiplier_{multiplier}, divider_{1. / multiplier} {} + + [[nodiscard]] PointInternal pre(const MyPoint& point) const { + return {static_cast(point.x_ * multiplier_), + static_cast(point.y_ * multiplier_), + static_cast(point.z_ * multiplier_)}; + } + + [[nodiscard]] MyPoint post(const PointInternal& in) const { + return {in[0] * divider_, in[1] * divider_, in[2] * divider_}; + } + + [[nodiscard]] QueryBoxInternal pre_query(const MyBox& box) const { + return {pre(box.first), pre(box.second)}; + } + + private: + const double multiplier_; + const double divider_; +}; +void test() { + MyConverterMultiply tm; + PhTree<3, Id, MyConverterMultiply> tree(tm); + ... // use the tree +} +``` + #### Restrictions * **C++**: Supports value types of `T` and `T*`, but not `T&` * **C++**: Return types of `find()`, `emplace()`, ... differ slightly from `std::map`, they have function `first()`, `second()` instead of fields of the same name. -* **General**: PH-Trees are **maps**, i.e. each coordinate can hold only *one* entry. In order to hold multiple coordinates per entry, one needs to insert lists or hashmaps as values. -* **General**: PH-Trees order entries internally in z-order (Morton order). However, the order is based on the (unsigned) bit represenation of keys, so negative coordinates are returned *after* positive coordinates. +* **General**: PH-Trees are **maps**, i.e. each coordinate can hold only *one* entry. 
In order to hold multiple values per coordinate + please use the `PhTreeMultiMap` implementations. +* **General**: PH-Trees order entries internally in z-order (Morton order). However, the order is based on the (unsigned) bit representation of keys, so negative coordinates are returned *after* positive coordinates. * **General**: The current implementation support between 2 and 63 dimensions. * **Differences to std::map**: There are several differences to `std::map`. Most notably for the iterators: * `begin()`/`end()` are not comparable with `<` or `>`. Only `it == tree.end()` and `it != tree.end()` is supported. * Value of `end()`: The tree has no linear memory layout, so there is no useful definition of a pointer pointing _after_ the last entry or any entry. This should be irrelevant for normal usage. + + +### Troubleshooting / FAQ + +**Problem**: The PH-Tree appears to be losing updates/insertions. + +**Solution**: Remember that the PH-Tree is a *map*, keys will not be inserted if an identical key already exists. +The easiest solution is to use one of the `PhTreeMultiMap` implementations. +Alternatively, this can be solved by turning the PH-Tree into a multi-map, for example by using something like `std::map` or `std::set` as member type: +`PhTree<3, std::set>`. The `set` instances can then be used to handle key conflicts by storing +multiple entries for the same key. The logic to handle conflicts must currently be implemented manually by the user. + +---------------------------------- + +## Performance + + + +### When to use a PH-Tree + +The PH-Tree is a multi-dimensional index or spatial index. This section gives a rough overview how the PH-Tree +compares to other spatial indexes, such as *k*D-trees, R-trees/BV-hierarchies or quadtrees. + +Disclaimer: This overview cannot be comprehensive (there are 100s of spatial indexes out there) and performance +depends heavily on the actual dataset, usage patterns, hardware, ... . 
+**Generally, the PH-Tree tends to have the following advantages:** + +* Fast insertion/removal times. While some indexes, such as *k*D-trees, can be built from scratch very fast, +they tend to be much slower when removing entries or when indexing large datasets. Also, most indexes require +rebalancing which may result in unpredictable latency (R-trees) or may result in index degradation if delayed +(*k*D-trees). + +* Competitive query performance. Query performance is generally comparable to other index structures. The PH-Tree +is fast at looking up coordinates but requires more traversal than other indexes. This means it is especially +efficient if the query results are 'small', e.g. up to 100 results per query. + +* Scalability with large datasets. The PH-Tree's insert/remove/query performance tends to scale well to large +datasets with millions of entries. + +* Scalability with the number of dimensions. The PH-Tree has been shown to deal "well" with high dimensional data (1000+ +dimensions). What does "well" mean? + * It works very well for up to 30 (sometimes 50) dimensions. **Please note that the C++ implementation has not been + optimised nearly as much as the Java implementation.** + * For more dimensions (Java was tested with 1000+ dimensions) the PH-Tree still has excellent + insertion/deletion performance. However, the query performance cannot compete with specialised + high-dim indexes such as cover-trees or pyramid-trees (these tend to be *very slow* on insertion/deletion though). + +* Modification operations (insert/delete) in a PH-Tree are guaranteed to modify only one Node (potentially +creating/deleting a second one). This guarantee can have advantages for concurrent implementations or when +serializing the index. Please note that this advantage is somewhat theoretical because this guarantee is not exploited +by the current implementation (it doesn't support concurrency or serialization). 
+ + +**PH-Tree disadvantages:** + +* A PH-Tree is a *map*, not a *multi-map*. This project also provides `PhTreeMultiMap` implementations that store a +hash-set at each coordinate. +In practice, the overhead of storing sets appears to be usually small enough to not matter much. + +* PH-Trees are not very efficient in scenarios where queries tend to return large result sets in the order of 1000 or more. + + + + +### Optimising Performance + +There are numerous ways to improve performance. The following list gives an overview over the possibilities. + +1) **Use `for_each` instead of iterators**. This should improve performance of queries by 5%-10%. + +2) **Use `emplace_hint` if possible**. When updating the position of an entry, the naive way is to use `erase()`/`emplace()`. + With `emplace_hint`, insertion can avoid navigation to the target node if the insertion coordinate is close to the + removal coordinate. + ```c++ + auto iter = tree.find(old_position); + tree.erase(iter); + tree.emplace_hint(iter, new_position, value); + ``` + +3) **Store pointers instead of large data objects**. For example, use `PhTree<3, MyLargeClass*>` instead of +`PhTree<3, MyLargeClass>` if `MyLargeClass` is large. + * This prevents the PH-Tree from storing the values inside the tree. This should improve cache-locality + and thus performance when operating on the tree. + * Using pointers is also useful if construction/destruction of values is expensive. The reason is that + the tree has to construct and destruct objects internally. This may be avoidable but is currently still happening. + +4) **Use non-box query shapes**. Depending on the use case it may be more suitable to use a custom filter for queries. +For example: + + `tree.for_each(callback, MySphereFilter(center, radius, tree.converter()));` + +5) **Use a different data converter**. The default converter of the PH-Tree results in a reasonably fast index. 
+Its biggest advantage is that it provides lossless conversion from floating point coordinates to PH-Tree coordinates +(integers) and back to floating point coordinates. + * The `ConverterMultiply` is a lossy converter but it tends to give 10% or more better performance. This is not caused + by faster operation in the converter itself but by a more compact tree shape. The example shows how to use a converter + that multiplies coordinates by 100'000, thus preserving roughly 5 fractional digits: + + `PhTreeD<DIM, T, ConverterMultiply<DIM, 100 * 1000, 1>>` + +6) **Use custom key types**. By default, the PH-Tree accepts only coordinates in the form of its own key types, such + as `PhPointD`, `PhBoxF` or similar. To avoid conversion from custom types to PH-Tree key types, custom classes + can often be adapted to be accepted directly by the PH-Tree without conversion. This requires implementing a + custom converter as described in the section about [Custom Key Types](#custom-key-types). + +7) Advanced: **Adapt internal Node representation**. Depending on the dimensionality `DIM`, the PH-Tree internally uses +different container types in `Nodes` to hold entries. By default it uses an array for `DIM<=3`, a vector +for `DIM<=8` and an ordered map for `DIM>8`. Adapting these thresholds can have strong effects on performance as well as +memory usage. +One example: Changing the threshold to use vector for `DIM==3` reduced performance of the `update_d` benchmark by 40%-50% but +improved performance of `query_d` by 15%-20%. The threshold is currently hardcoded. +The effects are not always easy to predict but here are some guidelines: + * "array" is the fastest solution for insert/update/remove type operations. Query performance is "ok". Memory consumption is + **O(DIM^2)** for every node regardless of number of entries in the node. + * "vector" is the fastest for queries but has for large nodes **worst case O(DIM^2)** insert/update/remove performance.
+ * "map" scales well with `DIM` but is for low values of `DIM` generally slower than "array" or "vector". + + +---------------------------------- + + ## Compiling the PH-Tree This section will guide you through the initial build system and IDE you need to go through in order to build and run custom versions of the PH-Tree on your machine. + + ### Build system & dependencies PH-Tree can be built with *cmake 3.14* or [Bazel](https://bazel.build) as build system. All of the code is written in C++ targeting the C++17 standard. @@ -223,6 +515,8 @@ To build on Windows, you'll need to have a version of Visual Studio 2019 install [Bazel](https://docs.bazel.build/versions/master/windows.html). + + ### Bazel Once you have set up your dependencies, you should be able to build the PH-Tree repository by running: ``` @@ -234,6 +528,8 @@ Similarly, you can run all unit tests with: bazel test ... ``` + + ### cmake ``` mkdir build @@ -242,8 +538,3 @@ cmake .. cmake --build . ./example/Example ``` - -## Troubleshooting - -**Problem**: The PH-Tree appears to be losing updates/insertions. -**Solution**: Remember that the PH-Tree is a *map*, keys will not be inserted if an identical key already exists. 
diff --git a/WORKSPACE b/WORKSPACE index 789ab5b5..707bc800 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -11,8 +11,8 @@ http_archive( load("@bazel_skylib//lib:versions.bzl", "versions") versions.check( - minimum_bazel_version = "2.0.0", - maximum_bazel_version = "2.0.0", + minimum_bazel_version = "3.4.1", + maximum_bazel_version = "3.4.1", ) # NOTE: We make third_party/ its own bazel workspace because it allows to run `bazel build ...` without @@ -27,9 +27,9 @@ local_repository( http_archive( name = "spdlog", build_file = "@third_party//spdlog:BUILD", - sha256 = "160845266e94db1d4922ef755637f6901266731c4cb3b30b45bf41efa0e6ab70", - strip_prefix = "spdlog-1.3.1", - url = "https://github.com/gabime/spdlog/archive/v1.3.1.tar.gz", + sha256 = "b38e0bbef7faac2b82fed550a0c19b0d4e7f6737d5321d4fd8f216b80f8aee8a", + strip_prefix = "spdlog-1.5.0", + url = "https://github.com/gabime/spdlog/archive/v1.5.0.tar.gz", ) http_archive( diff --git a/ci/includes/bazel.sh b/ci/includes/bazel.sh index a2aa3c85..8d1c7ee8 100755 --- a/ci/includes/bazel.sh +++ b/ci/includes/bazel.sh @@ -10,5 +10,5 @@ function runBazel() { } function getBazelVersion() { - echo "2.0.0" + echo "3.4.1" } diff --git a/examples/example.cc b/examples/example.cc index 9dad4b07..ebf8b531 100644 --- a/examples/example.cc +++ b/examples/example.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "../phtree/phtree_d.h" +#include "../phtree/phtree.h" #include using namespace improbable::phtree; @@ -39,7 +39,7 @@ int main() { std::cout << std::endl; std::cout << "All points in range:" << p1 << "/" << p2 << std::endl; - for (auto it = tree.begin_query(p2, p4); it != tree.end(); ++it) { + for (auto it = tree.begin_query({p2, p4}); it != tree.end(); ++it) { std::cout << " " << it.second() << " -> " << it.first() << std::endl; } std::cout << std::endl; diff --git a/phtree/BUILD b/phtree/BUILD index f94404b6..fe48ccc8 100644 --- a/phtree/BUILD +++ b/phtree/BUILD @@ -6,8 +6,7 @@ cc_library( ], hdrs = [ "phtree.h", - "phtree_box_d.h", - "phtree_d.h", + "phtree_multimap.h", ], linkstatic = True, visibility = [ @@ -57,6 +56,32 @@ cc_test( ], ) +cc_test( + name = "phtree_test_unique_ptr_values", + timeout = "long", + srcs = [ + "phtree_test_unique_ptr_values.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_multimap_test_move_only_values", + timeout = "long", + srcs = [ + "phtree_test_unique_ptr_values.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + cc_test( name = "phtree_d_test", timeout = "long", @@ -70,6 +95,32 @@ cc_test( ], ) +cc_test( + name = "phtree_d_test_filter", + timeout = "long", + srcs = [ + "phtree_d_test_filter.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_d_test_custom_key", + timeout = "long", + srcs = [ + "phtree_d_test_custom_key.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + cc_test( name = "phtree_d_test_preprocessor", timeout = "long", @@ -83,6 +134,32 @@ cc_test( ], ) +cc_test( + name = "phtree_multimap_d_test", + timeout = "long", + srcs = [ + "phtree_multimap_d_test.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name 
= "phtree_box_d_test_query_types", + timeout = "long", + srcs = [ + "phtree_box_d_test_query_types.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + cc_test( name = "phtree_box_d_test", timeout = "long", @@ -95,3 +172,42 @@ cc_test( "//phtree/testing/gtest_main", ], ) + +cc_test( + name = "phtree_multimap_box_d_test", + timeout = "long", + srcs = [ + "phtree_multimap_box_d_test.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_f_test", + timeout = "long", + srcs = [ + "phtree_f_test.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_box_f_test", + timeout = "long", + srcs = [ + "phtree_box_f_test.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) diff --git a/phtree/benchmark/BUILD b/phtree/benchmark/BUILD index 37d15f7d..95315788 100644 --- a/phtree/benchmark/BUILD +++ b/phtree/benchmark/BUILD @@ -4,9 +4,11 @@ cc_library( name = "benchmark", testonly = True, srcs = [ + "logging.cc", ], hdrs = [ "benchmark_util.h", + "logging.h", ], visibility = [ "//visibility:public", @@ -18,6 +20,21 @@ cc_library( alwayslink = 1, ) +cc_binary( + name = "count_mm_d_benchmark", + testonly = True, + srcs = [ + "count_mm_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + cc_binary( name = "erase_benchmark", testonly = True, @@ -198,6 +215,36 @@ cc_binary( ], ) +cc_binary( + name = "query_mm_d_benchmark", + testonly = True, + srcs = [ + "query_mm_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "query_mm_box_d_benchmark", + testonly = True, + srcs = [ + "query_mm_box_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + 
"//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + cc_binary( name = "update_d_benchmark", testonly = True, @@ -213,6 +260,36 @@ cc_binary( ], ) +cc_binary( + name = "update_mm_d_benchmark", + testonly = True, + srcs = [ + "update_mm_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "update_mm_box_d_benchmark", + testonly = True, + srcs = [ + "update_mm_box_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + cc_binary( name = "update_box_d_benchmark", testonly = True, diff --git a/phtree/benchmark/benchmark_util.h b/phtree/benchmark/benchmark_util.h index c5b9fc31..5af70367 100644 --- a/phtree/benchmark/benchmark_util.h +++ b/phtree/benchmark/benchmark_util.h @@ -17,9 +17,7 @@ #ifndef PHTREE_BENCHMARK_UTIL_H #define PHTREE_BENCHMARK_UTIL_H -#include "phtree/common/ph_common.h" -#include -#include +#include "phtree/common/common.h" #include #include @@ -64,7 +62,7 @@ auto CreateDataCLUSTER = [](auto& points, // loop over clusters PhPointD cp; // center point of cluster size_t id = 0; - for (int c = 0; c < num_cluster; ++c) { + for (size_t c = 0; c < num_cluster; ++c) { for (dimension_t d = 0; d < DIM; ++d) { cp[d] = distribution(random_engine); } @@ -86,7 +84,7 @@ auto CreateDuplicates = [](auto& points, size_t num_unique_entries, size_t num_total_entities, std::uint32_t seed) { std::default_random_engine random_engine{seed}; std::uniform_int_distribution<> distribution(0, num_unique_entries); - for (int i = num_unique_entries; i < num_total_entities; ++i) { + for (size_t i = num_unique_entries; i < num_total_entities; ++i) { // copy some random other point or box points[i] = points[distribution(random_engine)]; } @@ -107,7 +105,7 @@ auto CreatePointDataMinMax = [](auto& points, // Create at least 1 unique point // Note that the following 
point generator is likely, but not guaranteed, to created unique // points. - int num_unique_entries = 1 + (num_entities - 1) * (1. - fraction_of_duplicates); + size_t num_unique_entries = 1 + (num_entities - 1) * (1. - fraction_of_duplicates); points.reserve(num_entities); switch (test_generator) { case CUBE: diff --git a/phtree/benchmark/count_mm_d_benchmark.cc b/phtree/benchmark/count_mm_d_benchmark.cc new file mode 100644 index 00000000..1f503987 --- /dev/null +++ b/phtree/benchmark/count_mm_d_benchmark.cc @@ -0,0 +1,215 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +/* + * Benchmark for counting entries in multi-map implementations. 
+ */ +namespace { + +const double GLOBAL_MAX = 10000; + +enum Scenario { TREE_WITH_MAP, MULTI_MAP }; + +using TestPoint = PhPointD<3>; +using QueryBox = PhBoxD<3>; +using payload_t = TestPoint; +using BucketType = std::set; + +struct Query { + QueryBox box{}; +}; + +template +using CONVERTER = ConverterIEEE; + +template +using TestMap = typename std::conditional_t< + SCENARIO == TREE_WITH_MAP, + PhTreeD>, + PhTreeMultiMapD>>; + +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, double avg_query_result_size_); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, const Query& query); + void CreateQuery(Query& query); + + const TestGenerator data_type_; + const size_t num_entities_; + const double avg_query_result_size_; + + constexpr double query_endge_length() { + return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); + }; + + TestMap tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double avg_query_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, avg_query_result_size_(avg_query_result_size) +, tree_{} +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities_) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + Query query{}; + for (auto _ : state) { + state.PauseTiming(); + CreateQuery(query); + state.ResumeTiming(); + + QueryWorld(state, query); + } +} + +template +void InsertEntry( + TestMap& tree, + const PhPointD& point, + const payload_t& data) { + BucketType& bucket = tree.emplace(point).first; + bucket.emplace(data); +} + +template +void InsertEntry( + TestMap& tree, const PhPointD& point, const 
payload_t& data) { + tree.emplace(point, data); +} + +struct CounterTreeWithMap { + void operator()(const PhPointD<3>& key, const BucketType& value) { + for (auto& x : value) { + n_ += x.size(); + } + } + size_t n_; +}; + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + CounterTreeWithMap counter{0}; + tree.for_each(query.box, counter); + return counter.n_; +} + +template +size_t CountEntries(TestMap& tree, const Query& query) { + return tree.estimate_count(query.box); +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + // create data with about 10% duplicate coordinates + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX, 0.1); + for (size_t i = 0; i < num_entities_; ++i) { + InsertEntry(tree_, points_[i], points_[i]); + } + + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + logging::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, const Query& query) { + size_t n = CountEntries(tree_, query); + + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(Query& query) { + double radius = query_endge_length() * 0.5; + for (dimension_t d = 0; d < DIM; ++d) { + auto s = cube_distribution_(random_engine_); + query.box.min()[d] = s - radius; + query.box.max()[d] = s + radius; + } +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMultiMapM3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, avg_query_result_size +// PhTree +BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMultiMapM3D, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTree +BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMultiMapM3D, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/erase_benchmark.cc b/phtree/benchmark/erase_benchmark.cc index 38713a57..7ed086c7 100644 --- a/phtree/benchmark/erase_benchmark.cc +++ b/phtree/benchmark/erase_benchmark.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include -#include -#include #include using namespace improbable; @@ -59,11 +58,7 @@ IndexBenchmark::IndexBenchmark( , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} , points_(num_entities) { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } @@ -86,13 +81,13 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); state.counters["total_remove_count"] = benchmark::Counter(0); state.counters["remove_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); - spdlog::info("World setup complete."); + logging::info("World setup complete."); } template diff --git a/phtree/benchmark/erase_d_benchmark.cc b/phtree/benchmark/erase_d_benchmark.cc index 8f55c761..42fa545b 100644 --- a/phtree/benchmark/erase_d_benchmark.cc +++ b/phtree/benchmark/erase_d_benchmark.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" -#include "phtree/phtree_d.h" +#include "phtree/phtree.h" #include -#include -#include #include using namespace improbable; @@ -59,11 +58,7 @@ IndexBenchmark::IndexBenchmark( , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} , points_(num_entities) { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } @@ -86,13 +81,13 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); state.counters["total_remove_count"] = benchmark::Counter(0); state.counters["remove_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); - spdlog::info("World setup complete."); + logging::info("World setup complete."); } template diff --git a/phtree/benchmark/extent_benchmark.cc b/phtree/benchmark/extent_benchmark.cc index 5a8bd9e4..760a5749 100644 --- a/phtree/benchmark/extent_benchmark.cc +++ b/phtree/benchmark/extent_benchmark.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include -#include -#include #include using namespace improbable; @@ -59,11 +58,7 @@ IndexBenchmark::IndexBenchmark( , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} , points_(num_entities) { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } @@ -76,7 +71,7 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); for (int i = 0; i < num_entities_; ++i) { tree_.emplace(points_[i], i); @@ -87,7 +82,7 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); - spdlog::info("World setup complete."); + logging::info("World setup complete."); } template diff --git a/phtree/benchmark/extent_benchmark_weird.cc b/phtree/benchmark/extent_benchmark_weird.cc index f36984c5..395e14a9 100644 --- a/phtree/benchmark/extent_benchmark_weird.cc +++ b/phtree/benchmark/extent_benchmark_weird.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include -#include -#include #include using namespace improbable; @@ -67,11 +66,7 @@ IndexBenchmark::IndexBenchmark( , cube_distribution_{0, GLOBAL_MAX} , points_(num_entities) , type_{type} { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } @@ -84,7 +79,7 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); for (int i = 0; i < num_entities_; ++i) { tree_.emplace(points_[i], i); @@ -95,26 +90,23 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); - spdlog::info("World setup complete."); + logging::info("World setup complete."); } -template < - dimension_t DIM, - typename KEY = PhPoint, - PhPreprocessor PRE = PrePostNoOp> -class PhFilterBoxIntersection { +template , typename SCALAR = scalar_64_t> +class FilterBoxIntersection { public: - PhFilterBoxIntersection(const PhPoint& minInclude, const PhPoint& maxInclude) - : minIncludeBits{minInclude}, maxIncludeBits{maxInclude} {}; + FilterBoxIntersection(const PhPoint& min_include, const PhPoint& max_include) + : min_include_bits{min_include}, max_include_bits{max_include} {}; void set(const PhPointD& minExclude, const PhPointD& maxExclude) { - minIncludeBits = PRE(minExclude); - maxIncludeBits = PRE(maxExclude); + min_include_bits = 
PRE(minExclude); + max_include_bits = PRE(maxExclude); } [[nodiscard]] bool IsEntryValid(const PhPoint& key, const int& value) const { for (int i = 0; i < DIM; ++i) { - if (key[i] < minIncludeBits[i] || key[i] > maxIncludeBits[i]) { + if (key[i] < min_include_bits[i] || key[i] > max_include_bits[i]) { return false; } } @@ -123,16 +115,15 @@ class PhFilterBoxIntersection { [[nodiscard]] bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { // skip this for root node (bitsToIgnore == 64) - if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { return true; } - bit_mask_t maskMin = MAX_MASK << bits_to_ignore; - bit_mask_t maskMax = ~maskMin; + bit_mask_t mask_min = MAX_MASK << bits_to_ignore; + bit_mask_t mask_max = ~mask_min; for (size_t i = 0; i < prefix.size(); ++i) { - scalar_t minBits = prefix[i] & maskMin; - scalar_t maxBits = prefix[i] | maskMax; - if (maxBits < minIncludeBits[i] || minBits > maxIncludeBits[i]) { + if ((prefix[i] | mask_max) < min_include_bits[i] || + (prefix[i] & mask_min) > max_include_bits[i]) { return false; } } @@ -140,17 +131,14 @@ class PhFilterBoxIntersection { } private: - const PhPoint minIncludeBits; - const PhPoint maxIncludeBits; + const PhPoint min_include_bits; + const PhPoint max_include_bits; }; -template < - dimension_t DIM, - typename KEY = PhPoint, - PhPreprocessor PRE = PrePostNoOp> -class PhFilterTrue { +template > +class FilterTrue { public: - PhFilterTrue(const PhPoint& minInclude, const PhPoint& maxInclude) + FilterTrue(const PhPoint& minInclude, const PhPoint& maxInclude) : minIncludeBits{minInclude}, maxIncludeBits{maxInclude} {}; void set(const PhPointD& minExclude, const PhPointD& maxExclude) { @@ -171,13 +159,10 @@ class PhFilterTrue { const PhPoint maxIncludeBits; }; -template < - dimension_t DIM, - typename KEY = PhPoint, - PhPreprocessor PRE = PrePostNoOp> -class PhFilterTrue2 { +template > +class FilterTrue2 { public: - PhFilterTrue2() : minIncludeBits{}, 
maxIncludeBits{} {}; + FilterTrue2() : minIncludeBits{}, maxIncludeBits{} {}; [[nodiscard]] bool IsEntryValid(const PhPoint& key, const int& value) const { return true; @@ -193,7 +178,7 @@ class PhFilterTrue2 { }; template -struct PhFilterTrue3 { +struct FilterTrue3 { [[nodiscard]] constexpr bool IsEntryValid(const PhPoint& key, const T& value) const { return true; } @@ -224,12 +209,12 @@ void IndexBenchmark::QueryWorld(benchmark::State& state) { if (type_ == 0) { // PhPoint min{-GLOBAL_MAX, -GLOBAL_MAX, -GLOBAL_MAX}; // PhPoint max{GLOBAL_MAX, GLOBAL_MAX, GLOBAL_MAX}; - // PhFilterAABB filter(min, max); - // PhFilterBoxIntersection filter(min, max); - // PhFilterNoOp filter; - // PhFilterTrue filter(min, max); - // PhFilterTrue2 filter; - PhFilterTrue3 filter; + // FilterAABB filter(min, max); + // FilterBoxIntersection filter(min, max); + // FilterNoOp filter; + // FilterTrue filter(min, max); + // FilterTrue2 filter; + FilterTrue3 filter; auto q = tree_.begin(filter); // auto q = tree_.begin(); diff --git a/phtree/benchmark/find_benchmark.cc b/phtree/benchmark/find_benchmark.cc index c8341692..0cc90197 100644 --- a/phtree/benchmark/find_benchmark.cc +++ b/phtree/benchmark/find_benchmark.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include -#include -#include #include using namespace improbable; @@ -68,11 +67,7 @@ IndexBenchmark::IndexBenchmark( , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} , points_(num_entities) { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } @@ -105,7 +100,7 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); for (int i = 0; i < num_entities_; ++i) { tree_.emplace(points_[i], i); @@ -116,7 +111,7 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); - spdlog::info("World setup complete."); + logging::info("World setup complete."); } template diff --git a/phtree/benchmark/insert_benchmark.cc b/phtree/benchmark/insert_benchmark.cc index 6be2c5cd..c48e7778 100644 --- a/phtree/benchmark/insert_benchmark.cc +++ b/phtree/benchmark/insert_benchmark.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include -#include -#include using namespace improbable; using namespace improbable::phtree; @@ -65,11 +64,7 @@ IndexBenchmark::IndexBenchmark( , num_entities_(num_entities) , insertion_type_(insertionType) , points_(num_entities) { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } @@ -91,13 +86,13 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); state.counters["total_put_count"] = benchmark::Counter(0); state.counters["put_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); - spdlog::info("World setup complete."); + logging::info("World setup complete."); } template diff --git a/phtree/benchmark/insert_box_d_benchmark.cc b/phtree/benchmark/insert_box_d_benchmark.cc index ffdd45c1..817e848d 100644 --- a/phtree/benchmark/insert_box_d_benchmark.cc +++ b/phtree/benchmark/insert_box_d_benchmark.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" -#include "phtree/phtree_box_d.h" +#include "phtree/phtree.h" #include -#include -#include using namespace improbable; using namespace improbable::phtree; @@ -52,11 +51,7 @@ template IndexBenchmark::IndexBenchmark( benchmark::State& state, TestGenerator data_type, int num_entities) : data_type_{data_type}, num_entities_(num_entities), boxes_(num_entities) { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } @@ -78,13 +73,13 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN); state.counters["total_put_count"] = benchmark::Counter(0); state.counters["put_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); - spdlog::info("World setup complete."); + logging::info("World setup complete."); } template diff --git a/phtree/benchmark/insert_d_benchmark.cc b/phtree/benchmark/insert_d_benchmark.cc index 2c7daca8..7ef06a36 100644 --- a/phtree/benchmark/insert_d_benchmark.cc +++ b/phtree/benchmark/insert_d_benchmark.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" -#include "phtree/phtree_d.h" +#include "phtree/phtree.h" #include -#include -#include using namespace improbable; using namespace improbable::phtree; @@ -51,11 +50,7 @@ template IndexBenchmark::IndexBenchmark( benchmark::State& state, TestGenerator data_type, int num_entities) : data_type_{data_type}, num_entities_(num_entities), points_(num_entities) { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } @@ -77,13 +72,13 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); state.counters["total_put_count"] = benchmark::Counter(0); state.counters["put_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); - spdlog::info("World setup complete."); + logging::info("World setup complete."); } template diff --git a/phtree/benchmark/knn_d_benchmark.cc b/phtree/benchmark/knn_d_benchmark.cc index 131de0a9..7c56b852 100644 --- a/phtree/benchmark/knn_d_benchmark.cc +++ b/phtree/benchmark/knn_d_benchmark.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" -#include "phtree/phtree_d.h" +#include "phtree/phtree.h" #include -#include -#include #include using namespace improbable; @@ -63,11 +62,7 @@ IndexBenchmark::IndexBenchmark( , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} , points_(num_entities) { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } @@ -85,7 +80,7 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); for (int i = 0; i < num_entities_; ++i) { tree_.emplace(points_[i], i); @@ -97,13 +92,15 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); - spdlog::info("World setup complete."); + logging::info("World setup complete."); } template void IndexBenchmark::QueryWorld(benchmark::State& state, PhPointD& center) { int n = 0; - for (auto q = tree_.begin_knn_query(knn_result_size_, center); q != tree_.end(); ++q) { + for (auto q = tree_.begin_knn_query(knn_result_size_, center, DistanceEuclidean<DIM>()); + q != tree_.end(); + ++q) { ++n; } diff --git a/phtree/benchmark/logging.cc b/phtree/benchmark/logging.cc new file mode 100644 index 00000000..51803f0c --- /dev/null +++ b/phtree/benchmark/logging.cc @@ -0,0 +1,33 @@ +// Copyright (c) Improbable Worlds Ltd, All Rights Reserved +#include "logging.h" + +namespace
improbable::phtree::phbenchmark::logging { + +void SetupDefaultLogging() { + SetupLogging({}, spdlog::level::warn); +} + +void SetupLogging(std::vector sinks, spdlog::level::level_enum log_level) { + auto& console_sink = sinks.emplace_back(std::make_shared()); + console_sink->set_level(log_level); + + // Find the minimum log level, in case one of the sinks passed to us has a lower log level. + const auto& sink_with_lowest_log_level = *std::min_element( + sinks.begin(), + sinks.end(), + [](const spdlog::sink_ptr& a, const spdlog::sink_ptr& b) -> bool { + return a->level() < b->level(); + }); + spdlog::level::level_enum min_log_level = + std::min(sink_with_lowest_log_level->level(), log_level); + + // Create the external logger, worker logger and the internal (default) logger from the same log + // sinks. Each logsink can use `GetLoggerTypeFromMessage` to determine which logger a message + // was logged to. + spdlog::set_default_logger( + std::make_shared(kInternalLoggerName, sinks.begin(), sinks.end())); + spdlog::set_level(min_log_level); + spdlog::flush_on(min_log_level); +} + +} // namespace improbable::phtree::phbenchmark::logging diff --git a/phtree/benchmark/logging.h b/phtree/benchmark/logging.h new file mode 100644 index 00000000..14b7ae68 --- /dev/null +++ b/phtree/benchmark/logging.h @@ -0,0 +1,117 @@ +// Copyright (c) Improbable Worlds Ltd, All Rights Reserved +#ifndef PHTREE_BENCHMARK_LOGGING_H +#define PHTREE_BENCHMARK_LOGGING_H + +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif + +namespace improbable::phtree::phbenchmark::logging { + +#ifdef _WIN32 +using ConsoleSpdlogSink = spdlog::sinks::wincolor_stdout_sink_mt; +#else +using ConsoleSpdlogSink = spdlog::sinks::ansicolor_stdout_sink_mt; +#endif + +constexpr auto kInternalLoggerName = "internal"; + +// Sets up spdlog for internal and external. If you need to do some logging before doing this +// call, use instead CaptureLogMessagesToBufferSink()/SetupLoggingAndFlushBuffer. 
+void SetupLogging(std::vector sinks, spdlog::level::level_enum log_level); + +// Sets up default logging typically used for tests/benchmarks. Also used for default +// initialization if the logging hasn't been initialized before the first logging line. +void SetupDefaultLogging(); + +template +inline void log( + spdlog::source_loc source, + spdlog::level::level_enum lvl, + spdlog::string_view_t fmt, + const Args&... args) { + spdlog::log(source, lvl, fmt, args...); +} + +template +inline void log(spdlog::level::level_enum lvl, spdlog::string_view_t fmt, const Args&... args) { + spdlog::log(spdlog::source_loc{}, lvl, fmt, args...); +} + +template +inline void trace(spdlog::string_view_t fmt, const Args&... args) { + log(spdlog::level::level_enum::trace, fmt, args...); +} + +template +inline void debug(spdlog::string_view_t fmt, const Args&... args) { + log(spdlog::level::level_enum::debug, fmt, args...); +} + +template +inline void info(spdlog::string_view_t fmt, const Args&... args) { + log(spdlog::level::level_enum::info, fmt, args...); +} + +template +inline void warn(spdlog::string_view_t fmt, const Args&... args) { + log(spdlog::level::level_enum::warn, fmt, args...); +} + +template +inline void error(spdlog::string_view_t fmt, const Args&... args) { + log(spdlog::level::level_enum::err, fmt, args...); +} + +template +inline void critical(spdlog::string_view_t fmt, const Args&... 
args) { + log(spdlog::level::level_enum::critical, fmt, args...); +} + +template +inline void log(spdlog::source_loc source, spdlog::level::level_enum lvl, const T& msg) { + spdlog::log(source, lvl, msg); +} + +template +inline void log(spdlog::level::level_enum lvl, const T& msg) { + spdlog::log(lvl, msg); +} + +template +inline void trace(const T& msg) { + log(spdlog::level::level_enum::trace, msg); +} + +template +inline void debug(const T& msg) { + log(spdlog::level::level_enum::debug, msg); +} + +template +inline void info(const T& msg) { + log(spdlog::level::level_enum::info, msg); +} + +template +inline void warn(const T& msg) { + log(spdlog::level::level_enum::warn, msg); +} + +template +inline void error(const T& msg) { + log(spdlog::level::level_enum::err, msg); +} + +template +inline void critical(const T& msg) { + log(spdlog::level::level_enum::critical, msg); +} + +} // namespace improbable::phtree::phbenchmark::logging + +#endif // PHTREE_BENCHMARK_LOGGING_H diff --git a/phtree/benchmark/query_benchmark.cc b/phtree/benchmark/query_benchmark.cc index b8734082..b0f50f39 100644 --- a/phtree/benchmark/query_benchmark.cc +++ b/phtree/benchmark/query_benchmark.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include -#include -#include #include using namespace improbable; @@ -45,9 +44,9 @@ class IndexBenchmark { private: void SetupWorld(benchmark::State& state); - void QueryWorld(benchmark::State& state, PhPoint& min, PhPoint& max); + void QueryWorld(benchmark::State& state, PhBox& query); - void CreateQuery(PhPoint& min, PhPoint& max); + void CreateQuery(PhBox& query); const TestGenerator data_type_; const int num_entities_; @@ -75,11 +74,7 @@ IndexBenchmark::IndexBenchmark( , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} , points_(num_entities) { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } @@ -87,18 +82,17 @@ template void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - PhPoint min; - PhPoint max; - CreateQuery(min, max); + PhBox query_box; + CreateQuery(query_box); state.ResumeTiming(); - QueryWorld(state, min, max); + QueryWorld(state, query_box); } } template void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); for (int i = 0; i < num_entities_; ++i) { tree_.emplace(points_[i], i); @@ -109,14 +103,13 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); - spdlog::info("World setup complete."); + logging::info("World setup complete."); } template -void IndexBenchmark::QueryWorld( - 
benchmark::State& state, PhPoint& min, PhPoint& max) { +void IndexBenchmark::QueryWorld(benchmark::State& state, PhBox& query_box) { int n = 0; - for (auto q = tree_.begin_query(min, max); q != tree_.end(); ++q) { + for (auto q = tree_.begin_query(query_box); q != tree_.end(); ++q) { ++n; } @@ -127,15 +120,15 @@ void IndexBenchmark::QueryWorld( } template -void IndexBenchmark::CreateQuery(PhPoint& min, PhPoint& max) { +void IndexBenchmark::CreateQuery(PhBox& query_box) { int length = query_endge_length(); // scale to ensure query lies within boundary double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - scalar_t s = cube_distribution_(random_engine_); + auto s = cube_distribution_(random_engine_); s = s * scale; - min[d] = s; - max[d] = s + length; + query_box.min()[d] = s; + query_box.max()[d] = s + length; } } diff --git a/phtree/benchmark/query_box_d_benchmark.cc b/phtree/benchmark/query_box_d_benchmark.cc index 125dccd0..2b3fefa4 100644 --- a/phtree/benchmark/query_box_d_benchmark.cc +++ b/phtree/benchmark/query_box_d_benchmark.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" -#include "phtree/phtree_box_d.h" +#include "phtree/phtree.h" #include -#include -#include #include using namespace improbable; @@ -29,26 +28,37 @@ namespace { const double GLOBAL_MAX = 10000; const double BOX_LEN = GLOBAL_MAX / 100.; +enum QueryType { MIN_MAX_ITER, MIN_MAX_FOR_EACH }; + +template +using BoxType = PhBoxD; + +template +using PointType = PhPointD; + +template +using TreeType = PhTreeBoxD; + /* * Benchmark for window queries. 
*/ -template +template class IndexBenchmark { public: IndexBenchmark( benchmark::State& state, TestGenerator data_type, int num_entities, - double avg_query_result_size_); + double avg_query_result_size_ = 100); void Benchmark(benchmark::State& state); private: void SetupWorld(benchmark::State& state); - void QueryWorld(benchmark::State& state, PhPointD& min, PhPointD& max); + void QueryWorld(benchmark::State& state, BoxType& query_box); - void CreateQuery(PhPointD& min, PhPointD& max); + void CreateQuery(BoxType& query_box); const TestGenerator data_type_; const int num_entities_; @@ -58,14 +68,14 @@ class IndexBenchmark { return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); }; - PhTreeBoxD tree_; + TreeType tree_; std::default_random_engine random_engine_; std::uniform_real_distribution<> cube_distribution_; - std::vector> boxes_; + std::vector> boxes_; }; -template -IndexBenchmark::IndexBenchmark( +template +IndexBenchmark::IndexBenchmark( benchmark::State& state, TestGenerator data_type, int num_entities, @@ -76,30 +86,25 @@ IndexBenchmark::IndexBenchmark( , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} , boxes_(num_entities) { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } -template -void IndexBenchmark::Benchmark(benchmark::State& state) { +template +void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - PhPointD min; - PhPointD max; - CreateQuery(min, max); + BoxType query_box; + CreateQuery(query_box); state.ResumeTiming(); - QueryWorld(state, min, max); + QueryWorld(state, query_box); } } -template -void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); +template +void 
IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN); for (int i = 0; i < num_entities_; ++i) { tree_.emplace(boxes_[i], i); @@ -110,16 +115,45 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); - spdlog::info("World setup complete."); + logging::info("World setup complete."); } +template +struct Counter { + void operator()(BoxType key, T& t) { + ++n_; + } + + size_t n_ = 0; +}; + template -void IndexBenchmark::QueryWorld( - benchmark::State& state, PhPointD& min, PhPointD& max) { - int n = 0; - for (auto q = tree_.begin_query(min, max); q != tree_.end(); ++q) { +size_t Count_MMI(TreeType& tree, BoxType& query_box) { + size_t n = 0; + for (auto q = tree.begin_query(query_box); q != tree.end(); ++q) { ++n; } + return n; +} + +template +size_t Count_MMFE(TreeType& tree, BoxType& query_box) { + Counter callback; + tree.for_each(query_box, callback); + return callback.n_; +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, BoxType& query_box) { + int n = 0; + switch (QUERY_TYPE) { + case MIN_MAX_ITER: + n = Count_MMI(tree_, query_box); + break; + case MIN_MAX_FOR_EACH: + n = Count_MMFE(tree_, query_box); + break; + } state.counters["total_result_count"] += n; state.counters["query_rate"] += 1; @@ -127,53 +161,87 @@ void IndexBenchmark::QueryWorld( state.counters["avg_result_count"] += n; } -template -void IndexBenchmark::CreateQuery(PhPointD& min, PhPointD& max) { +template +void IndexBenchmark::CreateQuery(BoxType& query_box) { int length = query_endge_length(); // scale to ensure query lies within boundary double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < 
DIM; ++d) { - scalar_t s = cube_distribution_(random_engine_); + auto s = cube_distribution_(random_engine_); s = s * scale; - min[d] = s; - max[d] = s + length; + query_box.min()[d] = s; + query_box.max()[d] = s + length; } } } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3> benchmark{state, arguments...}; +void PhTree3D_MMI(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MIN_MAX_ITER> benchmark{state, arguments...}; benchmark.Benchmark(state); } +template +void PhTree3D_MMFE(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MIN_MAX_FOR_EACH> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_1K, TestGenerator::CUBE, 1000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000) + ->Unit(benchmark::kMillisecond); + +// index type, scenario name, data_type, num_entities, query_result_size +// PhTree 3D CLUSTER +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) + ->Unit(benchmark::kMillisecond); + // index type, scenario name, data_type, num_entities, query_result_size // PhTree 3D CUBE -BENCHMARK_CAPTURE(PhTree3D, 
WQ_CU_100_of_1K, TestGenerator::CUBE, 1000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_1K, TestGenerator::CUBE, 1000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000) ->Unit(benchmark::kMillisecond); // index type, scenario name, data_type, num_entities, query_result_size // PhTree 3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/phtree/benchmark/query_d_benchmark.cc b/phtree/benchmark/query_d_benchmark.cc index 1cd8202e..c35ff64c 100644 --- a/phtree/benchmark/query_d_benchmark.cc +++ b/phtree/benchmark/query_d_benchmark.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" -#include "phtree/phtree_d.h" +#include "phtree/phtree.h" #include -#include -#include #include using namespace improbable; @@ -28,26 +27,37 @@ namespace { const double GLOBAL_MAX = 10000; +enum QueryType { MIN_MAX_ITER, MIN_MAX_FOR_EACH }; + +template +using BoxType = PhBoxD; + +template +using PointType = PhPointD; + +template +using TreeType = PhTreeD; + /* * Benchmark for window queries. */ -template +template class IndexBenchmark { public: IndexBenchmark( benchmark::State& state, TestGenerator data_type, int num_entities, - double avg_query_result_size_); + double avg_query_result_size_ = 100); void Benchmark(benchmark::State& state); private: void SetupWorld(benchmark::State& state); - void QueryWorld(benchmark::State& state, PhPointD& min, PhPointD& max); + void QueryWorld(benchmark::State& state, BoxType& query_box); - void CreateQuery(PhPointD& min, PhPointD& max); + void CreateQuery(BoxType& query_box); const TestGenerator data_type_; const int num_entities_; @@ -57,14 +67,14 @@ class IndexBenchmark { return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM); }; - PhTreeD tree_; + TreeType tree_; std::default_random_engine random_engine_; std::uniform_real_distribution<> cube_distribution_; - std::vector> points_; + std::vector> points_; }; -template -IndexBenchmark::IndexBenchmark( +template +IndexBenchmark::IndexBenchmark( benchmark::State& state, TestGenerator data_type, int num_entities, @@ -72,33 +82,29 @@ IndexBenchmark::IndexBenchmark( : data_type_{data_type} , num_entities_(num_entities) , avg_query_result_size_(avg_query_result_size) +, tree_{} , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} , points_(num_entities) { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } -template -void IndexBenchmark::Benchmark(benchmark::State& state) { +template +void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - PhPointD min; - PhPointD max; - CreateQuery(min, max); + BoxType query_box; + CreateQuery(query_box); state.ResumeTiming(); - QueryWorld(state, min, max); + QueryWorld(state, query_box); } } -template -void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); for (int i = 0; i < num_entities_; ++i) { tree_.emplace(points_[i], i); @@ -109,16 +115,45 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); - spdlog::info("World setup complete."); + 
logging::info("World setup complete."); } +template +struct Counter { + void operator()(PointType key, T& t) { + ++n_; + } + + size_t n_ = 0; +}; + template -void IndexBenchmark::QueryWorld( - benchmark::State& state, PhPointD& min, PhPointD& max) { - int n = 0; - for (auto q = tree_.begin_query(min, max); q != tree_.end(); ++q) { +size_t Count_MMI(TreeType& tree, BoxType& query_box) { + size_t n = 0; + for (auto q = tree.begin_query(query_box); q != tree.end(); ++q) { ++n; } + return n; +} + +template +size_t Count_MMFE(TreeType& tree, BoxType& query_box) { + Counter callback; + tree.for_each(query_box, callback); + return callback.n_; +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, BoxType& query_box) { + int n = 0; + switch (QUERY_TYPE) { + case MIN_MAX_ITER: + n = Count_MMI(tree_, query_box); + break; + case MIN_MAX_FOR_EACH: + n = Count_MMFE(tree_, query_box); + break; + } state.counters["total_result_count"] += n; state.counters["query_rate"] += 1; @@ -126,53 +161,87 @@ void IndexBenchmark::QueryWorld( state.counters["avg_result_count"] += n; } -template -void IndexBenchmark::CreateQuery(PhPointD& min, PhPointD& max) { +template +void IndexBenchmark::CreateQuery(BoxType& query_box) { int length = query_endge_length(); // scale to ensure query lies within boundary double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - scalar_t s = cube_distribution_(random_engine_); + auto s = cube_distribution_(random_engine_); s = s * scale; - min[d] = s; - max[d] = s + length; + query_box.min()[d] = s; + query_box.max()[d] = s + length; } } } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3> benchmark{state, arguments...}; +void PhTree3D_MMI(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, MIN_MAX_ITER> benchmark{state, arguments...}; benchmark.Benchmark(state); } +template +void PhTree3D_MMFE(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MIN_MAX_FOR_EACH> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_1K, TestGenerator::CUBE, 1000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000) + ->Unit(benchmark::kMillisecond); + +// index type, scenario name, data_type, num_entities, query_result_size +// PhTree 3D CLUSTER +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) + ->Unit(benchmark::kMillisecond); + // index type, scenario name, data_type, num_entities, query_result_size // PhTree 3D CUBE -BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_1K, TestGenerator::CUBE, 1000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_1K, TestGenerator::CUBE, 1000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000, 100.0) 
+BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000) ->Unit(benchmark::kMillisecond); // index type, scenario name, data_type, num_entities, query_result_size // PhTree 3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000, 100.0) +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/phtree/benchmark/query_mm_box_d_benchmark.cc b/phtree/benchmark/query_mm_box_d_benchmark.cc new file mode 100644 index 00000000..d71c7084 --- /dev/null +++ b/phtree/benchmark/query_mm_box_d_benchmark.cc @@ -0,0 +1,222 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +/* + * Benchmark for querying entries in multi-map implementations. + */ +namespace { + +const double GLOBAL_MAX = 10000; +const double BOX_LEN = 100; + +enum Scenario { TREE_WITH_MAP, MULTI_MAP }; + +using TestPoint = PhPointD<3>; +using QueryBox = PhBoxD<3>; +using payload_t = PhBoxD<3>; +using BucketType = std::unordered_set; + +struct Query { + QueryBox box_{}; +}; + +template +using CONVERTER = ConverterBoxIEEE; + +template +using TestMap = typename std::conditional_t< + SCENARIO == TREE_WITH_MAP, + PhTreeBoxD>, + PhTreeMultiMapBoxD>>; + +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, double avg_query_result_size_); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, const Query& query); + void CreateQuery(Query& query); + + const TestGenerator data_type_; + const size_t num_entities_; + const double avg_query_result_size_; + + constexpr double query_endge_length() { + return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM); }; + + TestMap tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> boxes_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double avg_query_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, avg_query_result_size_(avg_query_result_size) +, tree_{} +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, boxes_(num_entities_) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + Query query{}; + for (auto _ : state) { + state.PauseTiming(); + CreateQuery(query); + state.ResumeTiming(); + + QueryWorld(state, query); + } +} + +template +void InsertEntry( + TestMap& tree, const PhBoxD& point, const payload_t& data) { + BucketType& bucket = tree.emplace(point).first; + bucket.emplace(data); +} + +template +void InsertEntry( + TestMap& tree, const PhBoxD& point, const payload_t& data) { + tree.emplace(point, data); +} + +bool CheckPosition(const payload_t& entity, const QueryBox& query) { + const auto& box = entity; + bool result = true; + for (int d = 0; d < 3; ++d) { /* must overlap in every dimension */ + result = result && query.min()[d] <= box.max()[d] && query.max()[d] >= box.min()[d]; + } + return result; +} + +struct CounterTreeWithMap { + void operator()(const PhBoxD<3>& key, const BucketType& value) { + for (auto& x : value) { + n_ += CheckPosition(x, box_); + } + } + const QueryBox& box_; + size_t n_; +}; + +struct CounterMultiMap { + void operator()(const PhBoxD<3>& key, const payload_t& value) { + n_ += CheckPosition(value, box_); + } + const QueryBox& box_; + size_t n_; +}; + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + CounterTreeWithMap counter{query.box_, 0}; + tree.for_each(query.box_, counter); + return counter.n_; +} + +template +int CountEntries(TestMap& tree, const Query& 
query) { + CounterMultiMap
counter{query.box_, 0}; + tree.for_each(query.box_, counter); + return counter.n_; +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + // create data with about 10% duplicate coordinates + CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN, 0.1); + for (size_t i = 0; i < num_entities_; ++i) { + InsertEntry(tree_, boxes_[i], boxes_[i]); + } + + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + logging::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, const Query& query) { + int n = CountEntries(tree_, query); + + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(Query& query) { + double radius = query_endge_length() * 0.5; + for (dimension_t d = 0; d < DIM; ++d) { + auto x = cube_distribution_(random_engine_); + query.box_.min()[d] = x - radius; + query.box_.max()[d] = x + radius; + } +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMultiMapM3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, avg_query_result_size +// PhTree +BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMultiMapM3D, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/query_mm_d_benchmark.cc b/phtree/benchmark/query_mm_d_benchmark.cc new file mode 100644 index 00000000..85558b9c --- /dev/null +++ b/phtree/benchmark/query_mm_d_benchmark.cc @@ -0,0 +1,230 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +/* + * Benchmark for querying entries in multi-map implementations. + * This benchmarks uses a SPHERE shaped query! 
+ */ +namespace { + +const double GLOBAL_MAX = 10000; + +enum Scenario { TREE_WITH_MAP, MULTI_MAP }; + +using TestPoint = PhPointD<3>; +using QueryBox = PhBoxD<3>; +using payload_t = TestPoint; +using BucketType = std::set; + +struct Query { + QueryBox box{}; + TestPoint center{}; + double radius{}; +}; + +template +using CONVERTER = ConverterIEEE; + +template +using TestMap = typename std::conditional_t< + SCENARIO == TREE_WITH_MAP, + PhTreeD>, + PhTreeMultiMapD>>; + +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, double avg_query_result_size_); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, const Query& query); + void CreateQuery(Query& query); + + const TestGenerator data_type_; + const size_t num_entities_; + const double avg_query_result_size_; + + constexpr double query_endge_length() { + return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM); + }; + + TestMap tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double avg_query_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, avg_query_result_size_(avg_query_result_size) +, tree_{} +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities_) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + Query query{}; + for (auto _ : state) { + state.PauseTiming(); + CreateQuery(query); + state.ResumeTiming(); + + QueryWorld(state, query); + } +} + +template +void InsertEntry( + TestMap& tree, + const PhPointD& point, + const payload_t& data) { + BucketType& bucket = tree.emplace(point).first; + bucket.emplace(data); +} + +template +void InsertEntry( + TestMap& tree, const PhPointD& point, const payload_t& data) { + tree.emplace(point, data); +} + +bool CheckPosition(const payload_t& entity, const TestPoint& center, double radius) { + const auto& point = entity; + double dx = center[0] - point[0]; + double dy = center[1] - point[1]; + double dz = center[2] - point[2]; + return dx * dx + dy * dy + dz * dz <= radius * radius; +} + +struct CounterTreeWithMap { + void operator()(const PhPointD<3>& key, const BucketType& value) { + for (auto& x : value) { + // n_ += (x.entity_id_ >= 0); + n_ += CheckPosition(x, center_, radius_); + } + } + const TestPoint& center_; + double radius_; + size_t n_; +}; + +struct CounterMultiMap { + void operator()(const PhPointD<3>& key, const payload_t& value) { + n_ += CheckPosition(value, center_, radius_); + } + const TestPoint& center_; + double radius_; + size_t n_; +}; + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + CounterTreeWithMap counter{query.center, query.radius, 0}; 
+ tree.for_each(query.box, counter); + return counter.n_; +} + +template +int CountEntries(TestMap& tree, const Query& query) { + CounterMultiMap counter{query.center, query.radius, 0}; + tree.for_each(query.box, counter); + return counter.n_; +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + // create data with about 10% duplicate coordinates + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX, 0.1); + for (size_t i = 0; i < num_entities_; ++i) { + InsertEntry(tree_, points_[i], points_[i]); + } + + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + logging::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, const Query& query) { + int n = CountEntries(tree_, query); + + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(Query& query) { + double radius = query_endge_length() * 0.5; + for (dimension_t d = 0; d < DIM; ++d) { + auto x = cube_distribution_(random_engine_); + query.box.min()[d] = x - radius; + query.box.max()[d] = x + radius; + query.center[d] = x; + } + query.radius = radius; +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMultiMapM3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, avg_query_result_size +// PhTree +BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMultiMapM3D, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_box_d_benchmark.cc b/phtree/benchmark/update_box_d_benchmark.cc index 18acc8f3..ab825e26 100644 --- a/phtree/benchmark/update_box_d_benchmark.cc +++ b/phtree/benchmark/update_box_d_benchmark.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" -#include "phtree/phtree_box_d.h" +#include "phtree/phtree.h" #include -#include -#include using namespace improbable; using namespace improbable::phtree; @@ -31,11 +30,17 @@ constexpr double MOVE_DISTANCE = 10; const double GLOBAL_MAX = 10000; const double BOX_LEN = 10; +template +using BoxType = PhBoxD; + +template +using TreeType = PhTreeBoxD; + template struct UpdateOp { - scalar_t id_; - PhBoxD old_; - PhBoxD new_; + size_t id_; + BoxType old_; + BoxType new_; }; /* @@ -59,12 +64,12 @@ class IndexBenchmark { void UpdateWorld(benchmark::State& state); const TestGenerator data_type_; - const int num_entities_; - const int updates_per_round_; + const size_t num_entities_; + const size_t updates_per_round_; const double move_distance_; - PhTreeBoxD tree_; - std::vector> boxes_; + TreeType tree_; + std::vector> boxes_; std::vector> updates_; std::default_random_engine random_engine_; std::uniform_int_distribution<> 
entity_id_distribution_; @@ -85,11 +90,7 @@ IndexBenchmark::IndexBenchmark( , updates_(updates_per_round) , random_engine_{0} , entity_id_distribution_{0, num_entities - 1} { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } @@ -106,15 +107,15 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN); - for (int i = 0; i < num_entities_; ++i) { + for (size_t i = 0; i < num_entities_; ++i) { tree_.emplace(boxes_[i], i); } state.counters["total_upd_count"] = benchmark::Counter(0); state.counters["update_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); - spdlog::info("World setup complete."); + logging::info("World setup complete."); } template @@ -135,17 +136,21 @@ void IndexBenchmark::BuildUpdates() { template void IndexBenchmark::UpdateWorld(benchmark::State& state) { size_t initial_tree_size = tree_.size(); + size_t n = 0; for (auto& update : updates_) { size_t result_erase = tree_.erase(update.old_); auto result_emplace = tree_.emplace(update.new_, update.id_); - assert(result_erase == 1); - assert(result_emplace.second); + n += result_erase == 1 && result_emplace.second; + } + + if (n != updates_.size()) { + logging::error("Invalid update count: {}/{}", updates_.size(), n); } // For normal indexes we expect num_entities==size(), but the PhTree> index has // size() as low as (num_entities-duplicates). 
if (tree_.size() > num_entities_ || tree_.size() + updates_per_round_ < initial_tree_size) { - spdlog::error("Invalid index size after update: {}/{}", tree_.size(), num_entities_); + logging::error("Invalid index size after update: {}/{}", tree_.size(), num_entities_); } state.counters["total_upd_count"] += updates_per_round_; diff --git a/phtree/benchmark/update_d_benchmark.cc b/phtree/benchmark/update_d_benchmark.cc index 242691a8..f358c564 100644 --- a/phtree/benchmark/update_d_benchmark.cc +++ b/phtree/benchmark/update_d_benchmark.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "logging.h" #include "phtree/benchmark/benchmark_util.h" -#include "phtree/phtree_d.h" +#include "phtree/phtree.h" #include -#include -#include using namespace improbable; using namespace improbable::phtree; @@ -25,22 +24,30 @@ using namespace improbable::phtree::phbenchmark; namespace { -constexpr int UPDATES_PER_ROUND = 1000; -constexpr double MOVE_DISTANCE = 10; +constexpr size_t UPDATES_PER_ROUND = 1000; +std::vector MOVE_DISTANCE = {0, 1.0, 10}; const double GLOBAL_MAX = 10000; +enum UpdateType { ERASE_BY_KEY, ERASE_BY_ITER, EMPLACE_HINT }; + +template +using PointType = PhPointD; + +template +using TreeType = PhTreeD; + template struct UpdateOp { - scalar_t id_; - PhPointD old_; - PhPointD new_; + size_t id_; + PointType old_; + PointType new_; }; /* * Benchmark for updating the position of entries. 
*/ -template +template class IndexBenchmark { public: IndexBenchmark( @@ -48,7 +55,7 @@ class IndexBenchmark { TestGenerator data_type, int num_entities, int updates_per_round = UPDATES_PER_ROUND, - double move_distance = MOVE_DISTANCE); + std::vector move_distance = MOVE_DISTANCE); void Benchmark(benchmark::State& state); @@ -58,42 +65,38 @@ class IndexBenchmark { void UpdateWorld(benchmark::State& state); const TestGenerator data_type_; - const int num_entities_; - const int updates_per_round_; - const double move_distance_; + const size_t num_entities_; + const size_t updates_per_round_; + const std::vector move_distance_; - PhTreeD tree_; - std::vector> points_; + TreeType tree_; + std::vector> points_; std::vector> updates_; std::default_random_engine random_engine_; std::uniform_int_distribution<> entity_id_distribution_; }; -template -IndexBenchmark::IndexBenchmark( +template +IndexBenchmark::IndexBenchmark( benchmark::State& state, TestGenerator data_type, int num_entities, int updates_per_round, - double move_distance) + std::vector move_distance) : data_type_{data_type} , num_entities_(num_entities) , updates_per_round_(updates_per_round) -, move_distance_(move_distance) +, move_distance_(std::move(move_distance)) , points_(num_entities) , updates_(updates_per_round) , random_engine_{0} , entity_id_distribution_{0, num_entities - 1} { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::warn); - + logging::SetupDefaultLogging(); SetupWorld(state); } -template -void IndexBenchmark::Benchmark(benchmark::State& state) { +template +void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); BuildUpdates(); @@ -103,47 +106,100 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { } } -template -void IndexBenchmark::SetupWorld(benchmark::State& state) { - spdlog::info("Setting up world with {} 
entities and {} dimensions.", num_entities_, DIM); +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { + for (size_t i = 0; i < num_entities_; ++i) { tree_.emplace(points_[i], i); } state.counters["total_upd_count"] = benchmark::Counter(0); state.counters["update_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); - spdlog::info("World setup complete."); + logging::info("World setup complete."); } -template -void IndexBenchmark::BuildUpdates() { +template +void IndexBenchmark::BuildUpdates() { + size_t move_id = 0; for (auto& update : updates_) { int point_id = entity_id_distribution_(random_engine_); update.id_ = point_id; update.old_ = points_[point_id]; for (dimension_t d = 0; d < DIM; ++d) { - update.new_[d] = update.old_[d] + move_distance_; + update.new_[d] = update.old_[d] + move_distance_[move_id]; } // update reference data points_[point_id] = update.new_; + + move_id = (move_id + 1) % move_distance_.size(); + } +} + +template +size_t UpdateByKey(TreeType& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + // naive erase + emplace + size_t result_erase = tree.erase(update.old_); + auto result_emplace = tree.emplace(update.new_, update.id_); + n += result_erase == 1 && result_emplace.second; + } + return n; +} + +template +size_t UpdateByIter(TreeType& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + // find + erase + emplace + // This is not immediately useful, but demonstrates that find + erase is as fast + // as erase(key). 
+ auto iter = tree.find(update.old_); + size_t result_erase = tree.erase(iter); + auto result_emplace = tree.emplace(update.new_, update.id_); + n += result_erase == 1 && result_emplace.second; } + return n; } template -void IndexBenchmark::UpdateWorld(benchmark::State& state) { +size_t UpdateByIterHint(TreeType& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + // find + erase + emplace_hint + auto iter = tree.find(update.old_); + size_t result_erase = tree.erase(iter); + auto result_emplace = tree.emplace_hint(iter, update.new_, update.id_); + n += result_erase == 1 && result_emplace.second; + } + return n; +} + +template +void IndexBenchmark::UpdateWorld(benchmark::State& state) { size_t initial_tree_size = tree_.size(); - for (auto& update : updates_) { - size_t result_erase = tree_.erase(update.old_); - auto result_emplace = tree_.emplace(update.new_, update.id_); - assert(result_erase == 1); - assert(result_emplace.second); + size_t n = 0; + switch (UPDATE_TYPE) { + case UpdateType::ERASE_BY_KEY: + n = UpdateByKey(tree_, updates_); + break; + case UpdateType::ERASE_BY_ITER: + n = UpdateByIter(tree_, updates_); + break; + case UpdateType::EMPLACE_HINT: + n = UpdateByIterHint(tree_, updates_); + break; + } + + if (n != updates_.size()) { + logging::error("Invalid update count: {}/{}", updates_.size(), n); } // For normal indexes we expect num_entities==size(), but the PhTree> index has // size() as low as (num_entities-duplicates). if (tree_.size() > num_entities_ || tree_.size() + updates_per_round_ < initial_tree_size) { - spdlog::error("Invalid index size after update: {}/{}", tree_.size(), num_entities_); + logging::error("Invalid index size after update: {}/{}", tree_.size(), num_entities_); } state.counters["total_upd_count"] += updates_per_round_; @@ -153,36 +209,101 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... 
arguments) { - IndexBenchmark<3> benchmark{state, arguments...}; +void PhTreeEraseKey3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, UpdateType::ERASE_BY_KEY> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeEraseIter3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, UpdateType::ERASE_BY_ITER> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeEmplaceHint3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, UpdateType::EMPLACE_HINT> benchmark{state, arguments...}; benchmark.Benchmark(state); } // index type, scenario name, data_type, num_entities, updates_per_round, move_distance // PhTree3D CUBE -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) +BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) +BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) +BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) +BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) ->Unit(benchmark::kMillisecond); // PhTree3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) +BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) +BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) ->Unit(benchmark::kMillisecond); 
-BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) +BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) +BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) ->Unit(benchmark::kMillisecond); +// index type, scenario name, data_type, num_entities, updates_per_round, move_distance +// PhTree3D CUBE +BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) + ->Unit(benchmark::kMillisecond); + +// PhTree3D CLUSTER +BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) + ->Unit(benchmark::kMillisecond); + +// index type, scenario name, data_type, num_entities, updates_per_round, move_distance +// PhTree3D CUBE +BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_100K, 
TestGenerator::CUBE, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) + ->Unit(benchmark::kMillisecond); + +// PhTree3D CLUSTER +BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) + ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_mm_box_d_benchmark.cc b/phtree/benchmark/update_mm_box_d_benchmark.cc new file mode 100644 index 00000000..46e600d0 --- /dev/null +++ b/phtree/benchmark/update_mm_box_d_benchmark.cc @@ -0,0 +1,248 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +/* + * Benchmark for updating the position of entries in multi-map implementations. 
+ */ +namespace { + +constexpr size_t UPDATES_PER_ROUND = 1000; +std::vector MOVE_DISTANCE = {0, 1.0, 10}; + +const double GLOBAL_MAX = 10000; +const double BOX_LEN = 100; + +enum Scenario { TREE_WITH_MAP, MULTI_MAP }; + +using payload_t = scalar_64_t; + +using BucketType = std::set; + +template +using CONVERTER = ConverterBoxIEEE; + +template +using TestMap = typename std::conditional_t< + SCENARIO == TREE_WITH_MAP, + PhTreeBoxD>, + PhTreeMultiMapBoxD>>; + +template +struct UpdateOp { + payload_t id_; + PhBoxD old_; + PhBoxD new_; +}; + +template +class IndexBenchmark { + public: + explicit IndexBenchmark( + benchmark::State& state, + size_t updates_per_round = UPDATES_PER_ROUND, + std::vector move_distance = MOVE_DISTANCE); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void BuildUpdates(); + void UpdateWorld(benchmark::State& state); + + const TestGenerator data_type_; + const size_t num_entities_; + const size_t updates_per_round_; + const std::vector move_distance_; + + TestMap tree_; + std::vector> boxes_; + std::vector> updates_; + std::default_random_engine random_engine_; + std::uniform_int_distribution<> entity_id_distribution_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, size_t updates_per_round, std::vector move_distance) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, updates_per_round_(updates_per_round) +, move_distance_(std::move(move_distance)) +, boxes_(num_entities_) +, updates_(updates_per_round) +, random_engine_{0} +, entity_id_distribution_{0, static_cast(num_entities_ - 1)} { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + BuildUpdates(); + state.ResumeTiming(); + + UpdateWorld(state); + } +} + +template +void InsertEntry( + TestMap& tree, const PhBoxD& point, payload_t data) { + BucketType& 
bucket = tree.emplace(point).first; + bucket.emplace(data); +} + +template +void InsertEntry( + TestMap& tree, const PhBoxD& point, payload_t data) { + tree.emplace(point, data); +} + +template +typename std::enable_if::type UpdateEntry( + TestMap& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + auto pair_with_bucket = tree.emplace(update.new_); + auto result_of_bucket_emplace = pair_with_bucket.first.emplace(update.id_); + if (!result_of_bucket_emplace.second) { + // emplace failed -> entry already exists. We are done! + ++n; + continue; + } + + // TODO implement erase_hint or find_hint or something? + + // Entry is already inserted, now remove old entry. + auto iter_old_bucket = tree.find(update.old_); + assert(iter_old_bucket != tree.end()); + bool success = iter_old_bucket->erase(update.id_); + if (iter_old_bucket->empty()) { + success &= tree.erase(iter_old_bucket); + } + n += success; + } + return n; +} + +template +typename std::enable_if::type UpdateEntry( + TestMap& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + n += tree.relocate(update.old_, update.new_, update.id_); + } + return n; +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + // create data with about 10% duplicate coordinates + CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN, 0.1); + for (size_t i = 0; i < num_entities_; ++i) { + InsertEntry(tree_, boxes_[i], i); + } + + state.counters["total_upd_count"] = benchmark::Counter(0); + state.counters["update_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + logging::info("World setup complete."); +} + +template +void IndexBenchmark::BuildUpdates() { + size_t move_id = 0; + for (auto& update : updates_) { + auto box_id = entity_id_distribution_(random_engine_); + update.id_ = box_id; + update.old_ = boxes_[box_id]; + for 
(dimension_t d = 0; d < DIM; ++d) { + auto move_distance = move_distance_[move_id]; + update.new_.min()[d] = update.old_.min()[d] + move_distance; + update.new_.max()[d] = update.old_.max()[d] + move_distance; + } + // update reference data + boxes_[box_id] = update.new_; + + move_id = (move_id + 1) % move_distance_.size(); + } +} + +template +void IndexBenchmark::UpdateWorld(benchmark::State& state) { + size_t initial_tree_size = tree_.size(); + size_t n = 0; + n = UpdateEntry(tree_, updates_); + if (n != updates_.size()) { + logging::error("Invalid update count: {}/{}", updates_.size(), n); + } + + if constexpr (SCENARIO == MULTI_MAP) { + if (tree_.size() != num_entities_) { + logging::error("Invalid index size after update: {}/{}", tree_.size(), num_entities_); + } + } else { + // For normal indexes we expect num_entities==size(), but the PhTree> index has + // size() as low as (num_entities-duplicates). + if (tree_.size() > num_entities_ || tree_.size() + updates_per_round_ < initial_tree_size) { + logging::error("Invalid index size after update: {}/{}", tree_.size(), num_entities_); + } + } + + state.counters["total_upd_count"] += updates_per_round_; + state.counters["update_rate"] += updates_per_round_; +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMultiMap3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, updates_per_round, move_distance +// PhTree +BENCHMARK_CAPTURE(PhTree3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMultiMap3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_mm_d_benchmark.cc b/phtree/benchmark/update_mm_d_benchmark.cc new file mode 100644 index 00000000..739ca914 --- /dev/null +++ b/phtree/benchmark/update_mm_d_benchmark.cc @@ -0,0 +1,242 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +/* + * Benchmark for updating the position of entries in multi-map implementations. 
+ */ +namespace { + +constexpr size_t UPDATES_PER_ROUND = 1000; +std::vector MOVE_DISTANCE = {0, 1.0, 10}; + +const double GLOBAL_MAX = 10000; + +enum Scenario { TREE_WITH_MAP, MULTI_MAP }; + +using payload_t = scalar_64_t; + +using BucketType = std::set; + +template +using PointType = PhPointD; + +template +using CONVERTER = ConverterIEEE; + +template +using TestMap = typename std::conditional_t< + SCENARIO == TREE_WITH_MAP, + PhTreeD>, + PhTreeMultiMapD>>; + +template +struct UpdateOp { + payload_t id_; + PointType old_; + PointType new_; +}; + +template +class IndexBenchmark { + public: + explicit IndexBenchmark( + benchmark::State& state, + size_t updates_per_round = UPDATES_PER_ROUND, + std::vector move_distance = MOVE_DISTANCE); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void BuildUpdates(); + void UpdateWorld(benchmark::State& state); + + const TestGenerator data_type_; + const size_t num_entities_; + const size_t updates_per_round_; + const std::vector move_distance_; + + TestMap tree_; + std::vector> points_; + std::vector> updates_; + std::default_random_engine random_engine_; + std::uniform_int_distribution<> entity_id_distribution_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, size_t updates_per_round, std::vector move_distance) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, updates_per_round_(updates_per_round) +, move_distance_(std::move(move_distance)) +, points_(num_entities_) +, updates_(updates_per_round) +, random_engine_{0} +, entity_id_distribution_{0, static_cast(num_entities_ - 1)} { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + BuildUpdates(); + state.ResumeTiming(); + + UpdateWorld(state); + } +} + +template +void InsertEntry( + TestMap& tree, const PointType& point, payload_t data) { + 
BucketType& bucket = tree.emplace(point).first; + bucket.emplace(data); +} + +template +void InsertEntry( + TestMap& tree, const PointType& point, payload_t data) { + tree.emplace(point, data); +} + +template +typename std::enable_if::type UpdateEntry( + TestMap& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + auto pair_with_bucket = tree.emplace(update.new_); + auto result_of_bucket_emplace = pair_with_bucket.first.emplace(update.id_); + if (!result_of_bucket_emplace.second) { + // emplace failed -> entry already exists. We are done! + ++n; + continue; + } + + // TODO implement erase_hint or find_hint or something? + + // Entry is already inserted, now remove old entry. + auto iter_old_bucket = tree.find(update.old_); + assert(iter_old_bucket != tree.end()); + bool success = iter_old_bucket->erase(update.id_); + if (iter_old_bucket->empty()) { + success &= tree.erase(iter_old_bucket); + } + n += success; + } + return n; +} + +template +typename std::enable_if::type UpdateEntry( + TestMap& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + n += tree.relocate(update.old_, update.new_, update.id_); + } + return n; +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + // create data with about 10% duplicate coordinates + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX, 0.1); + for (size_t i = 0; i < num_entities_; ++i) { + InsertEntry(tree_, points_[i], i); + } + + state.counters["total_upd_count"] = benchmark::Counter(0); + state.counters["update_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + logging::info("World setup complete."); +} + +template +void IndexBenchmark::BuildUpdates() { + size_t move_id = 0; + for (auto& update : updates_) { + auto point_id = entity_id_distribution_(random_engine_); + update.id_ = point_id; + update.old_ = points_[point_id]; 
+ for (dimension_t d = 0; d < DIM; ++d) { + update.new_[d] = update.old_[d] + move_distance_[move_id]; + } + // update reference data + points_[point_id] = update.new_; + + move_id = (move_id + 1) % move_distance_.size(); + } +} + +template +void IndexBenchmark::UpdateWorld(benchmark::State& state) { + size_t initial_tree_size = tree_.size(); + size_t n = 0; + n = UpdateEntry(tree_, updates_); + if (n != updates_.size()) { + logging::error("Invalid update count: {}/{}", updates_.size(), n); + } + + // For normal indexes we expect num_entities==size(), but the PhTree> index has + // size() as low as (num_entities-duplicates). + if (tree_.size() > num_entities_ || tree_.size() + updates_per_round_ < initial_tree_size) { + logging::error("Invalid index size after update: {}/{}", tree_.size(), num_entities_); + } + + state.counters["total_upd_count"] += updates_per_round_; + state.counters["update_rate"] += updates_per_round_; +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMultiMap3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, updates_per_round, move_distance +// PhTree +BENCHMARK_CAPTURE(PhTree3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMultiMap3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/common/BUILD b/phtree/common/BUILD index e4927f35..7ef3b6bf 100644 --- a/phtree/common/BUILD +++ b/phtree/common/BUILD @@ -3,16 +3,16 @@ package(default_visibility = ["//visibility:private"]) cc_library( name = "common", hdrs = [ - "ph_base_types.h", - "ph_bits.h", - "ph_common.h", - "ph_distance.h", - "ph_filter.h", - "ph_flat_array_map.h", - "ph_flat_sparse_map.h", - "ph_preprocessor.h", - "ph_tree_debug_helper.h", - "ph_tree_stats.h", + "base_types.h", + "bits.h", + "common.h", + "converter.h", + "debug_helper.h", + "distance.h", + "filter.h", + "flat_array_map.h", + "flat_sparse_map.h", + "tree_stats.h", ], visibility = [ "//visibility:public", @@ -25,7 +25,7 @@ cc_test( name = "base_types_test", timeout = "long", srcs = [ - "ph_base_types_test.cc", + "base_types_test.cc", ], linkstatic = True, deps = [ @@ -38,7 +38,7 @@ cc_test( name = "bits_test", timeout = "long", srcs = [ - "ph_bits_test.cc", + "bits_test.cc", ], linkstatic = True, deps = [ @@ -51,7 +51,7 @@ cc_test( name = "common_test", timeout = "long", srcs = [ - "ph_common_test.cc", + "common_test.cc", ], linkstatic = True, deps = [ @@ -64,7 +64,7 @@ cc_test( name = "distance_test", timeout = "long", srcs = [ - "ph_distance_test.cc", + "distance_test.cc", ], linkstatic = True, deps = [ @@ -77,7 +77,7 @@ cc_test( name = 
"filter_test", timeout = "long", srcs = [ - "ph_filter_test.cc", + "filter_test.cc", ], linkstatic = True, deps = [ @@ -90,7 +90,7 @@ cc_test( name = "flat_array_map_test", timeout = "long", srcs = [ - "ph_flat_array_map_test.cc", + "flat_array_map_test.cc", ], linkstatic = True, deps = [ @@ -103,7 +103,7 @@ cc_test( name = "flat_sparse_map_test", timeout = "long", srcs = [ - "ph_flat_sparse_map_test.cc", + "flat_sparse_map_test.cc", ], linkstatic = True, deps = [ @@ -116,7 +116,7 @@ cc_test( name = "preprocessor_test", timeout = "long", srcs = [ - "ph_preprocessor_test.cc", + "converter_test.cc", ], linkstatic = True, deps = [ diff --git a/phtree/common/CMakeLists.txt b/phtree/common/CMakeLists.txt index 0cd11641..bb07ca12 100644 --- a/phtree/common/CMakeLists.txt +++ b/phtree/common/CMakeLists.txt @@ -2,14 +2,14 @@ cmake_minimum_required(VERSION 3.14) target_sources(phtree PRIVATE - ph_common.h - ph_base_types.h - ph_bits.h - ph_distance.h - ph_filter.h - ph_flat_array_map.h - ph_flat_sparse_map.h - ph_preprocessor.h - ph_tree_debug_helper.h - ph_tree_stats.h + common.h + base_types.h + bits.h + distance.h + filter.h + flat_array_map.h + flat_sparse_map.h + converter.h + debug_helper.h + tree_stats.h ) diff --git a/phtree/common/base_types.h b/phtree/common/base_types.h new file mode 100644 index 00000000..05f7fd83 --- /dev/null +++ b/phtree/common/base_types.h @@ -0,0 +1,171 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_BASE_TYPES_H +#define PHTREE_COMMON_BASE_TYPES_H + +#include +#include +#include +#include +#include +#include + +/* + * PLEASE do not include this file directly, it is included via common.h. + * + * This file contains specifications for various types used in the PH-Tree, including + * PhPoint, PhPointD and PhPointBox. + */ +namespace improbable::phtree { + +// ************************************************************************ +// Constants and base types +// ************************************************************************ + +using scalar_64_t = int64_t; +using scalar_32_t = int32_t; +using scalar_16_t = int16_t; + +// Bits in a coordinate (usually a double or long has 64 bits, so uint_8 suffices) +using bit_width_t = uint16_t; +// Number of bits for 'scalar_64_t' or 'scalar_32_t'. Note that 'digits' does _not_ include sign bit, +// so e.g. int64_t has 63 `digits`, however we need all bits, i.e. 64. +template +static constexpr bit_width_t MAX_BIT_WIDTH = + std::numeric_limits::digits + std::numeric_limits::is_signed; +// Bit mask +template +using bit_mask_t = typename std::make_unsigned::type; +template +static constexpr bit_mask_t MAX_MASK = std::numeric_limits>::max(); +using dimension_t = size_t; // Number of dimensions +using hc_pos_t = uint64_t; + +// ************************************************************************ +// Basic structs and classes +// ************************************************************************ + +// The SCALAR type needs to be a signed integer, i.e. int32_t or int64_t. 
+template +using PhPoint = std::array; + +template +using PhPointD = std::array; + +template +using PhPointF = std::array; + +template +class PhBox { + using Point = PhPoint; + + public: + explicit PhBox() = default; + + PhBox(const PhBox& orig) = default; + + PhBox(const std::array& min, const std::array& max) + : min_{min}, max_{max} {} + + [[nodiscard]] const Point& min() const { + return min_; + } + + [[nodiscard]] const Point& max() const { + return max_; + } + + [[nodiscard]] Point& min() { + return min_; + } + + [[nodiscard]] Point& max() { + return max_; + } + + void min(const std::array& new_min) { + min_ = new_min; + } + + void max(const std::array& new_max) { + max_ = new_max; + } + + auto operator==(const PhBox& other) const -> bool { + return min_ == other.min_ && max_ == other.max_; + } + + private: + Point min_; + Point max_; +}; + +template +using PhBoxD = PhBox; + +template +using PhBoxF = PhBox; + +template +std::ostream& operator<<(std::ostream& os, const PhPoint& data) { + assert(DIM >= 1); + os << "["; + for (dimension_t i = 0; i < DIM - 1; ++i) { + os << data[i] << ","; + } + os << data[DIM - 1] << "]"; + return os; +} + +template +std::ostream& operator<<(std::ostream& os, const PhBox& data) { + os << data.min() << ":" << data.max(); + return os; +} + +// Taken from boost::hash_combine +template +inline void hash_combine(std::size_t& seed, const T& v) { + seed ^= std::hash{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +} // namespace improbable::phtree + +namespace std { +template +struct hash> { + size_t operator()(const improbable::phtree::PhPoint& x) const { + std::size_t hash_val = 0; + for (improbable::phtree::dimension_t i = 0; i < DIM; ++i) { + improbable::phtree::hash_combine(hash_val, x[i]); + } + return hash_val; + } +}; +template +struct hash> { + size_t operator()(const improbable::phtree::PhBox& x) const { + std::size_t hash_val = 0; + for (improbable::phtree::dimension_t i = 0; i < DIM; ++i) { + 
improbable::phtree::hash_combine(hash_val, x.min()[i]); + improbable::phtree::hash_combine(hash_val, x.max()[i]); + } + return hash_val; + } +}; +} // namespace std +#endif // PHTREE_COMMON_BASE_TYPES_H diff --git a/phtree/common/base_types_test.cc b/phtree/common/base_types_test.cc new file mode 100644 index 00000000..04a45d6a --- /dev/null +++ b/phtree/common/base_types_test.cc @@ -0,0 +1,86 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "base_types.h" +#include +#include + +using namespace improbable::phtree; + +TEST(PhTreeBaseTypesTest, PhPointD) { + PhPointD<3> point{1, 2, 3}; + + for (int i = 0; i < 3; i++) { + ASSERT_EQ(point[i], i + 1); + } + + // try assigning coordinates + point = {7, 8, 9}; + for (int i = 0; i < 3; i++) { + ASSERT_EQ(point[i], i + 7); + } + + // Test '<<' operator + std::stringstream ss; + ss << point; + ASSERT_EQ("[7,8,9]", ss.str()); +} + +TEST(PhTreeBaseTypesTest, PhPointF) { + PhPoint<3, float> point{1, 2, 3}; + + for (int i = 0; i < 3; i++) { + ASSERT_EQ(point[i], i + 1); + } + + // try assigning coordinates + point = {7, 8, 9}; + for (int i = 0; i < 3; i++) { + ASSERT_EQ(point[i], i + 7); + } + + // Test '<<' operator + std::stringstream ss; + ss << point; + ASSERT_EQ("[7,8,9]", ss.str()); +} + +TEST(PhTreeBaseTypesTest, PhBoxD) { + PhBoxD<3> box({1, 2, 3}, {4, 5, 6}); + + for (int i = 0; i < 3; i++) { + ASSERT_EQ(box.min()[i], i + 1); + ASSERT_EQ(box.max()[i], i + 4); + } + + // try assigning coordinates + box.min() = {7, 8, 9}; + box.max() = {10, 11, 12}; + for (int i = 0; i < 3; i++) { + ASSERT_EQ(box.min()[i], i + 7); + ASSERT_EQ(box.max()[i], i + 10); + } + + // Test that hash function works + std::unordered_set> map; + ASSERT_TRUE(map.insert(box).second); + ASSERT_NE(42, std::hash>()(box)); + + // Test '<<' operator + std::stringstream ss; + ss << box; + ASSERT_EQ("[7,8,9]:[10,11,12]", ss.str()); +} diff --git a/phtree/common/ph_bits.h b/phtree/common/bits.h similarity index 58% rename from phtree/common/ph_bits.h rename to phtree/common/bits.h index ba264e2c..46925d67 100644 --- a/phtree/common/ph_bits.h +++ b/phtree/common/bits.h @@ -14,23 +14,19 @@ * limitations under the License. 
*/ -#ifndef PHTREE_PH_COMMON_BITS_H -#define PHTREE_PH_COMMON_BITS_H +#ifndef PHTREE_COMMON_BITS_H +#define PHTREE_COMMON_BITS_H -#include "ph_base_types.h" +#include "base_types.h" #include -#if defined(__clang__) -#include -#elif defined(__GNUC__) -#include -#elif defined(_MSC_VER) +#if defined(_MSC_VER) // https://docs.microsoft.com/en-us/cpp/intrinsics/x64-amd64-intrinsics-list?view=vs-2019 -#include +#include #endif /* - * PLEASE do not include this file directly, it is included via ph_common.h. + * PLEASE do not include this file directly, it is included via common.h. * * This file defines how certain bit level operations are implemented, such as: * - count leading zeroes @@ -69,7 +65,7 @@ inline bit_width_t NumberOfLeadingZeros(std::uint64_t bit_string) { return n; } -inline bit_width_t NumberOfLeadingZeros(std::int32_t bit_string) { +inline bit_width_t NumberOfLeadingZeros(std::uint32_t bit_string) { if (bit_string == 0) { return 32; } @@ -130,32 +126,55 @@ inline bit_width_t NumberOfTrailingZeros(std::uint64_t bit_string) { } return n - ((x << 1) >> 31); } -} // namespace -#if defined(__clang__) -#define CountLeadingZeros(bits) NumberOfLeadingZeros(bits) -//#define CountLeadingZeros(bits) __lzcnt64(bits) -//#define CountTrailingZeros(bits) NumberOfTrailingZeros(bits) -#define CountTrailingZeros(bits) __tzcnt_u64(bits) +#if defined(__clang__) || defined(__GNUC__) +// See https://en.cppreference.com/w/cpp/language/types +inline bit_width_t NumberOfLeadingZeros_GCC_BUILTIN(std::uint64_t bit_string) { + return bit_string == 0 ? 
64U : __builtin_clzll(bit_string); +} -#elif defined(__GNUC__) -#define CountLeadingZeros(bits) NumberOfLeadingZeros(bits) - // TODO this works only on 64 bit arch, otherwise __builtin_clzll (double 'l') -//#define CountLeadingZeros(bits) __builtin_clzl(bits) -#define CountTrailingZeros(bits) NumberOfTrailingZeros(bits) - // TODO this works only on 64 bit arch, otherwise __builtin_ctzll (double 'l') -//#define CountTrailingZeros(bits) __builtin_ctzl(bits) +inline bit_width_t NumberOfLeadingZeros_GCC_BUILTIN(std::uint32_t bit_string) { + return bit_string == 0 ? 32U : __builtin_clz(bit_string); +} + +inline bit_width_t NumberOfTrailingZeros_GCC_BUILTIN(std::uint64_t bit_string) { + return bit_string == 0 ? 64U : __builtin_ctzll(bit_string); +} + +inline bit_width_t NumberOfTrailingZeros_GCC_BUILTIN(std::uint32_t bit_string) { + return bit_string == 0 ? 32U : __builtin_ctz(bit_string); +} #elif defined(_MSC_VER) // https://docs.microsoft.com/en-us/cpp/intrinsics/x64-amd64-intrinsics-list?view=vs-2019 -// static inline size_t CountLeadingZeros(std::uint64_t bits) { -// // TODO there is alo __lzcnt_u64 (AMD/INTEL) -//// TODO this is MS: -> #include -// #define CountTrailingZeros(bits) __lzcnt64(bits); -#define CountLeadingZeros(bits) NumberOfLeadingZeros(bits) -#define CountTrailingZeros(bits) NumberOfTrailingZeros(bits) -//#define CountTrailingZeros(bits) _tzcnt_u64(bits); +inline bit_width_t NumberOfLeadingZeros_MSVC_BUILTIN(std::uint64_t bit_string) { + unsigned long leading_zero = 0; + return _BitScanReverse64(&leading_zero, bit_string) ? 63 - leading_zero : 64U; +} + +inline bit_width_t NumberOfLeadingZeros_MSVC_BUILTIN(std::uint32_t bit_string) { + unsigned long leading_zero = 0; + return _BitScanReverse(&leading_zero, bit_string) ? 31 - leading_zero : 32U; +} + +inline bit_width_t NumberOfTrailingZeros_MSVC_BUILTIN(std::uint64_t bit_string) { + unsigned long trailing_zero = 0; + return _BitScanForward64(&trailing_zero, bit_string) ? 
trailing_zero : 64U; +} +inline bit_width_t NumberOfTrailingZeros_MSVC_BUILTIN(std::uint32_t bit_string) { + unsigned long trailing_zero = 0; + return _BitScanForward(&trailing_zero, bit_string) ? trailing_zero : 32U; +} +#endif +} // namespace + +#if defined(__clang__) || defined(__GNUC__) +#define CountLeadingZeros(bits) NumberOfLeadingZeros_GCC_BUILTIN(bits) +#define CountTrailingZeros(bits) NumberOfTrailingZeros_GCC_BUILTIN(bits) +#elif defined(_MSC_VER) +#define CountLeadingZeros(bits) NumberOfLeadingZeros_MSVC_BUILTIN(bits) +#define CountTrailingZeros(bits) NumberOfTrailingZeros_MSVC_BUILTIN(bits) #else #define CountLeadingZeros(bits) NumberOfLeadingZeros(bits) #define CountTrailingZeros(bits) NumberOfTrailingZeros(bits) @@ -163,4 +182,4 @@ inline bit_width_t NumberOfTrailingZeros(std::uint64_t bit_string) { } // namespace improbable::phtree -#endif // PHTREE_PH_COMMON_BITS_H +#endif // PHTREE_COMMON_BITS_H diff --git a/phtree/common/ph_bits_test.cc b/phtree/common/bits_test.cc similarity index 66% rename from phtree/common/ph_bits_test.cc rename to phtree/common/bits_test.cc index a451aba6..e4129bf3 100644 --- a/phtree/common/ph_bits_test.cc +++ b/phtree/common/bits_test.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "ph_bits.h" +#include "bits.h" #include #include using namespace improbable::phtree; -TEST(PhTreeBitsTest, CountLeadingZeros) { +TEST(PhTreeBitsTest, CountLeadingZeros64) { std::uint64_t x = 1; x <<= 63; for (int i = 0; i < 64; i++) { @@ -30,7 +30,7 @@ TEST(PhTreeBitsTest, CountLeadingZeros) { } } -TEST(PhTreeBitsTest, CountTrailingZeros) { +TEST(PhTreeBitsTest, CountTrailingZeros64) { std::uint64_t x = 1; for (int i = 0; i < 64; i++) { int ctz = CountTrailingZeros(x); @@ -38,3 +38,22 @@ TEST(PhTreeBitsTest, CountTrailingZeros) { x <<= 1; } } + +TEST(PhTreeBitsTest, CountLeadingZeros32) { + std::uint32_t x = 1; + x <<= 31; + for (int i = 0; i < 32; i++) { + int ctz = CountLeadingZeros(x); + ASSERT_EQ(i, ctz); + x >>= 1; + } +} + +TEST(PhTreeBitsTest, CountTrailingZeros32) { + std::uint32_t x = 1; + for (int i = 0; i < 32; i++) { + int ctz = CountTrailingZeros(x); + ASSERT_EQ(i, ctz); + x <<= 1; + } +} diff --git a/phtree/common/ph_common.h b/phtree/common/common.h similarity index 66% rename from phtree/common/ph_common.h rename to phtree/common/common.h index 1c3e6b9d..32736fe5 100644 --- a/phtree/common/ph_common.h +++ b/phtree/common/common.h @@ -14,17 +14,17 @@ * limitations under the License. */ -#ifndef PHTREE_PH_COMMON_H -#define PHTREE_PH_COMMON_H +#ifndef PHTREE_COMMON_COMMON_H +#define PHTREE_COMMON_COMMON_H -#include "ph_base_types.h" -#include "ph_bits.h" -#include "ph_distance.h" -#include "ph_filter.h" -#include "ph_flat_array_map.h" -#include "ph_flat_sparse_map.h" -#include "ph_preprocessor.h" -#include "ph_tree_stats.h" +#include "base_types.h" +#include "bits.h" +#include "converter.h" +#include "distance.h" +#include "filter.h" +#include "flat_array_map.h" +#include "flat_sparse_map.h" +#include "tree_stats.h" #include #include #include @@ -50,8 +50,8 @@ namespace improbable::phtree { * @returns Encoded HC position, which is the index in the array if the entries would be stored in * an array. 
*/ -template -static hc_pos_t CalcPosInArray(const PhPoint& valSet, bit_width_t postfix_len) { +template +static hc_pos_t CalcPosInArray(const PhPoint& valSet, bit_width_t postfix_len) { // n=DIM, i={0..n-1} // i = 0 : |0|1|0|1|0|1|0|1| // i = 1 : | 0 | 1 | 0 | 1 | @@ -59,9 +59,9 @@ static hc_pos_t CalcPosInArray(const PhPoint& valSet, bit_width_t postfix_l // len = 2^n // Following formula was for inverse ordering of current ordering... // pos = sum (i=1..n, len/2^i) = sum (..., 2^(n-i)) - bit_mask_t valMask = bit_mask_t(1) << postfix_len; + bit_mask_t valMask = bit_mask_t(1) << postfix_len; hc_pos_t pos = 0; - for (dimension_t i = 0; i < DIM; i++) { + for (dimension_t i = 0; i < DIM; ++i) { pos <<= 1; // set pos-bit if bit is set in value pos |= (valMask & valSet[i]) >> postfix_len; @@ -69,11 +69,13 @@ static hc_pos_t CalcPosInArray(const PhPoint& valSet, bit_width_t postfix_l return pos; } -template +template static bool IsInRange( - const PhPoint& candidate, const PhPoint& range_min, const PhPoint& range_max) { - for (dimension_t i = 0; i < DIM; i++) { - scalar_t k = candidate[i]; + const PhPoint& candidate, + const PhPoint& range_min, + const PhPoint& range_max) { + for (dimension_t i = 0; i < DIM; ++i) { + auto k = candidate[i]; if (k < range_min[i] || k > range_max[i]) { return false; } @@ -90,19 +92,22 @@ static bool IsInRange( * the two keys. In case of key1==key2 we return 0. In other words, for 64 bit keys, we return 64 * minus the number of leading bits that are common in both keys across all dimensions. 
*/ -template -static bit_width_t NumberOfDivergingBits(const PhPoint& v1, const PhPoint& v2) { +template +static bit_width_t NumberOfDivergingBits( + const PhPoint& v1, const PhPoint& v2) { // write all differences to diff, we just check diff afterwards - bit_mask_t diff = 0; - for (dimension_t i = 0; i < DIM; i++) { + bit_mask_t diff = 0; + for (dimension_t i = 0; i < DIM; ++i) { diff |= (v1[i] ^ v2[i]); } - return MAX_BIT_WIDTH - CountLeadingZeros(diff); + assert(CountLeadingZeros(diff) <= MAX_BIT_WIDTH); + return MAX_BIT_WIDTH - CountLeadingZeros(diff); } -template -static bool KeyEquals(const PhPoint& key_a, const PhPoint& key_b, bit_mask_t mask) { - for (dimension_t i = 0; i < DIM; i++) { +template +static bool KeyEquals( + const PhPoint& key_a, const PhPoint& key_b, bit_mask_t mask) { + for (dimension_t i = 0; i < DIM; ++i) { if (((key_a[i] ^ key_b[i]) & mask) != 0) { return false; } @@ -114,11 +119,12 @@ static bool KeyEquals(const PhPoint& key_a, const PhPoint& key_b, bit_ // String helpers // ************************************************************************ -static inline std::string ToBinary(scalar_t l, bit_width_t width = MAX_BIT_WIDTH) { +template +static inline std::string ToBinary(SCALAR l, bit_width_t width = MAX_BIT_WIDTH) { std::ostringstream sb; // long mask = DEPTH < 64 ? (1<<(DEPTH-1)) : 0x8000000000000000L; - for (bit_width_t i = 0; i < width; i++) { - bit_mask_t mask = (bit_mask_t(1) << (width - i - 1)); + for (bit_width_t i = 0; i < width; ++i) { + bit_mask_t mask = (bit_mask_t(1) << (width - i - 1)); sb << ((l & mask) != 0 ? 
"1" : "0"); if ((i + 1) % 8 == 0 && (i + 1) < width) { sb << '.'; @@ -127,8 +133,9 @@ static inline std::string ToBinary(scalar_t l, bit_width_t width = MAX_BIT_WIDTH return sb.str(); } -template -static inline std::string ToBinary(const PhPoint& la, bit_width_t width = MAX_BIT_WIDTH) { +template +static inline std::string ToBinary( + const PhPoint& la, bit_width_t width = MAX_BIT_WIDTH) { std::ostringstream sb; for (dimension_t i = 0; i < DIM; ++i) { sb << ToBinary(la[i], width) << ", "; @@ -138,4 +145,4 @@ static inline std::string ToBinary(const PhPoint& la, bit_width_t width = M } // namespace improbable::phtree -#endif // PHTREE_COMMON_H +#endif // PHTREE_COMMON_COMMON_H diff --git a/phtree/common/ph_common_test.cc b/phtree/common/common_test.cc similarity index 83% rename from phtree/common/ph_common_test.cc rename to phtree/common/common_test.cc index 81ffb00e..788c9fd2 100644 --- a/phtree/common/ph_common_test.cc +++ b/phtree/common/common_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ph_common.h" +#include "common.h" #include #include @@ -24,12 +24,12 @@ TEST(PhTreeCommonTest, NumberOfDivergingBits) { double d1 = -55; double d2 = 7; - scalar_t l1 = Preprocessors::ToSortableLong(d1); - scalar_t l2 = Preprocessors::ToSortableLong(d2); - scalar_t l_min = std::numeric_limits::lowest(); - scalar_t l_max = std::numeric_limits::max(); + auto l1 = ScalarConverterIEEE::pre(d1); + auto l2 = ScalarConverterIEEE::pre(d2); + scalar_64_t l_min = std::numeric_limits::lowest(); + scalar_64_t l_max = std::numeric_limits::max(); - bit_mask_t x = NumberOfDivergingBits(PhPoint<2>({l1, l1}), PhPoint<2>({l2, l2})); + bit_width_t x = NumberOfDivergingBits(PhPoint<2>({l1, l1}), PhPoint<2>({l2, l2})); ASSERT_EQ(64, x); x = NumberOfDivergingBits(PhPoint<2>({-1, -1}), PhPoint<2>({l_min, l_min})); ASSERT_EQ(63, x); diff --git a/phtree/common/converter.h b/phtree/common/converter.h new file mode 100644 index 00000000..22b80601 --- /dev/null +++ b/phtree/common/converter.h @@ -0,0 +1,363 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_CONVERTER_H +#define PHTREE_COMMON_CONVERTER_H + +#include "base_types.h" + +/* + * PLEASE do not include this file directly, it is included via common.h. + * + * This file contains conversion/transformation functions for converting user coordinates and + * shapes, such as PhPointD and PhBoxD, into PH-Tree native coordinates (PhPoint). 
+ */ +namespace improbable::phtree { + +class ScalarConverterIEEE { + static_assert(std::is_same()); + + public: + static scalar_64_t pre(double value) { + // To create a sortable long, we convert the double to a long using the IEEE-754 standard, + // which stores doubles in the form [sign (1 bit)][exponent (11 bits)][mantissa (52 bits)]. + // This results in properly ordered longs for all positive doubles. Negative values have + // inverse ordering. For negative doubles, we therefore simply invert them to make them + // sortable, however the sign must be inverted again to stay negative. + scalar_64_t r = reinterpret_cast(value); + return r >= 0 ? r : r ^ 0x7FFFFFFFFFFFFFFFL; + } + + static double post(scalar_64_t value) { + auto v = value >= 0 ? value : value ^ 0x7FFFFFFFFFFFFFFFL; + return reinterpret_cast(v); + } + + static scalar_32_t pre(float value) { + // To create a sortable int, we convert the float to an int using the IEEE-754 standard, + // which stores floats in the form [sign (1 bit)][exponent (8 bits)][mantissa (23 bits)]. + // This results in properly ordered ints for all positive floats. Negative values have + // inverse ordering. For negative floats, we therefore simply invert them to make them + // sortable, however the sign must be inverted again to stay negative. + scalar_32_t r = reinterpret_cast(value); + return r >= 0 ? r : r ^ 0x7FFFFFFFL; + } + + static float post(scalar_32_t value) { + auto v = value >= 0 ? value : value ^ 0x7FFFFFFFL; + return reinterpret_cast(v); + } +}; + +/* + * The ScalarConverterMultiply converts floating point scalars 'f' (double or float) to integral + * scalars 'i' and back. + * Unlike other scalar converters, the multiply-converter does this by multiplying the floating + * point scalar with a fraction such that "i = (cast to integral) f * NUMERATOR/DENOMINATOR". + * + * Warning: This conversion is inherently lossy due to the cast to an integral type. 
+ * Converting a value f1 to i and then back to f2 will likely result in f1 != f2. 
+ */ +template +class ScalarConverterMultiply { + static_assert(std::is_same()); + static_assert(NUMERATOR != 0); + static_assert(DENOMINATOR != 0); + static constexpr double MULTIPLY = NUMERATOR / (double)DENOMINATOR; + static constexpr double DIVIDE = DENOMINATOR / (double)NUMERATOR; + + public: + static scalar_64_t pre(double value) { + return value * MULTIPLY; + } + + static double post(scalar_64_t value) { + return value * DIVIDE; + } + + static scalar_32_t pre(float value) { + return value * MULTIPLY; + } + + static float post(scalar_32_t value) { + return value * DIVIDE; + } +}; + +/* + * Converters convert points and boxes using a ScalarConverter. + * The common base class provides type information for users of the converters. + */ +template < + dimension_t DIM_EXTERNAL, + dimension_t DIM_INTERNAL, + typename SCALAR_EXTERNAL, + typename SCALAR_INTERNAL, + typename KEY_EXTERNAL, + typename QUERY_POINT_EXTERNAL = PhBox> +class ConverterBase { + public: + static constexpr dimension_t DimExternal = DIM_EXTERNAL; + static constexpr dimension_t DimInternal = DIM_INTERNAL; + using ScalarExternal = SCALAR_EXTERNAL; + using ScalarInternal = SCALAR_INTERNAL; + using KeyExternal = KEY_EXTERNAL; + using KeyInternal = PhPoint; + using QueryBoxExternal = QUERY_POINT_EXTERNAL; + using QueryBoxInternal = PhBox; +}; + +/* + * Common base class for converters for point keys. + * This class exists only as convenience for developing custom converters. + */ +template +using ConverterPointBase = + ConverterBase>; + +/* + * Common base class for converters for box keys. + * This class exists only as convenience for developing custom converters. 
+ */ +template +using ConverterBoxBase = + ConverterBase>; + +template +struct ConverterNoOp : public ConverterPointBase { + using BASE = ConverterPointBase; + using Point = typename BASE::KeyExternal; + using PointInternal = typename BASE::KeyInternal; + + constexpr const PointInternal& pre(const Point& point) const { + return point; + } + + constexpr const Point& post(const PointInternal& point) const { + return point; + } + + constexpr const PhBox& pre_query(const PhBox& box) const { + return box; + } +}; + +/* + * Simple point converter that treats all dimensions the same way. + */ +template < + dimension_t DIM, + typename SCALAR_EXTERNAL, + typename SCALAR_INTERNAL, + typename CONVERT = ScalarConverterIEEE> +class SimplePointConverter : public ConverterPointBase { + using BASE = ConverterPointBase; + using Point = typename BASE::KeyExternal; + using PointInternal = typename BASE::KeyInternal; + using QueryBox = typename BASE::QueryBoxExternal; + + static_assert(std::is_same>::value); + static_assert(std::is_same>::value); + + public: + explicit SimplePointConverter(const CONVERT converter = CONVERT()) : converter_{converter} {}; + + PointInternal pre(const Point& point) const { + PointInternal out; + for (dimension_t i = 0; i < DIM; ++i) { + out[i] = converter_.pre(point[i]); + } + return out; + } + + Point post(const PointInternal& point) const { + Point out; + for (dimension_t i = 0; i < DIM; ++i) { + out[i] = converter_.post(point[i]); + } + return out; + } + + PhBox pre_query(const QueryBox& query_box) const { + return {pre(query_box.min()), pre(query_box.max())}; + } + + private: + CONVERT converter_; +}; + +template < + dimension_t DIM, + typename SCALAR_EXTERNAL, + typename SCALAR_INTERNAL, + typename CONVERT = ScalarConverterIEEE> +class SimpleBoxConverter : public ConverterBoxBase { + using BASE = ConverterBoxBase; + using Box = typename BASE::KeyExternal; + using PointInternal = typename BASE::KeyInternal; + using QueryBox = typename 
BASE::QueryBoxExternal; + + static_assert(std::is_same>::value); + static_assert(std::is_same>::value); + + public: + explicit SimpleBoxConverter(const CONVERT converter = CONVERT()) : converter_{converter} {}; + PointInternal pre(const Box& box) const { + PointInternal out; + for (dimension_t i = 0; i < DIM; ++i) { + out[i] = converter_.pre(box.min()[i]); + out[i + DIM] = converter_.pre(box.max()[i]); + } + return out; + } + + Box post(const PointInternal& point) const { + Box out; + for (dimension_t i = 0; i < DIM; ++i) { + out.min()[i] = converter_.post(point[i]); + out.max()[i] = converter_.post(point[i + DIM]); + } + return out; + } + + auto pre_query(const QueryBox& query_box) const { + PhBox out; + auto& min = out.min(); + auto& max = out.max(); + for (dimension_t i = 0; i < DIM; ++i) { + min[i] = converter_.pre(query_box.min()[i]); + max[i] = converter_.pre(query_box.max()[i]); + } + return out; + } + + private: + CONVERT converter_; +}; + +/* + * IEEE Converters convert float/double to integral types using some bit operations. + * The conversion maintain ordering and precision (i.e. it is a loss-less conversion). + * However, it results in a strongly non-metric distortion of the space, so distances between + * converted coordinates cannot easily be converted back to normal double/float metric space. + */ +template +using ConverterIEEE = SimplePointConverter; + +template +using ConverterFloatIEEE = SimplePointConverter; + +template +using ConverterBoxIEEE = SimpleBoxConverter; + +template +using ConverterBoxFloatIEEE = SimpleBoxConverter; + +/* + * Multiply Converter. + * The multiply converter converts float/double to integral type by multiplying them + * with NUMERATOR/DENOMINATOR and then casting them to the desired integral type. + * This conversion may lossy, i.e. values may lose precision when converted with this converter. 
+ */ + +template +using ConverterMultiply = + SimplePointConverter>; + +template +using ConverterBoxMultiply = + SimpleBoxConverter>; + +/* + * The behaviour of window-queries on Ph-Trees with box keys can be configured with QueryTypes. + * For example, a window query of type QueryIntersect will return all keys that have at least + * some overlap with the query window. Queries of type QueryInclude will only return keys that + * completely overlap with (= are included in) the query window. + * + * The query types (QueryIntersect etc) take as argument a pair of points or a box that represent + * an axis aligned rectangular query window. The input point need to have been transformed already + * into the tree's internal coordinate system with the respective converter. + * + * The output of the query type functions is a pair of box keys. + * The meaning of these boxes is somewhat unintuitive, but they represent the min/max coordinates + * in the tree's internal representation where boxes are stored as points. + * + * Default implementations: + * - QueryPoint is a no-op implementation for point keys + * (no-op because intersect/include make no sense for points) + * - QueryIntersect returns all box keys that intersects with the query box + * - QueryInclude returns all box keys that lies completely inside the query box + */ + +/** + * No-op query-type for querying a PH-Tree that contains point keys. + */ +struct QueryPoint { + template + auto operator()(const PhBox& query_box) { + return query_box; + } +}; + +/* + * The function produces a pair of 2*DIM points that can be used to perform an 'intersect' + * type query on a PhTree that contains box data. I.e. the query will return any boxes that + * intersect with the original query window. + * Here 'intersect' includes any boxes with marginal overlap, i.e. any box that has at least + * one point in common with the query window. + * For example a box {{1,1},{3,3}} is considered to overlap with a box {{3,3},{5,5}}. 
+ */ +struct QueryIntersect { + template + auto operator()(const PhBox& query_box) { + auto neg_inf = std::numeric_limits::min(); + auto pos_inf = std::numeric_limits::max(); + PhBox<2 * DIM, SCALAR_INTERNAL> min_max; + auto& min = min_max.min(); + auto& max = min_max.max(); + for (dimension_t i = 0; i < DIM; i++) { + min[i] = neg_inf; + min[i + DIM] = query_box.min()[i]; + max[i] = query_box.max()[i]; + max[i + DIM] = pos_inf; + } + return min_max; + } +}; + +/* + * QueryInclude set up a query that return only key that are completely included in a rectangular + * query window defined by min/max (inclusive). + * For example, a box {{1,1},{3,3}} is considered to be included in a box {{1,1},{5,5}}. + */ +struct QueryInclude { + template + auto operator()(const PhBox& query_box) { + PhBox<2 * DIM, SCALAR_INTERNAL> min_max; + auto& min = min_max.min(); + auto& max = min_max.max(); + for (dimension_t i = 0; i < DIM; i++) { + min[i] = query_box.min()[i]; + min[i + DIM] = query_box.min()[i]; + max[i] = query_box.max()[i]; + max[i + DIM] = query_box.max()[i]; + } + return min_max; + } +}; +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_CONVERTER_H diff --git a/phtree/common/converter_test.cc b/phtree/common/converter_test.cc new file mode 100644 index 00000000..c9ede115 --- /dev/null +++ b/phtree/common/converter_test.cc @@ -0,0 +1,49 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "converter.h" +#include "common.h" +#include + +using namespace improbable::phtree; + +template +void test_less_than(PRE pre, POST post, float d1, float d2) { + auto l1 = pre(d1); + auto l2 = pre(d2); + ASSERT_LT(l1, l2); + ASSERT_EQ(d1, post(l1)); + ASSERT_EQ(d2, post(l2)); +} + +template +void testAll(PRE pre, POST post) { + test_less_than(pre, post, 55.0f, 71.0f); + test_less_than(pre, post, -55.0f, 7.0f); + test_less_than(pre, post, -55.0f, -7.0f); +} + +TEST(PhTreePreprocessorTest, IEEE_Double_SmokeTest) { + auto pre = [](double d) { return ScalarConverterIEEE::pre(d); }; + auto post = [](scalar_64_t s) { return ScalarConverterIEEE::post(s); }; + testAll(pre, post); +} + +TEST(PhTreePreprocessorTest, IEEE_Float_SmokeTest) { + auto pre = [](float f) { return ScalarConverterIEEE::pre(f); }; + auto post = [](scalar_32_t s) { return ScalarConverterIEEE::post(s); }; + testAll(pre, post); +} diff --git a/phtree/common/ph_tree_debug_helper.h b/phtree/common/debug_helper.h similarity index 92% rename from phtree/common/ph_tree_debug_helper.h rename to phtree/common/debug_helper.h index 73cd6f6a..ede89586 100644 --- a/phtree/common/ph_tree_debug_helper.h +++ b/phtree/common/debug_helper.h @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#ifndef PHTREE_COMMON_PH_TREE_DEBUG_HELPER_H -#define PHTREE_COMMON_PH_TREE_DEBUG_HELPER_H +#ifndef PHTREE_COMMON_DEBUG_HELPER_H +#define PHTREE_COMMON_DEBUG_HELPER_H -#include "ph_tree_stats.h" +#include "tree_stats.h" namespace improbable::phtree { @@ -67,4 +67,4 @@ class PhTreeDebugHelper { }; } // namespace improbable::phtree -#endif // PHTREE_COMMON_PH_TREE_DEBUG_HELPER_H +#endif // PHTREE_COMMON_DEBUG_HELPER_H diff --git a/phtree/common/ph_distance.h b/phtree/common/distance.h similarity index 56% rename from phtree/common/ph_distance.h rename to phtree/common/distance.h index 367ac0ca..102e9287 100644 --- a/phtree/common/ph_distance.h +++ b/phtree/common/distance.h @@ -14,15 +14,15 @@ * limitations under the License. */ -#ifndef PHTREE_PH_COMMON_DISTANCES_H -#define PHTREE_PH_COMMON_DISTANCES_H +#ifndef PHTREE_COMMON_DISTANCES_H +#define PHTREE_COMMON_DISTANCES_H -#include "ph_base_types.h" -#include "ph_bits.h" -#include "ph_flat_array_map.h" -#include "ph_flat_sparse_map.h" -#include "ph_preprocessor.h" -#include "ph_tree_stats.h" +#include "base_types.h" +#include "bits.h" +#include "converter.h" +#include "flat_array_map.h" +#include "flat_sparse_map.h" +#include "tree_stats.h" #include #include #include @@ -36,16 +36,28 @@ namespace improbable::phtree { * by the kNN (k nearest neighbor) query facility. 
* * The implementations in this file are: - * - PhDistanceDoubleEuclidean: Euclidean distance for PhPointD - * - PhDistanceDoubleL1: L1 distance (manhattan distance / taxi distance) for PhPointD - * - PhDistanceLongEuclidean: Euclidean distance for PhPoint + * - DistanceEuclidean: Euclidean distance for PhPoint & PhPointD + * - DistanceL1: L1 distance (Manhattan distance / taxi distance) for PhPoint & PhPointD */ template -struct PhDistanceDoubleEuclidean { +struct DistanceEuclidean { + double operator()(const PhPoint& v1, const PhPoint& v2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + assert( + (v1[i] >= 0) != (v2[i] >= 0) || + double(v1[i]) - double(v2[i]) < + double(std::numeric_limits::max())); + double d2 = double(v1[i] - v2[i]); + sum2 += d2 * d2; + } + return sqrt(sum2); + }; + double operator()(const PhPointD& p1, const PhPointD& p2) const { double sum2 = 0; - for (dimension_t i = 0; i < DIM; i++) { + for (dimension_t i = 0; i < DIM; ++i) { double d2 = p1[i] - p2[i]; sum2 += d2 * d2; } @@ -54,35 +66,28 @@ struct PhDistanceDoubleEuclidean { }; template -struct PhDistanceDoubleL1 { - double operator()(const PhPointD& v1, const PhPointD& v2) const { +struct DistanceL1 { + double operator()(const PhPoint& v1, const PhPoint& v2) const { double sum = 0; - for (dimension_t i = 0; i < DIM; i++) { - sum += std::abs(v1[i] - v2[i]); + for (dimension_t i = 0; i < DIM; ++i) { + assert( + (v1[i] >= 0) != (v2[i] >= 0) || + double(v1[i]) - double(v2[i]) < + double(std::numeric_limits::max())); + sum += std::abs(double(v1[i] - v2[i])); } return sum; }; -}; -template -struct PhDistanceLongEuclidean { - double operator()(const PhPoint& v1, const PhPoint& v2) const { - // Substraction of large long integers can easily overflow because the distance can be - // larger than the value range. 
Such large values are common when using the IEEE - // double-to-long converter, however, if we use a converter we should use a distance - // function that processes converted values. - double sum2 = 0; - for (dimension_t i = 0; i < DIM; i++) { - assert( - (v1[i] >= 0) == (v2[i] >= 0) || - double(v1[i]) - double(v2[i]) < double(std::numeric_limits::max())); - double d2 = double(v1[i] - v2[i]); - sum2 += d2 * d2; + double operator()(const PhPointD& v1, const PhPointD& v2) const { + double sum = 0; + for (dimension_t i = 0; i < DIM; ++i) { + sum += std::abs(v1[i] - v2[i]); } - return sqrt(sum2); + return sum; }; }; } // namespace improbable::phtree -#endif // PHTREE_PH_COMMON_DISTANCES_H +#endif // PHTREE_COMMON_DISTANCES_H diff --git a/phtree/common/ph_distance_test.cc b/phtree/common/distance_test.cc similarity index 61% rename from phtree/common/ph_distance_test.cc rename to phtree/common/distance_test.cc index a0313495..0038285a 100644 --- a/phtree/common/ph_distance_test.cc +++ b/phtree/common/distance_test.cc @@ -14,23 +14,28 @@ * limitations under the License. 
*/ -#include "ph_common.h" +#include "common.h" #include #include using namespace improbable::phtree; TEST(PhTreeDistanceTest, DoubleEuclidean) { - auto distance = PhDistanceDoubleEuclidean<2>(); - ASSERT_DOUBLE_EQ(5, distance({-1, -1}, {2, 3})); + auto distance = DistanceEuclidean<2>(); + ASSERT_DOUBLE_EQ(5, distance(PhPointD<2>{-1, -1}, PhPointD<2>{2, 3})); } TEST(PhTreeDistanceTest, DoubleL1) { - auto distance = PhDistanceDoubleL1<2>(); - ASSERT_DOUBLE_EQ(7, distance({-1, -1}, {2, 3})); + auto distance = DistanceL1<2>(); + ASSERT_DOUBLE_EQ(7, distance(PhPointD<2>{-1, -1}, PhPointD<2>{2, 3})); } TEST(PhTreeDistanceTest, LongEuclidean) { - auto distance = PhDistanceLongEuclidean<2>(); - ASSERT_DOUBLE_EQ(5, distance({-1, -1}, {2, 3})); + auto distance = DistanceEuclidean<2>(); + ASSERT_DOUBLE_EQ(5, distance(PhPoint<2>{-1, -1}, PhPoint<2>{2, 3})); +} + +TEST(PhTreeDistanceTest, LongL1) { + auto distance = DistanceL1<2>(); + ASSERT_DOUBLE_EQ(7, distance(PhPoint<2>{-1, -1}, PhPoint<2>{2, 3})); } diff --git a/phtree/common/ph_filter.h b/phtree/common/filter.h similarity index 61% rename from phtree/common/ph_filter.h rename to phtree/common/filter.h index b74805b2..20c58c6a 100644 --- a/phtree/common/ph_filter.h +++ b/phtree/common/filter.h @@ -14,15 +14,15 @@ * limitations under the License. */ -#ifndef PHTREE_PH_COMMON_FILTERS_H -#define PHTREE_PH_COMMON_FILTERS_H - -#include "ph_base_types.h" -#include "ph_bits.h" -#include "ph_flat_array_map.h" -#include "ph_flat_sparse_map.h" -#include "ph_preprocessor.h" -#include "ph_tree_stats.h" +#ifndef PHTREE_COMMON_FILTERS_H +#define PHTREE_COMMON_FILTERS_H + +#include "base_types.h" +#include "bits.h" +#include "converter.h" +#include "flat_array_map.h" +#include "flat_sparse_map.h" +#include "tree_stats.h" #include #include #include @@ -56,14 +56,14 @@ namespace improbable::phtree { /* * The no-op filter is the default filter for the PH-Tree. It always returns 'true'. 
*/ -template -struct PhFilterNoOp { +struct FilterNoOp { /* * @param key The key/coordinate of the entry. * @param value The value of the entry. * @returns This default implementation always returns `true`. */ - constexpr bool IsEntryValid(const PhPoint& key, const T& value) const { + template + constexpr bool IsEntryValid(const KEY& key, const T& value) const { return true; } @@ -75,7 +75,8 @@ struct PhFilterNoOp { * bits_to_ignore is 64-10=54. * @returns This default implementation always returns `true`. */ - constexpr bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + template + constexpr bool IsNodeValid(const KEY& prefix, int bits_to_ignore) const { return true; } }; @@ -84,41 +85,57 @@ struct PhFilterNoOp { * The AABB filter can be used to query a point tree for an axis aligned bounding box (AABB). * The result is equivalent to that of the 'begin_query(...)' function. */ -template -class PhFilterAABB { +template > +class FilterAABB { + using KeyExternal = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + + static constexpr auto DIM = CONVERTER::DimInternal; + public: - PhFilterAABB(const PhPoint& minInclude, const PhPoint& maxInclude) - : minIncludeBits{minInclude}, maxIncludeBits{maxInclude} {}; + FilterAABB( + const KeyExternal& min_include, + const KeyExternal& max_include, + CONVERTER converter = CONVERTER()) + : min_external_{min_include} + , max_external_{max_include} + , min_internal_{converter.pre(min_include)} + , max_internal_{converter.pre(max_include)} + , converter_{converter} {}; /* * This function allows resizing/shifting the AABB while iterating over the tree. 
*/ - void set(const PhPoint& minExclude, const PhPoint& maxExclude) { - minIncludeBits = minExclude; - maxIncludeBits = maxExclude; + void set(const KeyExternal& min_include, const KeyExternal& max_include) { + min_external_ = min_include; + max_external_ = max_include; + min_internal_ = converter_.pre(min_include); + max_internal_ = converter_.pre(max_include); } - [[nodiscard]] bool IsEntryValid(const PhPoint& key, const T& value) const { - for (int i = 0; i < DIM; ++i) { - if (key[i] < minIncludeBits[i] || key[i] > maxIncludeBits[i]) { + template + [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T& value) const { + auto point = converter_.post(key); + for (dimension_t i = 0; i < DIM; ++i) { + if (point[i] < min_external_[i] || point[i] > max_external_[i]) { return false; } } return true; } - [[nodiscard]] bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) const { // Let's assume that we always want to traverse the root node (bits_to_ignore == 64) - if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { return true; } - bit_mask_t maskMin = MAX_MASK << bits_to_ignore; - bit_mask_t maskMax = ~maskMin; + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; for (size_t i = 0; i < prefix.size(); ++i) { - scalar_t minBits = prefix[i] & maskMin; - scalar_t maxBits = prefix[i] | maskMax; - if (maxBits < minIncludeBits[i] || minBits > maxIncludeBits[i]) { + if ((prefix[i] | node_max_bits) < min_internal_[i] || + (prefix[i] & node_min_bits) > max_internal_[i]) { return false; } } @@ -126,10 +143,13 @@ class PhFilterAABB { } private: - const PhPoint minIncludeBits; - const PhPoint maxIncludeBits; + const KeyExternal min_external_; + const KeyExternal max_external_; + const KeyInternal min_internal_; + const KeyInternal max_internal_; + const CONVERTER converter_; }; } // 
namespace improbable::phtree -#endif // PHTREE_PH_COMMON_FILTERS_H +#endif // PHTREE_COMMON_FILTERS_H diff --git a/phtree/common/ph_filter_test.cc b/phtree/common/filter_test.cc similarity index 82% rename from phtree/common/ph_filter_test.cc rename to phtree/common/filter_test.cc index c77a4c62..d3a9226a 100644 --- a/phtree/common/ph_filter_test.cc +++ b/phtree/common/filter_test.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "ph_common.h" +#include "common.h" #include #include using namespace improbable::phtree; TEST(PhTreeFilterTest, BoxFilterTest) { - auto filter = PhFilterAABB<2, int*>({3, 3}, {7, 7}); + FilterAABB> filter{{3, 3}, {7, 7}}; // root is always valid ASSERT_TRUE(filter.IsNodeValid({0, 0}, 63)); // valid because node encompasses the AABB @@ -36,7 +36,7 @@ TEST(PhTreeFilterTest, BoxFilterTest) { } TEST(PhTreeFilterTest, FilterNoOpSmokeTest) { - auto filter = PhFilterNoOp<3, int>(); - ASSERT_TRUE(filter.IsNodeValid({3, 7, 2}, 10)); - ASSERT_TRUE(filter.IsEntryValid({3, 7, 2}, 10)); + auto filter = FilterNoOp(); + ASSERT_TRUE(filter.IsNodeValid>({3, 7, 2}, 10)); + ASSERT_TRUE(filter.IsEntryValid>({3, 7, 2}, 10)); } \ No newline at end of file diff --git a/phtree/common/ph_flat_array_map.h b/phtree/common/flat_array_map.h similarity index 70% rename from phtree/common/ph_flat_array_map.h rename to phtree/common/flat_array_map.h index 27633b1f..3529f2f7 100644 --- a/phtree/common/ph_flat_array_map.h +++ b/phtree/common/flat_array_map.h @@ -17,12 +17,13 @@ #ifndef PHTREE_COMMON_FLAT_ARRAY_MAP_H #define PHTREE_COMMON_FLAT_ARRAY_MAP_H -#include "ph_bits.h" +#include "bits.h" +#include #include #include /* - * PLEASE do not include this file directly, it is included via ph_common.h. + * PLEASE do not include this file directly, it is included via common.h. * * This file contains the array_map implementation, which is used in low-dimensional nodes in the * PH-Tree. 
@@ -54,7 +55,15 @@ class array_map { static_assert(SIZE > 0); public: - explicit array_map() : occupancy{0}, size_{0} {}; + ~array_map() { + if (occupancy != 0) { + for (size_t i = 0; i < SIZE; ++i) { + if (occupied(i)) { + data(i).~pair(); + } + } + } + } [[nodiscard]] auto find(size_t index) const { return occupied(index) ? PhFlatMapIterator{index, *this} : end(); @@ -71,14 +80,14 @@ class array_map { [[nodiscard]] auto begin() const { size_t index = CountTrailingZeros(occupancy); // Assert index points to a valid position or outside the map if the map is empty - assert((size_ == 0 && index >= SIZE) || occupied(index)); + assert((size() == 0 && index >= SIZE) || occupied(index)); return PhFlatMapIterator{index < SIZE ? index : SIZE, *this}; } [[nodiscard]] auto cbegin() const { size_t index = CountTrailingZeros(occupancy); // Assert index points to a valid position or outside the map if the map is empty - assert((size_ == 0 && index >= SIZE) || occupied(index)); + assert((size() == 0 && index >= SIZE) || occupied(index)); return PhFlatMapIterator{index < SIZE ? index : SIZE, *this}; } @@ -86,50 +95,58 @@ class array_map { return PhFlatMapIterator{SIZE, *this}; } - auto emplace(size_t index, T&& value) { - return emplace_base(index, std::forward(value)); + template + auto emplace(_Args&&... __args) { + return try_emplace_base(std::forward<_Args>(__args)...); } template - auto emplace(_Args&&... __args) { - return emplace_base(std::forward<_Args>(__args)...); + auto try_emplace(size_t index, _Args&&... 
__args) { + return try_emplace_base(index, std::forward<_Args>(__args)...); } bool erase(size_t index) { if (occupied(index)) { + data(index).~pair(); occupied(index, false); - --size_; - data_[index].second.~T(); return true; } return false; } bool erase(PhFlatMapIterator& iterator) { - size_t index = iterator.first; - if (occupied(index)) { - occupied(index, false); - --size_; - data_[index].second.~T(); - return true; - } - return false; + return erase(iterator.first); } [[nodiscard]] size_t size() const { - return size_; + return std::bitset<64>(occupancy).count(); } private: - std::pair*, bool> emplace_base(size_t index, T&& value) { + template + std::pair*, bool> try_emplace_base(size_t index, _Args&&... __args) { if (!occupied(index)) { - data_[index].first = index; - data_[index].second = std::forward(value); - ++size_; + new (reinterpret_cast(&data_[index])) PhFlatMapPair( + std::piecewise_construct, + std::forward_as_tuple(index), + std::forward_as_tuple(std::forward<_Args>(__args)...)); occupied(index, true); - return {&data_[index], true}; + return {&data(index), true}; } - return {&data_[index], false}; + return {&data(index), false}; + } + + /* + * This returns the element at the given index, which is _not_ the n'th element (for n = index). + */ + PhFlatMapPair& data(size_t index) { + assert(occupied(index)); + return *std::launder(reinterpret_cast*>(&data_[index])); + } + + const PhFlatMapPair& data(size_t index) const { + assert(occupied(index)); + return *std::launder(reinterpret_cast*>(&data_[index])); } [[nodiscard]] size_t lower_bound_index(size_t index) const { @@ -151,9 +168,9 @@ class array_map { return (occupancy >> index) & U64_ONE; } - bit_string_t occupancy; - std::uint32_t size_; - PhFlatMapPair data_[SIZE]; + bit_string_t occupancy = 0; + // We use an untyped array to avoid implicit calls to constructors and destructors of entries. 
+ std::aligned_storage_t), alignof(PhFlatMapPair)> data_[SIZE]; }; namespace { @@ -172,16 +189,16 @@ class PhFlatMapIterator { auto& operator*() const { assert(first < SIZE && map_->occupied(first)); - return const_cast&>(map_->data_[first]); + return const_cast&>(map_->data(first)); } auto* operator-> () const { assert(first < SIZE && map_->occupied(first)); - return const_cast*>(&map_->data_[first]); + return const_cast*>(&map_->data(first)); } auto& operator++() { - first = (first + 1) >= SIZE ? SIZE : first = map_->lower_bound_index(first + 1); + first = (first + 1) >= SIZE ? SIZE : map_->lower_bound_index(first + 1); return *this; } diff --git a/phtree/common/ph_flat_array_map_test.cc b/phtree/common/flat_array_map_test.cc similarity index 67% rename from phtree/common/ph_flat_array_map_test.cc rename to phtree/common/flat_array_map_test.cc index 3b230fac..e0250820 100644 --- a/phtree/common/ph_flat_array_map_test.cc +++ b/phtree/common/flat_array_map_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ph_flat_array_map.h" +#include "flat_array_map.h" #include #include @@ -36,8 +36,41 @@ TEST(PhTreeFlatArrayMapTest, SmokeTest) { ASSERT_EQ(hasVal, hasValRef); if (!hasVal) { reference_map.emplace(val, val); - // TODO avoid move() - test_map.emplace(val, std::move(val)); + test_map.emplace(val, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeFlatArrayMapTest, SmokeTestWithTryEmplace) { + const int max_size = 8; + + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + array_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); } ASSERT_EQ(test_map.size(), reference_map.size()); for (auto it : reference_map) { @@ -63,8 +96,7 @@ TEST(PhTreeFlatArrayMapTest, IteratorPostIncrementTest) { size_t val = j * 2; bool hasVal = test_map.find(val) != test_map.end(); if (!hasVal) { - // TODO avoid move() - test_map.emplace(val, std::move(val)); + test_map.try_emplace(val, val); } } diff --git a/phtree/common/ph_flat_sparse_map.h b/phtree/common/flat_sparse_map.h similarity index 78% rename from phtree/common/ph_flat_sparse_map.h rename to phtree/common/flat_sparse_map.h index 25a90484..51c4f321 100644 --- a/phtree/common/ph_flat_sparse_map.h +++ b/phtree/common/flat_sparse_map.h @@ -17,13 +17,13 @@ #ifndef 
PHTREE_COMMON_FLAT_SPARSE_MAP_H #define PHTREE_COMMON_FLAT_SPARSE_MAP_H -#include "ph_bits.h" +#include "bits.h" #include #include #include /* - * PLEASE do not include this file directly, it is included via ph_common.h. + * PLEASE do not include this file directly, it is included via common.h. * * This file contains the sparse_map implementation, which is used in medium-dimensional nodes in * the PH-Tree. @@ -98,13 +98,14 @@ class sparse_map { return data_.end(); } - auto emplace(size_t index, T&& value) { - return emplace_base(index, std::forward(value)); + template + auto emplace(_Args&&... __args) { + return try_emplace_base(std::forward<_Args>(__args)...); } template - auto emplace(_Args&&... __args) { - return emplace_base(std::forward<_Args>(__args)...); + auto try_emplace(size_t key, _Args&&... __args) { + return try_emplace_base(key, std::forward<_Args>(__args)...); } void erase(size_t key) { @@ -123,12 +124,28 @@ class sparse_map { } private: - auto emplace_base(size_t key, T&& value) { + template + auto emplace_base(size_t key, _Args&&... __args) { + auto it = lower_bound(key); + if (it != end() && it->first == key) { + return std::make_pair(it, false); + } else { + return std::make_pair(data_.emplace(it, key, std::forward(__args)...), true); + } + } + + template + auto try_emplace_base(size_t key, _Args&&... 
__args) { auto it = lower_bound(key); if (it != end() && it->first == key) { return std::make_pair(it, false); } else { - return std::make_pair(data_.insert(it, {key, std::forward(value)}), true); + auto x = data_.emplace( + it, + std::piecewise_construct, + std::forward_as_tuple(key), + std::forward_as_tuple(std::forward<_Args>(__args)...)); + return std::make_pair(x, true); } } diff --git a/phtree/common/ph_flat_sparse_map_test.cc b/phtree/common/flat_sparse_map_test.cc similarity index 58% rename from phtree/common/ph_flat_sparse_map_test.cc rename to phtree/common/flat_sparse_map_test.cc index 58c48b49..dcb72bba 100644 --- a/phtree/common/ph_flat_sparse_map_test.cc +++ b/phtree/common/flat_sparse_map_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ph_flat_sparse_map.h" +#include "flat_sparse_map.h" #include #include @@ -36,8 +36,41 @@ TEST(PhTreeFlatSparseMapTest, SmokeTest) { ASSERT_EQ(hasVal, hasValRef); if (!hasVal) { reference_map.emplace(val, val); - // TODO avoid move() - test_map.emplace(val, std::move(val)); + test_map.emplace(val, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeFlatSparseMapTest, SmokeTestWithTryEmplace) { + const int max_size = 8; + + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + sparse_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + 
if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); } ASSERT_EQ(test_map.size(), reference_map.size()); for (auto it : reference_map) { diff --git a/phtree/common/ph_base_types.h b/phtree/common/ph_base_types.h deleted file mode 100644 index 40a4b218..00000000 --- a/phtree/common/ph_base_types.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PHTREE_COMMON_BASE_TYPES_H -#define PHTREE_COMMON_BASE_TYPES_H - -#include -#include -#include -#include -#include -#include - -/* - * PLEASE do not include this file directly, it is included via ph_common.h. - * - * This file contains specifications for various types used in the PH-Tree, including - * PhPoint, PhPointD and PhPointBox. - */ -namespace improbable::phtree { - -// ************************************************************************ -// Constants and base types -// ************************************************************************ - -using scalar_t = int64_t; -// Bits in a coordinate (usually a double or long has 64 bits, so uint_8 suffices) -using bit_width_t = uint16_t; -// Number of bit for 'scalar_t'. Note that 'digits' does _not_ include sign bit, so e.g. int64_t has -// 63 `digits`, however we need all bits, i.e. 64. 
-static constexpr bit_width_t MAX_BIT_WIDTH = - std::numeric_limits::digits + std::numeric_limits::is_signed; -using node_size_t = int32_t; // Node sizes -using bit_mask_t = uint64_t; // Bit mask -static constexpr bit_mask_t MAX_MASK = std::numeric_limits::max(); -using dimension_t = size_t; // Number of dimensions -using hc_pos_t = uint64_t; - -template -static constexpr hc_pos_t END_POS = (hc_pos_t(1) << DIM); // Max hypercube address + 1 - -// double -using scalar_d_t = double; -static constexpr scalar_d_t D_INFINITY = std::numeric_limits::infinity(); -static constexpr scalar_d_t D_NEG_INFINITY = -std::numeric_limits::infinity(); - -// ************************************************************************ -// Basic structs and classes -// ************************************************************************ - -template -class PhBoxD; - -struct HashPhBoxD; - -template -using PhPoint = std::array; - -template -using PhPointD = std::array; - -template -class PhBoxD { - friend HashPhBoxD; - - public: - explicit PhBoxD() = default; - - PhBoxD(const PhBoxD& orig) = default; - - PhBoxD(const std::array& min, const std::array& max) - : min_{min}, max_{max} {} - - [[nodiscard]] PhPointD min() const { - return min_; - } - - [[nodiscard]] PhPointD max() const { - return max_; - } - - [[nodiscard]] PhPointD& min() { - return min_; - } - - [[nodiscard]] PhPointD& max() { - return max_; - } - - void min(const std::array& new_min) { - min_ = new_min; - } - - void max(const std::array& new_max) { - max_ = new_max; - } - - auto operator==(const PhBoxD& other) const -> bool { - return min_ == other.min_ && max_ == other.max_; - } - - private: - PhPointD min_; - PhPointD max_; -}; - -struct HashPhBoxD { - template - std::size_t operator()(const PhBoxD& x) const { - std::size_t hash_val = 0; - for (dimension_t i = 0; i < DIM; i++) { - hash_val = std::hash{}(x.min_[i]) ^ (hash_val * 31); - hash_val = std::hash{}(x.max_[i]) ^ (hash_val * 31); - } - return hash_val; - } -}; - 
-template -std::ostream& operator<<(std::ostream& os, const PhPoint& data) { - assert(DIM >= 1); - os << "["; - for (dimension_t i = 0; i < DIM - 1; i++) { - os << data[i] << ","; - } - os << data[DIM - 1] << "]"; - return os; -} - -template -std::ostream& operator<<(std::ostream& os, const PhPointD& data) { - assert(DIM >= 1); - os << "["; - for (dimension_t i = 0; i < DIM - 1; i++) { - os << data[i] << ","; - } - os << data[DIM - 1] << "]"; - return os; -} - -template -std::ostream& operator<<(std::ostream& os, const PhBoxD& data) { - os << data.min() << ":" << data.max(); - return os; -} - -} // namespace improbable::phtree - -#endif // PHTREE_COMMON_BASE_TYPES_H diff --git a/phtree/common/ph_base_types_test.cc b/phtree/common/ph_base_types_test.cc deleted file mode 100644 index 048ceccb..00000000 --- a/phtree/common/ph_base_types_test.cc +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ph_base_types.h" -#include -#include - -using namespace improbable::phtree; - -TEST(PhTreeBaseTypesTest, PhBoxD) { - PhBoxD<3> box({1, 2, 3}, {4, 5, 6}); - - for (int i = 0; i < 3; i++) { - ASSERT_EQ(box.min()[i], i + 1); - ASSERT_EQ(box.max()[i], i + 4); - } - - // try assigning coordinates - box.min() = {7, 8, 9}; - box.max() = {10, 11, 12}; - for (int i = 0; i < 3; i++) { - ASSERT_EQ(box.min()[i], i + 7); - ASSERT_EQ(box.max()[i], i + 10); - } -} diff --git a/phtree/common/ph_preprocessor.h b/phtree/common/ph_preprocessor.h deleted file mode 100644 index 3c558a55..00000000 --- a/phtree/common/ph_preprocessor.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PHTREE_COMMON_PH_PREPROCESSOR_H -#define PHTREE_COMMON_PH_PREPROCESSOR_H - -#include "ph_base_types.h" -#include -#include - -/* - * PLEASE do not include this file directly, it is included via ph_common.h. - * - * This file contains conversion/tranmsformation functions for converting user coordinates and - * shapes, such as PhPointD and PhBoxD, into PH-Tree native coordinates (PhPoint). 
- */ -namespace improbable::phtree { - -template -using PhPreprocessor = PhPoint (*)(const KEY& point); - -template -using PhPostprocessor = KEY (*)(const PhPoint& point); - -template -using PhPreprocessorD = PhPreprocessor>; - -template -using PhPostprocessorD = PhPostprocessor>; - -template -using PhPreprocessorBoxD = PhPoint<2 * DIM> (*)(const PhBoxD& point); - -template -using PhPostprocessorBoxD = PhBoxD (*)(const PhPoint<2 * DIM>& point); - -class Preprocessors { - public: - static std::int64_t ToSortableLong(double value) { - // To create a sortable long, we convert the double to a long using the IEEE-754 standard, - // which stores floats in the form . - // This result is properly ordered longs for all positive doubles. Negative values have - // inverse ordering. For negative doubles, we therefore simply invert them to make them - // sortable, however the sign must be inverted again to stay negative. - std::int64_t r = reinterpret_cast(value); - return r >= 0 ? r : r ^ 0x7FFFFFFFFFFFFFFFL; - } - - static double ToDouble(scalar_t value) { - auto v = value >= 0.0 ? 
value : value ^ 0x7FFFFFFFFFFFFFFFL; - return reinterpret_cast(v); - } -}; - -// These are the IEEE and no-op conversion functions for KEY/PRE/POST - -template -PhPoint PrePostNoOp(const PhPoint& in) { - return in; -} - -template -PhPoint PreprocessIEEE(const PhPointD& point) { - PhPoint out; - for (dimension_t i = 0; i < DIM; ++i) { - out[i] = Preprocessors::ToSortableLong(point[i]); - } - return out; -} - -template -PhPointD PostprocessIEEE(const PhPoint& in) { - PhPointD out; - for (dimension_t i = 0; i < DIM; ++i) { - out[i] = Preprocessors::ToDouble(in[i]); - } - return out; -} - -template -PhPoint<2 * DIM> PreprocessBoxIEEE(const PhBoxD& box) { - PhPoint<2 * DIM> out; - for (dimension_t i = 0; i < DIM; ++i) { - out[i] = Preprocessors::ToSortableLong(box.min()[i]); - out[i + DIM] = Preprocessors::ToSortableLong(box.max()[i]); - } - return out; -} - -template -PhBoxD PostprocessBoxIEEE(const PhPoint<2 * DIM>& in) { - PhBoxD out; - for (dimension_t i = 0; i < DIM; ++i) { - out.min()[i] = Preprocessors::ToDouble(in[i]); - out.max()[i] = Preprocessors::ToDouble(in[i + DIM]); - } - return out; -} - -} // namespace improbable::phtree - -#endif // PHTREE_COMMON_PH_PREPROCESSOR_H diff --git a/phtree/common/ph_preprocessor_test.cc b/phtree/common/ph_preprocessor_test.cc deleted file mode 100644 index 937e5fcf..00000000 --- a/phtree/common/ph_preprocessor_test.cc +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ph_preprocessor.h" -#include -#include - -using namespace improbable::phtree; - -TEST(PhTreePreprocessorTest, IEEE_SmokeTest) { - double d1 = -55; - double d2 = 7; - - scalar_t l1 = Preprocessors::ToSortableLong(d1); - scalar_t l2 = Preprocessors::ToSortableLong(d2); - - ASSERT_GT(l2, l1); - - ASSERT_EQ(d1, Preprocessors::ToDouble(l1)); - ASSERT_EQ(d2, Preprocessors::ToDouble(l2)); -} diff --git a/phtree/common/ph_tree_stats.h b/phtree/common/tree_stats.h similarity index 80% rename from phtree/common/ph_tree_stats.h rename to phtree/common/tree_stats.h index 2319aae0..923aafbc 100644 --- a/phtree/common/ph_tree_stats.h +++ b/phtree/common/tree_stats.h @@ -14,15 +14,15 @@ * limitations under the License. */ -#ifndef PHTREE_COMMON_PH_TREE_STATS_H -#define PHTREE_COMMON_PH_TREE_STATS_H +#ifndef PHTREE_COMMON_TREE_STATS_H +#define PHTREE_COMMON_TREE_STATS_H -#include "ph_base_types.h" +#include "base_types.h" #include #include /* - * PLEASE do not include this file directly, it is included via ph_common.h. + * PLEASE do not include this file directly, it is included via common.h. * * This file defines the type returned by the getStats() method of the PH-Tree. * They provide various statistics on the PH-Tree instance that returns them. 
@@ -30,6 +30,8 @@ namespace improbable::phtree { class PhTreeStats { + using SCALAR = scalar_64_t; + public: std::string ToString() { std::ostringstream s; @@ -37,7 +39,7 @@ class PhTreeStats { s << " avgNodeDepth = " << ((double)q_total_depth_ / (double)n_nodes_) << std::endl; s << " AHC=" << n_AHC_ << " NI=" << n_nt_ << " nNtNodes_=" << n_nt_nodes_ << std::endl; double apl = GetAvgPostlen(); - s << " avgPostLen = " << apl << " (" << (MAX_BIT_WIDTH - apl) << ")" << std::endl; + s << " avgPostLen = " << apl << " (" << (MAX_BIT_WIDTH - apl) << ")" << std::endl; return s.str(); } @@ -55,15 +57,13 @@ class PhTreeStats { } /* - * - * @param r String builder * @return average postfix_len, including the HC/LHC bit. */ double GetAvgPostlen() { size_t total = 0; size_t num_entry = 0; - for (bit_width_t i = 0; i < MAX_BIT_WIDTH; i++) { - total += (MAX_BIT_WIDTH - i) * q_n_post_fix_n_[i]; + for (bit_width_t i = 0; i < MAX_BIT_WIDTH; ++i) { + total += (MAX_BIT_WIDTH - i) * q_n_post_fix_n_[i]; num_entry += q_n_post_fix_n_[i]; } return (double)total / (double)num_entry; @@ -96,11 +96,12 @@ class PhTreeStats { size_t size_ = 0; // calculated size in bytes size_t q_total_depth_ = 0; std::vector q_n_post_fix_n_ = - std::vector(MAX_BIT_WIDTH, (size_t)0); // filled with x[current_depth] = nPost; - std::vector infix_hist_ = std::vector(MAX_BIT_WIDTH, (size_t)0); // prefix len - std::vector node_depth_hist_ = std::vector(MAX_BIT_WIDTH, (size_t)0); // prefix len + std::vector(MAX_BIT_WIDTH, (size_t)0); // filled with x[current_depth] = nPost; + std::vector infix_hist_ = std::vector(MAX_BIT_WIDTH, (size_t)0); // prefix len + std::vector node_depth_hist_ = + std::vector(MAX_BIT_WIDTH, (size_t)0); // prefix len std::vector node_size_log_hist_ = std::vector(32, (size_t)0); // log (num_entries) }; } // namespace improbable::phtree -#endif // PHTREE_COMMON_PH_TREE_STATS_H +#endif // PHTREE_COMMON_TREE_STATS_H diff --git a/phtree/phtree.h b/phtree/phtree.h index 8fb08475..f4a86250 100644 --- 
a/phtree/phtree.h +++ b/phtree/phtree.h @@ -17,7 +17,7 @@ #ifndef PHTREE_PHTREE_H #define PHTREE_PHTREE_H -#include "common/ph_common.h" +#include "common/common.h" #include "v16/phtree_v16.h" namespace improbable::phtree { @@ -29,15 +29,21 @@ namespace improbable::phtree { * * For more information please refer to the README of this project. */ -template < - dimension_t DIM, - typename T, - typename KEY = PhPoint, - PhPostprocessor POST = PrePostNoOp> +template > class PhTree { friend PhTreeDebugHelper; + using KeyInternal = typename CONVERTER::KeyInternal; + using QueryBox = typename CONVERTER::QueryBoxExternal; + using Key = typename CONVERTER::KeyExternal; + static constexpr dimension_t DimInternal = CONVERTER::DimInternal; + + // DimInternal==DIM indicates point keys. Box keys have DimInternal==2*DIM. + using DEFAULT_QUERY_TYPE = + typename std::conditional<(DIM == DimInternal), QueryPoint, QueryIntersect>::type; public: + explicit PhTree(CONVERTER converter = CONVERTER()) : tree_{converter}, converter_{converter} {} + /* * Attempts to build and insert a key and a value into the tree. * @@ -53,8 +59,28 @@ class PhTree { * entry instead of inserting a new one. */ template - std::pair emplace(const PhPoint key, _Args&&... __args) { - return tree_.emplace(key, std::forward<_Args>(__args)...); + std::pair emplace(const Key& key, _Args&&... __args) { + return tree_.emplace(converter_.pre(key), std::forward<_Args>(__args)...); + } + + /* + * The emplace_hint() method uses an iterator as hint for insertion. + * The hint is ignored if it is not useful or is equal to end(). + * + * Iterators should normally not be used after the tree has been modified. As an exception to + * this rule, an iterator can be used as hint if it was previously used with at most one call + * to erase() and if no other modifications occurred. 
+ * The following is valid: + * + * // Move value from key1 to key2 + * auto iter = tree.find(key1); + * auto value = iter.second(); // The value may become invalid in erase() + * erase(iter); + * emplace_hint(iter, key2, value); // the iterator can still be used as hint here + */ + template + std::pair emplace_hint(const ITERATOR& iterator, const Key& key, _Args&&... __args) { + return tree_.emplace_hint(iterator, converter_.pre(key), std::forward<_Args>(__args)...); } /* @@ -63,16 +89,16 @@ class PhTree { * @return a pair consisting of the inserted element (or to the element that prevented the * insertion) and a bool denoting whether the insertion took place. */ - std::pair insert(const PhPoint& key, const T& value) { - return tree_.insert(key, value); + std::pair insert(const Key& key, const T& value) { + return tree_.insert(converter_.pre(key), value); } /* * @return the value stored at position 'key'. If no such value exists, one is added to the tree * and returned. */ - T& operator[](const PhPoint& key) { - return tree_[key]; + T& operator[](const Key& key) { + return tree_[converter_.pre(key)]; } /* @@ -80,8 +106,8 @@ class PhTree { * * @return '1', if a value is associated with the provided key, otherwise '0'. */ - size_t count(const PhPoint& key) const { - return tree_.count(key); + size_t count(const Key& key) const { + return tree_.count(converter_.pre(key)); } /* @@ -92,8 +118,8 @@ class PhTree { * @return an iterator that points either to the associated value or to {@code end()} if the key * was found */ - auto find(const PhPoint& key) const { - return tree_.find(key); + auto find(const Key& key) const { + return tree_.find(converter_.pre(key)); } /* @@ -101,18 +127,73 @@ class PhTree { * * @return '1' if a value was found, otherwise '0'. */ - size_t erase(const PhPoint& key) { - return tree_.erase(key); + size_t erase(const Key& key) { + return tree_.erase(converter_.pre(key)); + } + + /* + * See std::map::erase(). 
Removes any entry located at the provided iterator. + * + * This function attempts to use the iterator to directly erase the current entry from + * its node. However, not all iterators provide all required information so this function + * may resort to erase|(key, value) and thus may not be faster than that. + * + * Currently only iterators returned by find(key) will result in faster erase. + * + * @return '1' if a value was found, otherwise '0'. + */ + template + size_t erase(const ITERATOR& iterator) { + return tree_.erase(iterator); } /* * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter - * functions must implement the same signature as the default 'PhFilterNoOp'. + * functions must implement the same signature as the default 'FilterNoOp'. + * + * @param callback The callback function to be called for every entry that matches the query. + * The callback requires the following signature: callback(const PhPointD &, const T &) + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp`. + */ + template + void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { + tree_.for_each(callback, filter); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum respectively the maximum keys in every dimension. + * @param query_box The query window. + * @param callback The callback function to be called for every entry that matches the query. + * The callback requires the following signature: callback(const PhPointD &, const T &) + * @param query_type The type of query, such as QueryIntersect or QueryInclude + * @param filter An optional filter function. 
The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp`. + */ + template < + typename CALLBACK_FN, + typename FILTER = FilterNoOp, + typename QUERY_TYPE = DEFAULT_QUERY_TYPE> + void for_each( + QueryBox query_box, + CALLBACK_FN& callback, + FILTER filter = FILTER(), + QUERY_TYPE query_type = QUERY_TYPE()) const { + tree_.for_each(query_type(converter_.pre_query(query_box)), callback, filter); + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * functions must implement the same signature as the default 'FilterNoOp'. * * @return an iterator over all (filtered) entries in the tree, */ - template > + template auto begin(FILTER filter = FILTER()) const { return tree_.begin(filter); } @@ -120,21 +201,26 @@ class PhTree { /* * Performs a rectangular window query. The parameters are the min and max keys which * contain the minimum respectively the maximum keys in every dimension. - * @param min Minimum values - * @param max Maximum values + * @param query_box The query window. + * @param query_type The type of query, such as QueryIntersect or QueryInclude * @param filter An optional filter function. The filter function allows filtering entries and * sub-nodes before they are returned or traversed. Any filter function must follow the - * signature of the default 'PhFilterNoOp`. + * signature of the default 'FilterNoOp`. * @return Result iterator. 
*/ - template > + template auto begin_query( - const PhPoint& min, const PhPoint& max, FILTER filter = FILTER()) const { - return tree_.begin_query(min, max, filter); + const QueryBox& query_box, + FILTER filter = FILTER(), + QUERY_TYPE query_type = DEFAULT_QUERY_TYPE()) const { + return tree_.begin_query(query_type(converter_.pre_query(query_box)), filter); } /* * Locate nearest neighbors for a given point in space. + * + * NOTE: This method is not (currently) available for box keys. + * * @param min_results number of entries to be returned. More entries may or may not be returned * when several entries have the same distance. * @param center center point @@ -144,14 +230,20 @@ class PhTree { * @return Result iterator. */ template < - typename DISTANCE = PhDistanceLongEuclidean, - typename FILTER = PhFilterNoOp> + typename DISTANCE, + typename FILTER = FilterNoOp, + // Some magic to disable this in case of box keys, i.e. if DIM != DimInternal + dimension_t DUMMY = DIM, + typename std::enable_if<(DUMMY == DimInternal), int>::type = 0> auto begin_knn_query( size_t min_results, - const PhPoint& center, + const Key& center, DISTANCE distance_function = DISTANCE(), FILTER filter = FILTER()) const { - return tree_.begin_knn_query(min_results, center, distance_function, filter); + // We use pre() instead of pre_query() here because, strictly speaking, we want to + // find the nearest neighbors of a (fictional) key, which may as well be a box. + return tree_.begin_knn_query( + min_results, converter_.pre(center), distance_function, filter); } /* @@ -182,15 +274,69 @@ class PhTree { return tree_.empty(); } + /* + * @return the converter associated with this tree. + */ + [[nodiscard]] const CONVERTER& converter() const { + return converter_; + } + private: // This is used by PhTreeDebugHelper const auto& GetInternalTree() const { return tree_; } - v16::PhTreeV16 tree_; + v16::PhTreeV16 tree_; + CONVERTER converter_; }; +/* + * Floating-point `double` version of the PH-Tree. 
+ * This version of the tree accepts multi-dimensional keys with floating point (`double`) + * coordinates. + * + * The default implementation uses a direct lossless (in terms of numeric precision) mapping from + * 64bit double to 64bit long integer. The mapping is defined in the Converter functions. + * Other, lossy mapping have been shown to provide somewhat better performance (due to + * better tree structure), but this default mapping has been chosen because it is lossless. + * + * For more information please refer to the README of this project. + */ +template > +using PhTreeD = PhTree; + +/* + * Floating-point `float` version of the PH-Tree. + * This version of the tree accepts multi-dimensional keys with floating point (`float`) + * coordinates. + * + * See 'PhTreeD' for details. + */ +template > +using PhTreeF = PhTree; + +template +using PhTreeBox = PhTree; + +/** + * A PH-Tree that uses (axis aligned) boxes as keys. + * The boxes are defined with 64bit 'double' floating point coordinates. + * + * See 'PhTreeD' for details. + */ +template > +using PhTreeBoxD = PhTreeBox; + +/** + * A PH-Tree that uses (axis aligned) boxes as keys. + * The boxes are defined with 32bit 'float' coordinates. + * + * See 'PhTreeD' for details. + */ +template > +using PhTreeBoxF = PhTreeBox; + } // namespace improbable::phtree #endif // PHTREE_PHTREE_H diff --git a/phtree/phtree_box_d.h b/phtree/phtree_box_d.h deleted file mode 100644 index ad837891..00000000 --- a/phtree/phtree_box_d.h +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PHTREE_PHTREE_BOX_D_H -#define PHTREE_PHTREE_BOX_D_H - -#include "common/ph_common.h" -#include "v16/phtree_v16.h" - -namespace improbable::phtree { - -/* - * Floating-point `double` Box version of the PH-Tree. - * This wrapper accepts axis aligned boxes as key. The boxes are defined by their minimum and - * maximum coordinates in each dimension. - * - * Encoding boxes as points - * ======================== - * The native PH-Tree can only handle points, not boxes. This PhTreeBoxD class solves this by - * encoding the boxes into points by concatenating the minimum and maximum coordinates (with DIM - * dimensions) of each box to a single point with 2*DIM dimensions. For example, a 2D box - * (1,3)/(9,8) becomes (1,3,9,8). - * - * Querying boxes - * ============== - * Executing window queries on these encoded boxes requires some transformation of the query - * constraints. - * - * The transformation has two steps: one steps is to transform the requested query min_req/max_req - * points into useful internal 4D min_int/max_int points, the other step is to transform floating - * point coordinates into integer coordinates. The second step is equivalent to the transformation - * in normal floating-point point trees, so it is not discussed further here. Also note that the two - * steps can be swapped. - * - * The default window query works as 'intersection' query, i.e. it returns all boxes that intersect - * or lie completely inside the query window. 
The solution is to fill the lower half of the internal - * min_int point with -infinity and the upper half with the requested min_req coordinate. For the - * internal max_int point we fill the lower half with the requested max_req value and the upper half - * with +infinity. - * - * For example, since the internal tree is 4D, a 2D window query with min_req=(2,4)/max_req=(12,10) - * is transformed to min_int=(-infinity,-infinity,2,4) / max_int=(12,10,+infinity,+infinity). The - * internal query of the PH-Tree simply returns any 4D point (= encoded box) that is strictly larger - * than min_int and strictly smaller than max_int. The result is that it returns all boxes that - * somehow intersect with, or lie inside of, the requested query window. - * - * For more information please refer to the README of this project. - */ -template < - dimension_t DIM, - typename T, - typename KEY = PhBoxD, - PhPreprocessorBoxD PRE = PreprocessBoxIEEE, - PhPostprocessorBoxD POST = PostprocessBoxIEEE, - PhPreprocessorD<2 * DIM> PRE_QUERY = PreprocessIEEE<2 * DIM>> -class PhTreeBoxD { - friend PhTreeDebugHelper; - static const dimension_t TREE_DIM = 2 * DIM; - - public: - PhTreeBoxD() : tree_() {} - - /* - * Attempts to build and insert a key and a value into the tree. - * - * @param key The key for the new entry. - * - * @param __args Arguments used to generate a new value. - * - * @return A pair, whose first element points to the possibly inserted pair, - * and whose second element is a bool that is true if the pair was actually inserted. - * - * This function attempts to build and insert a (key, value) pair into the tree. The PH-Tree is - * effectively a map, so if an entry with the same key was already in the tree, returns that - * entry instead of inserting a new one. - */ - template - std::pair emplace(const PhBoxD& key, _Args&&... __args) { - return tree_.emplace(PRE(key), std::forward<_Args>(__args)...); - } - - /* - * See std::map::insert(). 
- * - * @return a pair consisting of the inserted element (or to the element that prevented the - * insertion) and a bool denoting whether the insertion took place. - */ - std::pair insert(const PhBoxD& key, const T& value) { - return tree_.insert(PRE(key), value); - } - - /* - * @return the value stored at position 'key'. If no such value exists, one is added to the tree - * and returned. - */ - T& operator[](const PhBoxD& key) { - return tree_[PRE(key)]; - } - - /* - * Analogous to map:count(). - * - * @return '1', if a value is associated with the provided key, otherwise '0'. - */ - size_t count(const PhBoxD& key) const { - return tree_.count(PRE(key)); - } - - /* - * Analogous to map:find(). - * - * Get an entry associated with a k dimensional key. - * @param key the key to look up - * @return an iterator that points either to the associated value or to {@code end()} if the key - * was found - */ - auto find(const PhBoxD& key) const { - return tree_.find(PRE(key)); - } - - /* - * See std::map::erase(). Removes any value associated with the provided key. - * - * @return '1' if a value was found, otherwise '0'. - */ - size_t erase(const PhBoxD& key) { - return tree_.erase(PRE(key)); - } - - /* - * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter - * functions must implement the same signature as the default 'PhFilterNoOp'. - * - * @return an iterator over all (filtered) entries in the tree, - */ - template > - auto begin(FILTER filter = FILTER()) const { - return tree_.begin(filter); - } - - /* - * Performs a rectangular window query. The parameters are the min and max keys which - * contain the minimum respectively the maximum keys in every dimension. - * @param min Minimum values - * @param max Maximum values - * @param filter An optional filter function. 
The filter function allows filtering entries and - * sub-nodes before they are returned or traversed. Any filter function must follow the - * signature of the default 'PhFilterNoOp`. - * @return Result iterator. - */ - template > - auto begin_query( - const PhPointD& min, const PhPointD& max, FILTER filter = FILTER()) const { - PhPointD min_2_DIM; - PhPointD max_2_DIM; - for (dimension_t i = 0; i < DIM; i++) { - min_2_DIM[i] = D_NEG_INFINITY; - max_2_DIM[i] = max[i]; - } - for (dimension_t i = DIM; i < 2 * DIM; i++) { - min_2_DIM[i] = min[i - DIM]; - max_2_DIM[i] = D_INFINITY; - } - return tree_.begin_query(PRE_QUERY(min_2_DIM), PRE_QUERY(max_2_DIM), filter); - } - - /* - * @return An iterator representing the tree's 'end'. - */ - const auto& end() const { - return tree_.end(); - } - - /* - * Remove all entries from the tree. - */ - void clear() { - tree_.clear(); - } - - /* - * @return the number of entries (key/value pairs) in the tree. - */ - [[nodiscard]] size_t size() const { - return tree_.size(); - } - - /* - * @return 'true' if the tree is empty, otherwise 'false'. - */ - [[nodiscard]] bool empty() const { - return tree_.empty(); - } - - private: - // This is used by PhTreeDebugHelper - const auto& GetInternalTree() const { - return tree_; - } - - v16::PhTreeV16 tree_; -}; - -} // namespace improbable::phtree - -#endif // PHTREE_PHTREE_BOX_D_H diff --git a/phtree/phtree_box_d_test.cc b/phtree/phtree_box_d_test.cc index 8d5c4bec..0a1d55ef 100644 --- a/phtree/phtree_box_d_test.cc +++ b/phtree/phtree_box_d_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "phtree/phtree_box_d.h" +#include "phtree/phtree.h" #include #include #include @@ -37,7 +37,7 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const size_t i) : _i(i){}; bool operator==(Id& rhs) { return _i == rhs._i; @@ -45,7 +45,7 @@ struct Id { Id& operator=(Id const& rhs) = default; - int _i; + size_t _i; }; struct PointDistance { @@ -72,13 +72,15 @@ double distance(const PhPointD& p1, const PhPointD& p2) { template void generateCube(std::vector>& points, size_t N, double boxLen = 10) { DoubleRng rng(-1000, 1000); - auto refTree = std::unordered_map, size_t, HashPhBoxD>(); + auto refTree = std::unordered_map, size_t>(); points.reserve(N); for (size_t i = 0; i < N; i++) { - auto min = PhPointD({rng.next(), rng.next(), rng.next()}); - auto max = PhPointD({min[0] + boxLen, min[1] + boxLen, min[2] + boxLen}); - auto box = PhBoxD(min, max); + PhBoxD box{}; + for (dimension_t d = 0; d < DIM; ++d) { + box.min()[d] = rng.next(); + box.max()[d] = box.min()[d] + boxLen; + } if (refTree.count(box) != 0) { i--; continue; @@ -87,35 +89,28 @@ void generateCube(std::vector>& points, size_t N, double boxLen = 10 refTree.emplace(box, i); points.push_back(box); } - assert(refTree.size() == N); - assert(points.size() == N); + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); } template using TestPoint = PhBoxD; template -using TestTree = PhTreeBoxD< - DIM, - T, - TestPoint, - PreprocessBoxIEEE, - PostprocessBoxIEEE, - PreprocessIEEE<2 * DIM>>; - -TEST(PhTreeBoxDTest, SmokeTestBasicOps) { - const dimension_t dim = 3; - TestTree tree; - size_t N = 10000; +using TestTree = PhTreeBoxD; - std::vector> points; +template +void SmokeTestBasicOps(size_t N) { + TestTree tree; + std::vector> points; generateCube(points, N); ASSERT_EQ(0, tree.size()); ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); for (size_t i = 0; i < N; i++) { - PhBoxD& p = points.at(i); + TestPoint& p = points.at(i); 
ASSERT_EQ(tree.count(p), 0); ASSERT_EQ(tree.end(), tree.find(p)); @@ -141,26 +136,19 @@ TEST(PhTreeBoxDTest, SmokeTestBasicOps) { } for (size_t i = 0; i < N; i++) { - PhBoxD& p = points.at(i); + TestPoint& p = points.at(i); // With intersection queries we may get multiple results. - int found = 0; - for (auto q = tree.begin_query(p.min(), p.max()); q != tree.end(); ++q) { + size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { found += (i == (*q)._i); } ASSERT_EQ(1, found); } - ASSERT_LE(10, PhTreeDebugHelper::ToString(tree, PhTreeDebugHelper::PrintDetail::name).length()); - ASSERT_LE( - N * 10, - PhTreeDebugHelper::ToString(tree, PhTreeDebugHelper::PrintDetail::entries).length()); - ASSERT_LE( - N * 10, PhTreeDebugHelper::ToString(tree, PhTreeDebugHelper::PrintDetail::tree).length()); - ASSERT_EQ(N, PhTreeDebugHelper::GetStats(tree).size_); PhTreeDebugHelper::CheckConsistency(tree); for (size_t i = 0; i < N; i++) { - PhBoxD& p = points.at(i); + TestPoint& p = points.at(i); ASSERT_NE(tree.find(p), tree.end()); ASSERT_EQ(tree.count(p), 1); ASSERT_EQ(i, tree.find(p)->_i); @@ -181,6 +169,16 @@ TEST(PhTreeBoxDTest, SmokeTestBasicOps) { } ASSERT_EQ(0, tree.size()); ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeDTest, SmokeTestBasicOps) { + SmokeTestBasicOps<1>(100); + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(1000); + SmokeTestBasicOps<20>(1000); + SmokeTestBasicOps<31>(100); } TEST(PhTreeDTest, TestDebug) { @@ -245,8 +243,8 @@ TEST(PhTreeBoxDTest, TestInsert) { for (size_t i = 0; i < N; i++) { PhBoxD& p = points.at(i); // With intersection queries we may get multiple results. 
- int found = 0; - for (auto q = tree.begin_query(p.min(), p.max()); q != tree.end(); ++q) { + size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { found += (i == (*q)._i); } ASSERT_EQ(1, found); @@ -293,8 +291,8 @@ TEST(PhTreeBoxDTest, TestEmplace) { for (size_t i = 0; i < N; i++) { PhBoxD& p = points.at(i); // With intersection queries we may get multiple results. - int found = 0; - for (auto q = tree.begin_query(p.min(), p.max()); q != tree.end(); ++q) { + size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { found += (i == (*q)._i); } ASSERT_EQ(1, found); @@ -338,8 +336,8 @@ TEST(PhTreeBoxDTest, TestSquareBrackets) { for (size_t i = 0; i < N; i++) { PhBoxD& p = points.at(i); // With intersection queries we may get multiple results. - int found = 0; - for (auto q = tree.begin_query(p.min(), p.max()); q != tree.end(); ++q) { + size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { found += (i == (*q)._i); } ASSERT_EQ(1, found); @@ -368,7 +366,7 @@ void populate( TestTree& tree, std::vector>& points, size_t N, double boxLen = 10) { generateCube(points, N, boxLen); for (size_t i = 0; i < N; i++) { - ASSERT_TRUE(tree.emplace(points[i], (int)i).second); + ASSERT_TRUE(tree.emplace(points[i], i).second); } ASSERT_EQ(N, tree.size()); } @@ -436,7 +434,7 @@ TEST(PhTreeBoxDTest, TestUpdateWithEmplace) { {pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}); int n = tree.erase(pOld); ASSERT_EQ(1, n); - tree.emplace(pNew, 42); + tree.emplace(pNew, 42u); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); p = pNew; @@ -446,6 +444,74 @@ TEST(PhTreeBoxDTest, TestUpdateWithEmplace) { tree.clear(); } +TEST(PhTreeBoxDTest, TestUpdateWithEmplaceHint) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 0.1, 1, 10}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n 
+ 1) % deltas.size(); + double delta = deltas[d_n]; + PhPointD min{pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}; + PhPointD max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; + TestPoint pNew{min, max}; + auto iter = tree.find(pOld); + int n = tree.erase(iter); + ASSERT_EQ(1, n); + tree.emplace_hint(iter, pNew, 42u); + ASSERT_EQ(1, tree.count(pNew)); + if (delta != 0.0) { + ASSERT_EQ(0, tree.count(pOld)); + } + p = pNew; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeBoxDTest, TestEraseByIterator) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + auto iter = tree.find(p); + ASSERT_NE(tree.end(), iter); + int count = tree.erase(iter); + ASSERT_EQ(1, count); + ASSERT_EQ(tree.end(), tree.find(p)); + i++; + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + +TEST(PhTreeBoxDTest, TestEraseByIteratorQuery) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + for (size_t i = 0; i < N; ++i) { + auto iter = tree.begin(); + ASSERT_NE(tree.end(), iter); + int count = tree.erase(iter); + ASSERT_EQ(1, count); + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + TEST(PhTreeBoxDTest, TestExtent) { const dimension_t dim = 3; TestTree tree; @@ -456,7 +522,7 @@ TEST(PhTreeBoxDTest, TestExtent) { int num_e = 0; auto qE = tree.begin(); while (qE != tree.end()) { - ASSERT_TRUE(qE->_i > -1); + ASSERT_TRUE(qE->_i >= 0); qE++; num_e++; } @@ -472,14 +538,14 @@ TEST(PhTreeBoxDTest, TestRangeBasedForLoop) { int num_e1 = 0; for (auto& x : tree) { - ASSERT_TRUE(x._i > -1); + ASSERT_TRUE(x._i >= 0); num_e1++; } ASSERT_EQ(N, num_e1); size_t num_e2 = 0; for (auto& x : tree) { - ASSERT_TRUE(x._i > -1); + ASSERT_TRUE(x._i >= 0); num_e2++; } ASSERT_EQ(N, num_e2); @@ -505,30 +571,32 @@ void referenceQuery( } } +// We use 'int&' because gtest does not 
compile with assertions in non-void functions. template -int testQuery(PhPointD& min, PhPointD& max, int N) { +void testQuery(PhPointD& min, PhPointD& max, size_t N, int& result) { TestTree tree; - std::vector> points; + std::vector> points; populate(tree, points, N); std::set referenceResult; referenceQuery(points, min, max, referenceResult); - size_t n = 0; - for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + result = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { auto& x = *it; - assert(x._i >= 0); - assert(referenceResult.count(x._i) == 1); - n++; + ASSERT_GE(x._i, 0); + ASSERT_EQ(referenceResult.count(x._i), 1); + result++; } - assert(referenceResult.size() == n); - return n; + ASSERT_EQ(referenceResult.size(), result); } TEST(PhTreeBoxDTest, TestWindowQuery0) { const dimension_t dim = 3; PhPointD p{-10000, -10000, -10000}; - ASSERT_EQ(0, testQuery(p, p, 10000)); + int n = 0; + testQuery(p, p, 10000, n); + ASSERT_EQ(0, n); } TEST(PhTreeBoxDTest, TestWindowQuery1) { @@ -538,25 +606,26 @@ TEST(PhTreeBoxDTest, TestWindowQuery1) { std::vector> points; populate(tree, points, N); - int n = 0; + size_t n = 0; for (size_t i = 0; i < N; i++) { PhBoxD& p = points.at(i); // With intersection queries we may get multiple results. 
- int found = 0; - for (auto q = tree.begin_query(p.min(), p.max()); q != tree.end(); ++q) { + size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { found += (i == (*q)._i); } ASSERT_EQ(1, found); n++; } - ASSERT_TRUE(N == n); + ASSERT_EQ(N, n); } TEST(PhTreeBoxDTest, TestWindowQueryMany) { const dimension_t dim = 3; PhPointD min{-100, -100, -100}; PhPointD max{100, 100, 100}; - int n = testQuery(min, max, 10000); + int n = 0; + testQuery(min, max, 10000, n); ASSERT_LE(3, n); ASSERT_GE(100, n); } @@ -566,7 +635,9 @@ TEST(PhTreeBoxDTest, TestWindowQueryAll) { const size_t N = 10000; PhPointD min{-10000, -10000, -10000}; PhPointD max{10000, 10000, 10000}; - ASSERT_EQ(N, testQuery(min, max, N)); + int n = 0; + testQuery(min, max, N, n); + ASSERT_EQ(N, n); } TEST(PhTreeBoxDTest, TestWindowQueryManyMoving) { @@ -585,7 +656,7 @@ TEST(PhTreeBoxDTest, TestWindowQueryManyMoving) { referenceQuery(points, min, max, referenceResult); size_t n = 0; - for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { auto& x = *it; ASSERT_EQ(referenceResult.count(x._i), 1); n++; @@ -599,7 +670,7 @@ TEST(PhTreeBoxDTest, TestWindowQueryManyMoving) { } ASSERT_GE(100, n); } - ASSERT_LE(3, 500); + ASSERT_LE(500, nn); ASSERT_GE(5000, nn); } @@ -611,13 +682,13 @@ TEST(PhTreeBoxDTest, TestWindowQueryManyMovingPoint) { populate(tree, points, N, 100); size_t nTotal = 0; - for (scalar_t i = -120; i < 120; i++) { + for (int i = -120; i < 120; i++) { PhPointD min_max{i * 10., i * 9., i * 11.}; std::set referenceResult; referenceQuery(points, min_max, min_max, referenceResult); int n = 0; - for (auto it = tree.begin_query(min_max, min_max); it != tree.end(); it++) { + for (auto it = tree.begin_query({min_max, min_max}); it != tree.end(); it++) { auto& x = *it; ASSERT_EQ(referenceResult.count(x._i), 1); n++; @@ -631,6 +702,41 @@ TEST(PhTreeBoxDTest, TestWindowQueryManyMovingPoint) { 
ASSERT_LE(10, nTotal); } +TEST(PhTreeBoxDTest, TestWindowForEachManyMovingPoint) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N, 100); + + size_t nTotal = 0; + for (int i = -120; i < 120; i++) { + PhPointD min_max{i * 10., i * 9., i * 11.}; + std::set referenceResult; + referenceQuery(points, min_max, min_max, referenceResult); + + struct Counter { + void operator()(PhBoxD key, Id& t) { + ++n_; + ASSERT_EQ(referenceResult.count(t._i), 1); + } + std::set& referenceResult; + size_t n_ = 0; + }; + + size_t n = 0; + Counter callback{referenceResult, 0}; + tree.for_each({min_max, min_max}, callback); + n += callback.n_; + ASSERT_EQ(referenceResult.size(), n); + nTotal += n; + + // basic check to ensure healthy queries + ASSERT_GE(N / 10, n); + } + ASSERT_LE(10, nTotal); +} + TEST(PhTreeBoxDTest, SmokeTestPointAPI) { PhBoxD<3> p({1, 2, 3}, {4, 5, 6}); (void)p; diff --git a/phtree/phtree_box_d_test_query_types.cc b/phtree/phtree_box_d_test_query_types.cc new file mode 100644 index 00000000..c5460665 --- /dev/null +++ b/phtree/phtree_box_d_test_query_types.cc @@ -0,0 +1,62 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree.h" +#include +#include + +using namespace improbable::phtree; + +template +using TestPoint = PhBoxD; + +template +using TestTree = PhTreeBoxD; + +TEST(PhTreeBoxDTestQueryTypes, SmokeTestQuery) { + const dimension_t DIM = 2; + TestTree tree; + + PhPointD p00{-10, -10}; + PhPointD p11{10, 10}; + PhPointD pm{0, 0}; + + PhBoxD b00{p00, pm}; + PhBoxD b11{pm, p11}; + + tree.emplace(b00, -1); + tree.emplace(b11, 1); + + auto query_type = QueryInclude(); + + // empty + auto q1 = tree.begin_query({{-9, -9}, {9, 9}}, FilterNoOp(), query_type); + ASSERT_EQ(q1, tree.end()); + + // Find box00 but not box11 + auto q2 = tree.begin_query({{-11, -11}, {9, 9}}, FilterNoOp(), query_type); + ASSERT_NE(q2, tree.end()); + ASSERT_EQ(-1, (*q2)); + q2++; + ASSERT_EQ(q2, tree.end()); + + // Find box11 but not box00 + auto q3 = tree.begin_query({{-9, -9}, {11, 11}}, FilterNoOp(), query_type); + ASSERT_NE(q3, tree.end()); + ASSERT_EQ(1, (*q3)); + q3++; + ASSERT_EQ(q3, tree.end()); +} diff --git a/phtree/phtree_box_f_test.cc b/phtree/phtree_box_f_test.cc new file mode 100644 index 00000000..5a738af2 --- /dev/null +++ b/phtree/phtree_box_f_test.cc @@ -0,0 +1,760 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree.h" +#include +#include +#include + +using namespace improbable::phtree; + +template +using TestPoint = PhBoxF; + +template +using TestTree = PhTreeBoxF; + +class FloatRng { + public: + FloatRng(double minIncl, double maxExcl) + : eng(), rnd{static_cast(minIncl), static_cast(maxExcl)} {} + + float next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const size_t i) : _i(i){}; + + bool operator==(Id& rhs) { + return _i == rhs._i; + } + + Id& operator=(Id const& rhs) = default; + + size_t _i; +}; + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + + double _distance; + size_t _id; +}; + +bool comparePointDistance(PointDistance& i1, PointDistance& i2) { + return (i1._distance < i2._distance); +} + +template +double distance(const PhPointD& p1, const PhPointD& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = p1[i] - p2[i]; + sum2 += d * d; + } + return sqrt(sum2); +} + +template +void generateCube(std::vector>& points, size_t N, float boxLen = 10) { + FloatRng rng(-1000, 1000); + auto refTree = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + TestPoint box{}; + for (dimension_t d = 0; d < DIM; ++d) { + box.min()[d] = rng.next(); + box.max()[d] = box.min()[d] + boxLen; + } + if (refTree.count(box) != 0) { + i--; + continue; + } + + refTree.emplace(box, i); + points.push_back(box); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); 
+ + Id id(i); + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else { + ASSERT_TRUE(tree.insert(p, id).second); + } + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + // With intersection queries we may get multiple results. + size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { + found += (i == (*q)._i); + } + ASSERT_EQ(1, found); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(1, tree.erase(p)); + + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeDTest, SmokeTestBasicOps) { + SmokeTestBasicOps<1>(100); + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(1000); + SmokeTestBasicOps<20>(1000); + SmokeTestBasicOps<31>(100); +} + +TEST(PhTreeDTest, TestDebug) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + using Debug = PhTreeDebugHelper; + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); 
+ ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_TRUE(tree.insert(p, id).second); + } + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(N, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + tree.clear(); + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); +} + +TEST(PhTreeBoxFTest, TestInsert) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.insert(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + + // try add again + ASSERT_EQ(false, tree.insert(p, id).second); + ASSERT_EQ(i, tree.insert(p, id).first._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + // With intersection queries we may get multiple results. 
+ size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { + found += (i == (*q)._i); + } + ASSERT_EQ(1, found); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + } +} + +TEST(PhTreeBoxFTest, TestEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again, this should _not_ replace the existing value + Id id2(-i); + ASSERT_EQ(false, tree.emplace(p, id2).second); + ASSERT_EQ(i, tree.emplace(p, id).first._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + + // Check that the returned value is a reference + tree.emplace(p, id2).first._i++; + ASSERT_EQ(i + 1, tree.emplace(p, id).first._i); + tree.emplace(p, id2).first = id; + ASSERT_EQ(i, tree.emplace(p, id).first._i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + // With intersection queries we may get multiple results. 
+ size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { + found += (i == (*q)._i); + } + ASSERT_EQ(1, found); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + } +} + +TEST(PhTreeBoxFTest, TestSquareBrackets) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(0, tree[p]._i); + ASSERT_EQ(tree.count(p), 1); + if (i % 2 == 0) { + tree[p]._i = i; + } else { + tree[p] = id; + } + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try `add` again + ASSERT_EQ(i, tree[p]._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + // With intersection queries we may get multiple results. 
+ size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { + found += (i == (*q)._i); + } + ASSERT_EQ(1, found); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(i, tree[p]._i); + } +} + +template +void populate( + PhTreeBoxF& tree, + std::vector>& points, + size_t N, + double boxLen = 10) { + generateCube(points, N, boxLen); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +template +void populate( + TestTree& tree, std::vector>& points, size_t N, double boxLen = 10) { + generateCube(points, N, boxLen); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeBoxFTest, TestClear) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 100; + std::vector> points; + + ASSERT_TRUE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); + + // try again + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); +} + +TEST(PhTreeBoxFTest, TestFind) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + // test commutativity + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(tree.find(p)->_i, i); + i++; + } + + TestPoint p({1, 1, 1}, {2, 2, 10000000}); + auto result = tree.find(p); + ASSERT_EQ(result, tree.end()); + ASSERT_EQ(tree.end(), result); +} + +TEST(PhTreeBoxFTest, TestUpdateWithEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + float delta = 20; + std::vector> points; + populate(tree, points, N); + + for (auto& p : 
points) { + auto pOld = p; + TestPoint pNew( + {pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}, + {pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}); + int n = tree.erase(pOld); + ASSERT_EQ(1, n); + tree.emplace(pNew, 42u); + ASSERT_EQ(1, tree.count(pNew)); + ASSERT_EQ(0, tree.count(pOld)); + p = pNew; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeBoxFTest, TestUpdateWithEmplaceHint) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 0.1, 1, 10}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + float delta = deltas[d_n]; + PhPointF min{pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}; + PhPointF max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; + TestPoint pNew{min, max}; + auto iter = tree.find(pOld); + int n = tree.erase(iter); + ASSERT_EQ(1, n); + tree.emplace_hint(iter, pNew, 42u); + ASSERT_EQ(1, tree.count(pNew)); + if (delta != 0.0) { + ASSERT_EQ(0, tree.count(pOld)); + } + p = pNew; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeBoxFTest, TestEraseByIterator) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + auto iter = tree.find(p); + ASSERT_NE(tree.end(), iter); + int count = tree.erase(iter); + ASSERT_EQ(1, count); + ASSERT_EQ(tree.end(), tree.find(p)); + i++; + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + +TEST(PhTreeBoxFTest, TestEraseByIteratorQuery) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + for (size_t i = 0; i < N; ++i) { + auto iter = tree.begin(); + ASSERT_NE(tree.end(), iter); + int count = tree.erase(iter); + ASSERT_EQ(1, count); + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + 
+TEST(PhTreeBoxFTest, TestExtent) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i >= 0); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e); +} + +TEST(PhTreeBoxFTest, TestRangeBasedForLoop) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e1 = 0; + for (auto& x : tree) { + ASSERT_TRUE(x._i >= 0); + num_e1++; + } + ASSERT_EQ(N, num_e1); + + size_t num_e2 = 0; + for (auto& x : tree) { + ASSERT_TRUE(x._i >= 0); + num_e2++; + } + ASSERT_EQ(N, num_e2); +} + +template +void referenceQuery( + std::vector>& points, + PhPointF& min, + PhPointF& max, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + auto pMin = p.min(); + auto pMax = p.max(); + bool match = true; + for (dimension_t d = 0; d < DIM; d++) { + match &= pMax[d] >= min[d] && pMin[d] <= max[d]; + } + if (match) { + result.insert(i); + } + } +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. 
+template +void testQuery(PhPointF& min, PhPointF& max, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + result = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x._i, 0); + ASSERT_EQ(referenceResult.count(x._i), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +TEST(PhTreeBoxFTest, TestWindowQuery0) { + const dimension_t dim = 3; + PhPointF p{-10000, -10000, -10000}; + int n = 0; + testQuery(p, p, 10000, n); + ASSERT_EQ(0, n); +} + +TEST(PhTreeBoxFTest, TestWindowQuery1) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + size_t n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + // With intersection queries we may get multiple results. + size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { + found += (i == (*q)._i); + } + ASSERT_EQ(1, found); + n++; + } + ASSERT_EQ(N, n); +} + +TEST(PhTreeBoxFTest, TestWindowQueryMany) { + const dimension_t dim = 3; + PhPointF min{-100, -100, -100}; + PhPointF max{100, 100, 100}; + int n = 0; + testQuery(min, max, 10000, n); + ASSERT_LE(3, n); + ASSERT_GE(100, n); +} + +TEST(PhTreeBoxFTest, TestWindowQueryAll) { + const dimension_t dim = 3; + const size_t N = 10000; + PhPointF min{-10000, -10000, -10000}; + PhPointF max{10000, 10000, 10000}; + int n = 0; + testQuery(min, max, N, n); + ASSERT_EQ(N, n); +} + +TEST(PhTreeBoxFTest, TestWindowQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + float query_length = 200; + size_t nn = 0; + for (int i = -120; i < 120; i++) { + PhPointF min{i * 10.0f, i * 9.0f, i * 11.0f}; + PhPointF max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + std::set 
referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { + auto& x = *it; + ASSERT_EQ(referenceResult.count(x._i), 1); + n++; + nn++; + } + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + if (i > -50 && i < 50) { + ASSERT_LE(1, n); + } + ASSERT_GE(100, n); + } + ASSERT_LE(500, nn); + ASSERT_GE(5000, nn); +} + +TEST(PhTreeBoxFTest, TestWindowQueryManyMovingPoint) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N, 100); + + size_t nTotal = 0; + for (int i = -120; i < 120; i++) { + PhPointF min_max{i * 10.0f, i * 9.0f, i * 11.0f}; + std::set referenceResult; + referenceQuery(points, min_max, min_max, referenceResult); + + int n = 0; + for (auto it = tree.begin_query({min_max, min_max}); it != tree.end(); it++) { + auto& x = *it; + ASSERT_EQ(referenceResult.count(x._i), 1); + n++; + } + ASSERT_EQ(referenceResult.size(), n); + nTotal += n; + + // basic check to ensure healthy queries + ASSERT_GE(N / 10, n); + } + ASSERT_LE(10, nTotal); +} + +TEST(PhTreeBoxFTest, TestWindowForEachManyMovingPoint) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N, 100); + + size_t nTotal = 0; + for (int i = -120; i < 120; i++) { + PhPointF min_max{i * 10.0f, i * 9.0f, i * 11.0f}; + std::set referenceResult; + referenceQuery(points, min_max, min_max, referenceResult); + + struct Counter { + void operator()(TestPoint key, Id& t) { + ++n_; + ASSERT_EQ(referenceResult.count(t._i), 1); + } + std::set& referenceResult; + size_t n_ = 0; + }; + + size_t n = 0; + Counter callback{referenceResult, 0}; + tree.for_each({min_max, min_max}, callback); + n += callback.n_; + ASSERT_EQ(referenceResult.size(), n); + nTotal += n; + + // basic check to ensure healthy queries + ASSERT_GE(N / 10, n); + } + ASSERT_LE(10, nTotal); +} + 
+TEST(PhTreeBoxFTest, SmokeTestPointAPI) { + TestPoint<3> p({1, 2, 3}, {4, 5, 6}); + (void)p; +} + +TEST(PhTreeBoxFTest, SmokeTestTreeAPI) { + std::map mapPtr; + PhTreeBoxF<3, Id*> treePtr; + Id* idPtr = new Id(1); + treePtr.emplace(TestPoint<3>({1, 2, 3}, {4, 5, 6}), idPtr); + treePtr.clear(); + delete idPtr; + + std::map mapConst; + PhTreeBoxF<3, const Id> treeConst; + treeConst.emplace(TestPoint<3>({1, 2, 3}, {4, 5, 6}), Id(1)); +} diff --git a/phtree/phtree_d.h b/phtree/phtree_d.h deleted file mode 100644 index 36bc4d99..00000000 --- a/phtree/phtree_d.h +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PHTREE_PHTREE_D_H -#define PHTREE_PHTREE_D_H - -#include "common/ph_common.h" -#include "v16/phtree_v16.h" - -namespace improbable::phtree { - -/* - * Floating-point `double` version of the PH-Tree. - * This version of the tree accepts multi-dimensional keys with floatring point (`double`) - * coordinates. - * - * The default implementation uses a direct lossless (in terms of numeric precision) mapping from - * 64bit double to 64bit long integer. The mapping is defined in the Preprocessor/PostProcessor - * functions. Other, lossy, mapping have been shown to provide somewhat better performance (due to - * better tree structure), but this default mapping has been chosen because it is lossless. 
- * - * For more information please refer to the README of this project. - */ -template < - dimension_t DIM, - typename T, - typename KEY = PhPointD, - PhPreprocessor PRE = PreprocessIEEE, - PhPostprocessor POST = PostprocessIEEE> -class PhTreeD { - friend PhTreeDebugHelper; - - public: - PhTreeD() : tree_{} {} - - /* - * Attempts to build and insert a key and a value into the tree. - * - * @param key The key for the new entry. - * - * @param __args Arguments used to generate a new value. - * - * @return A pair, whose first element points to the possibly inserted pair, - * and whose second element is a bool that is true if the pair was actually inserted. - * - * This function attempts to build and insert a (key, value) pair into the tree. The PH-Tree is - * effectively a map, so if an entry with the same key was already in the tree, returns that - * entry instead of inserting a new one. - */ - template - std::pair emplace(const PhPointD& key, _Args&&... __args) { - return tree_.emplace(PRE(key), std::forward<_Args>(__args)...); - } - - /* - * See std::map::insert(). - * - * @return a pair consisting of the inserted element (or to the element that prevented the - * insertion) and a bool denoting whether the insertion took place. - */ - std::pair insert(const PhPointD& key, const T& value) { - return tree_.insert(PRE(key), value); - } - - /* - * @return the value stored at position 'key'. If no such value exists, one is added to the tree - * and returned. - */ - T& operator[](const PhPointD& key) { - return tree_[PRE(key)]; - } - - /* - * Analogous to map:count(). - * - * @return '1', if a value is associated with the provided key, otherwise '0'. - */ - size_t count(const PhPointD& key) const { - return tree_.count(PRE(key)); - } - - /* - * Analogous to map:find(). - * - * Get an entry associated with a k dimensional key. 
- * @param key the key to look up - * @return an iterator that points either to the associated value or to {@code end()} if the key - * was found - */ - auto find(const PhPointD& key) const { - return tree_.find(PRE(key)); - } - - /* - * See std::map::erase(). Removes any value associated with the provided key. - * - * @return '1' if a value was found, otherwise '0'. - */ - size_t erase(const PhPointD& key) { - return tree_.erase(PRE(key)); - } - - /* - * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter - * functions must implement the same signature as the default 'PhFilterNoOp'. - * - * @return an iterator over all (filtered) entries in the tree, - */ - template > - auto begin(FILTER filter = FILTER()) const { - return tree_.begin(filter); - } - - /* - * Performs a rectangular window query. The parameters are the min and max keys which - * contain the minimum respectively the maximum keys in every dimension. - * @param min Minimum values - * @param max Maximum values - * @param filter An optional filter function. The filter function allows filtering entries and - * sub-nodes before they are returned or traversed. Any filter function must follow the - * signature of the default 'PhFilterNoOp`. - * @return Result iterator. - */ - template > - auto begin_query( - const PhPointD& min, const PhPointD& max, FILTER filter = FILTER()) const { - return tree_.begin_query(PRE(min), PRE(max), filter); - } - - /* - * Locate nearest neighbors for a given point in space. - * @param min_results number of entries to be returned. More entries may or may not be returned - * when several entries have the same distance. 
- * @param center center point - * @param distance_function optional distance function, defaults to euclidean distance - * @param filter optional filter predicate that excludes nodes/entries before their distance is - * calculated. - * @return Result iterator. - */ - template < - typename DISTANCE = PhDistanceDoubleEuclidean, - typename FILTER = PhFilterNoOp> - auto begin_knn_query( - size_t min_results, - const PhPointD& center, - DISTANCE distance_function = DISTANCE(), - FILTER filter = FILTER()) const { - return tree_.begin_knn_query(min_results, PRE(center), distance_function, filter); - } - - /* - * @return An iterator representing the tree's 'end'. - */ - const auto& end() const { - return tree_.end(); - } - - /* - * Remove all entries from the tree. - */ - void clear() { - tree_.clear(); - } - - /* - * @return the number of entries (key/value pairs) in the tree. - */ - [[nodiscard]] size_t size() const { - return tree_.size(); - } - - /* - * @return 'true' if the tree is empty, otherwise 'false'. - */ - [[nodiscard]] bool empty() const { - return tree_.empty(); - } - - private: - // This is used by PhTreeDebugHelper - const auto& GetInternalTree() const { - return tree_; - } - - v16::PhTreeV16 tree_; -}; - -} // namespace improbable::phtree - -#endif // PHTREE_PHTREE_D_H diff --git a/phtree/phtree_d_test.cc b/phtree/phtree_d_test.cc index 207a369c..c8e51822 100644 --- a/phtree/phtree_d_test.cc +++ b/phtree/phtree_d_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "phtree/phtree_d.h" +#include "phtree/phtree.h" #include #include @@ -24,7 +24,7 @@ template using TestPoint = PhPointD; template -using TestTree = PhTreeD, PreprocessIEEE, PostprocessIEEE>; +using TestTree = PhTreeD; class DoubleRng { public: @@ -90,7 +90,10 @@ void generateCube(std::vector>& points, size_t N) { points.reserve(N); for (size_t i = 0; i < N; i++) { - auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + TestPoint point{}; + for (dimension_t d = 0; d < DIM; ++d) { + point[d] = rng.next(); + } if (refTree.count(point) != 0) { i--; continue; @@ -99,15 +102,13 @@ void generateCube(std::vector>& points, size_t N) { refTree.emplace(point, i); points.push_back(point); } - assert(refTree.size() == N); - assert(points.size() == N); + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); } template -void SmokeTestBasicOps() { +void SmokeTestBasicOps(size_t N) { TestTree tree; - size_t N = 10000; - std::vector> points; generateCube(points, N); @@ -143,7 +144,7 @@ void SmokeTestBasicOps() { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)._i); q++; @@ -157,7 +158,12 @@ void SmokeTestBasicOps() { ASSERT_NE(tree.find(p), tree.end()); ASSERT_EQ(tree.count(p), 1); ASSERT_EQ(i, tree.find(p)->_i); - ASSERT_EQ(1, tree.erase(p)); + if (i % 2 == 0) { + ASSERT_EQ(1, tree.erase(p)); + } else { + auto iter = tree.find(p); + ASSERT_EQ(1, tree.erase(iter)); + } ASSERT_EQ(tree.count(p), 0); ASSERT_EQ(tree.end(), tree.find(p)); @@ -178,10 +184,12 @@ void SmokeTestBasicOps() { } TEST(PhTreeDTest, SmokeTestBasicOps) { - SmokeTestBasicOps<3>(); - SmokeTestBasicOps<6>(); - SmokeTestBasicOps<10>(); - SmokeTestBasicOps<20>(); + SmokeTestBasicOps<1>(10000); + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(10000); + SmokeTestBasicOps<20>(10000); + SmokeTestBasicOps<63>(100); } TEST(PhTreeDTest, 
TestDebug) { @@ -245,7 +253,7 @@ TEST(PhTreeDTest, TestInsert) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)._i); q++; @@ -292,7 +300,7 @@ TEST(PhTreeDTest, TestEmplace) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)._i); q++; @@ -336,7 +344,7 @@ TEST(PhTreeDTest, TestSquareBrackets) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)._i); q++; @@ -445,6 +453,72 @@ TEST(PhTreeDTest, TestUpdateWithEmplace) { tree.clear(); } +TEST(PhTreeDTest, TestUpdateWithEmplaceHint) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 0.1, 1, 10}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + double delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + auto iter = tree.find(pOld); + int n = tree.erase(iter); + ASSERT_EQ(1, n); + tree.emplace_hint(iter, pNew, 42); + ASSERT_EQ(1, tree.count(pNew)); + if (delta != 0.0) { + ASSERT_EQ(0, tree.count(pOld)); + } + p = pNew; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeDTest, TestEraseByIterator) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + auto iter = tree.find(p); + ASSERT_NE(tree.end(), iter); + int count = tree.erase(iter); + ASSERT_EQ(1, count); + ASSERT_EQ(tree.end(), tree.find(p)); + i++; + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + +TEST(PhTreeDTest, TestEraseByIteratorQuery) { + const dimension_t dim = 3; + 
TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + for (size_t i = 0; i < N; ++i) { + auto iter = tree.begin(); + ASSERT_NE(tree.end(), iter); + int count = tree.erase(iter); + ASSERT_EQ(1, count); + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + TEST(PhTreeDTest, TestExtent) { const dimension_t dim = 3; TestTree tree; @@ -468,7 +542,7 @@ TEST(PhTreeDTest, TestExtent) { } template -struct PhFilterEvenId { +struct FilterEvenId { [[nodiscard]] constexpr bool IsEntryValid(const PhPoint& key, const T& value) const { return value._i % 2 == 0; } @@ -485,7 +559,7 @@ TEST(PhTreeDTest, TestExtentFilter) { populate(tree, points, N); int num_e = 0; - auto qE = tree.begin(PhFilterEvenId()); + auto qE = tree.begin(FilterEvenId()); while (qE != tree.end()) { ASSERT_TRUE(qE->_i > -1); ASSERT_TRUE(qE->_i % 2 == 0); @@ -535,8 +609,9 @@ void referenceQuery( } } +// We use 'int&' because gtest does not compile with assertions in non-void functions. template -int testQuery(TestPoint& min, TestPoint& max, size_t N) { +void testQuery(TestPoint& min, TestPoint& max, size_t N, int& result) { TestTree tree; std::vector> points; populate(tree, points, N); @@ -544,21 +619,22 @@ int testQuery(TestPoint& min, TestPoint& max, size_t N) { std::set referenceResult; referenceQuery(points, min, max, referenceResult); - size_t n = 0; - for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + result = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { auto& x = *it; - assert(x._i >= 0); - assert(referenceResult.count(x._i) == 1); - n++; + ASSERT_GE(x._i, 0); + ASSERT_EQ(referenceResult.count(x._i), 1); + result++; } - assert(referenceResult.size() == n); - return n; + ASSERT_EQ(referenceResult.size(), result); } TEST(PhTreeDTest, TestWindowQuery0) { const dimension_t dim = 3; TestPoint p{-10000, -10000, -10000}; - ASSERT_EQ(0, testQuery(p, p, 10000)); + int n = 0; + testQuery(p, p, 10000, n); + ASSERT_EQ(0, n); } 
TEST(PhTreeDTest, TestWindowQuery1) { @@ -571,7 +647,7 @@ TEST(PhTreeDTest, TestWindowQuery1) { int n = 0; for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); // just read the entry auto& x = *q; @@ -587,7 +663,8 @@ TEST(PhTreeDTest, TestWindowQueryMany) { const dimension_t dim = 3; TestPoint min{-100, -100, -100}; TestPoint max{100, 100, 100}; - int n = testQuery(min, max, 10000); + int n = 0; + testQuery(min, max, 10000, n); ASSERT_LE(3, n); ASSERT_GE(100, n); } @@ -597,7 +674,9 @@ TEST(PhTreeDTest, TestWindowQueryAll) { const size_t N = 10000; TestPoint min{-10000, -10000, -10000}; TestPoint max{10000, 10000, 10000}; - ASSERT_EQ(N, testQuery(min, max, N)); + int n = 0; + testQuery(min, max, N, n); + ASSERT_EQ(N, n); } TEST(PhTreeDTest, TestWindowQueryManyMoving) { @@ -616,7 +695,7 @@ TEST(PhTreeDTest, TestWindowQueryManyMoving) { referenceQuery(points, min, max, referenceResult); size_t n = 0; - for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { auto& x = *it; ASSERT_EQ(referenceResult.count(x._i), 1); n++; @@ -630,7 +709,48 @@ TEST(PhTreeDTest, TestWindowQueryManyMoving) { } ASSERT_GE(100, n); } - ASSERT_LE(3, 500); + ASSERT_LE(500, nn); + ASSERT_GE(5000, nn); +} + +TEST(PhTreeDTest, TestWindowForEachQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + double query_length = 200; + size_t nn = 0; + for (int i = -120; i < 120; i++) { + TestPoint min{i * 10., i * 9., i * 11.}; + TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + struct Counter { + void operator()(TestPoint key, Id& t) { + ++n_; + ASSERT_EQ(referenceResult.count(t._i), 1); + } + std::set& 
referenceResult; + size_t n_ = 0; + }; + + size_t n = 0; + Counter callback{referenceResult, 0}; + tree.for_each({min, max}, callback); + n += callback.n_; + nn += callback.n_; + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + if (i > -50 && i < 50) { + ASSERT_LE(1, n); + } + ASSERT_GE(100, n); + } + ASSERT_LE(500, nn); ASSERT_GE(5000, nn); } @@ -644,8 +764,8 @@ TEST(PhTreeDTest, TestWindowQueryIterators) { int n = 0; for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q1 = tree.begin_query(p, p); - auto q2 = tree.begin_query(p, p); + auto q1 = tree.begin_query({p, p}); + auto q2 = tree.begin_query({p, p}); ASSERT_NE(q1, tree.end()); ASSERT_NE(q2, tree.end()); ASSERT_EQ(q1, q2); @@ -667,7 +787,7 @@ TEST(PhTreeDTest, TestWindowQueryFilter) { int num_e = 0; TestPoint min{-100, -100, -100}; TestPoint max{100, 100, 100}; - auto qE = tree.begin_query(min, max, PhFilterEvenId()); + auto qE = tree.begin_query({min, max}, FilterEvenId()); while (qE != tree.end()) { ASSERT_TRUE(qE->_i > -1); ASSERT_TRUE(qE->_i % 2 == 0); @@ -702,7 +822,7 @@ TEST(PhTreeDTest, TestKnnQuery) { size_t n = 0; double prevDist = -1; - auto q = tree.begin_knn_query(Nq, center); + auto q = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); while (q != tree.end()) { // just read the entry auto& e = *q; @@ -754,8 +874,7 @@ TEST(PhTreeDTest, TestKnnQueryFilterAndDistanceL1) { size_t n = 0; double prevDist = -1; - auto q = - tree.begin_knn_query(Nq, center, PhDistanceLongL1(), PhFilterEvenId()); + auto q = tree.begin_knn_query(Nq, center, PhDistanceLongL1(), FilterEvenId()); while (q != tree.end()) { // just read the entry auto& e = *q; @@ -786,8 +905,8 @@ TEST(PhTreeDTest, TestKnnQueryIterator) { TestPoint center{rng.next(), rng.next(), rng.next()}; size_t n = 0; - auto q1 = tree.begin_knn_query(Nq, center); - auto q2 = tree.begin_knn_query(Nq, center); + auto q1 = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); + auto q2 = 
tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); while (q1 != tree.end()) { ASSERT_NE(q1, tree.end()); ASSERT_NE(q2, tree.end()); @@ -807,13 +926,13 @@ TEST(PhTreeDTest, SmokeTestPoint0) { ASSERT_EQ(tree.size(), 0); ASSERT_EQ(tree.find(p), tree.end()); - auto q_window = tree.begin_query(p, p); + auto q_window = tree.begin_query({p, p}); ASSERT_EQ(q_window, tree.end()); auto q_extent = tree.begin(); ASSERT_EQ(q_extent, tree.end()); - auto q_knn = tree.begin_knn_query(10, p); + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); ASSERT_EQ(q_knn, tree.end()); ASSERT_EQ(0, tree.erase(p)); @@ -843,7 +962,7 @@ TEST(PhTreeDTest, SmokeTestPointInfinity) { // Note that the tree returns result in z-order, however, since the z-order is based on // the (unsigned) bit representation, negative values come _after_ positive values. - auto q_window = tree.begin_query(p_neg, p_pos); + auto q_window = tree.begin_query({p_neg, p_pos}); ASSERT_EQ(1, q_window->_i); ++q_window; ASSERT_EQ(10, q_window->_i); @@ -861,7 +980,7 @@ TEST(PhTreeDTest, SmokeTestPointInfinity) { ++q_extent; ASSERT_EQ(q_extent, tree.end()); - auto q_knn = tree.begin_knn_query(10, p); + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); ASSERT_EQ(1, q_knn->_i); ++q_knn; ASSERT_NE(q_knn, tree.end()); diff --git a/phtree/phtree_d_test_custom_key.cc b/phtree/phtree_d_test_custom_key.cc new file mode 100644 index 00000000..cd6e254d --- /dev/null +++ b/phtree/phtree_d_test_custom_key.cc @@ -0,0 +1,216 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree.h" +#include +#include + +using namespace improbable::phtree; + +static const double MY_MULTIPLIER = 1000000.; + +/* + * A custom key class. + * This allows using custom classes directly as keys without having to convert them to + * PhPoint or PhBox first. + * However, the general converter still needs to convert them. + * + * Allowing custom keys may not give a huge advantage in terms of performance or convenience, + * but we get it almost for free. This test also serves as an example how to implement it. + */ +struct MyPoint { + // Required for testing, not by the PH-Tree + bool operator==(const MyPoint& other) const { + return x_ == other.x_ && y_ == other.y_ && z_ == other.z_; + } + + // Required for testing, not by the PH-Tree + bool operator!=(const MyPoint& other) const { + return x_ != other.x_ || y_ != other.y_ || z_ != other.z_; + } + + // Required for testing, not by the PH-Tree + bool operator<(const MyPoint& other) const { + // A very simple ordering + return x_ < other.x_; + } + + double x_; + double y_; + double z_; +}; + +using MyBox = std::pair; + +template +using TestPoint = MyPoint; + +class MyConverterMultiply : public ConverterBase<3, 3, double, scalar_64_t, MyPoint, MyBox> { + using BASE = ConverterPointBase<3, double, scalar_64_t>; + using PointInternal = typename BASE::KeyInternal; + using QueryBoxInternal = typename BASE::QueryBoxInternal; + + public: + explicit MyConverterMultiply(double multiplier) + : multiplier_{multiplier}, divider_{1. 
/ multiplier} {} + + [[nodiscard]] PointInternal pre(const MyPoint& point) const { + return {static_cast(point.x_ * multiplier_), + static_cast(point.y_ * multiplier_), + static_cast(point.z_ * multiplier_)}; + } + + [[nodiscard]] MyPoint post(const PointInternal& in) const { + return {in[0] * divider_, in[1] * divider_, in[2] * divider_}; + } + + [[nodiscard]] QueryBoxInternal pre_query(const MyBox& box) const { + return {pre(box.first), pre(box.second)}; + } + + private: + const double multiplier_; + const double divider_; +}; + +template +using TestTree = PhTreeD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id& operator=(Id const& rhs) = default; + + int _i; +}; + +template +void generateCube(std::vector>& points, size_t N) { + DoubleRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + assert(refTree.size() == N); + assert(points.size() == N); +} + +template +void SmokeTestBasicOps() { + MyConverterMultiply tm{MY_MULTIPLIER}; + TestTree tree(tm); + size_t N = 10000; + + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + + Id id(i); + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else { + ASSERT_TRUE(tree.insert(p, 
id).second); + } + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(1, tree.erase(p)); + + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeDTestCustomKey, SmokeTestBasicOps) { + SmokeTestBasicOps<3>(); +} diff --git a/phtree/phtree_d_test_filter.cc b/phtree/phtree_d_test_filter.cc new file mode 100644 index 00000000..f5470190 --- /dev/null +++ b/phtree/phtree_d_test_filter.cc @@ -0,0 +1,70 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree.h" +#include +#include +#include + +using namespace improbable::phtree; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +template +void generateCube(std::vector>& points, size_t N) { + DoubleRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} diff --git a/phtree/phtree_d_test_preprocessor.cc b/phtree/phtree_d_test_preprocessor.cc index 11dc391c..7e2e9010 100644 --- a/phtree/phtree_d_test_preprocessor.cc +++ b/phtree/phtree_d_test_preprocessor.cc @@ -14,38 +14,17 @@ * limitations under the License. */ -#include "phtree/phtree_d.h" +#include "phtree/phtree.h" #include #include using namespace improbable::phtree; -static const double MY_MULTIPLIER = 1000000.; -static const double MY_DIVIDER = 1. 
/ MY_MULTIPLIER; - -template -PhPoint PreprocessMultiply(const PhPointD& point) { - PhPoint out; - for (dimension_t i = 0; i < DIM; ++i) { - out[i] = point[i] * MY_MULTIPLIER; - } - return out; -} - -template -PhPointD PostprocessMultiply(const PhPoint& in) { - PhPointD out; - for (dimension_t i = 0; i < DIM; ++i) { - out[i] = ((double)in[i]) * MY_DIVIDER; - } - return out; -} - template using TestPoint = PhPointD; template -using TestTree = PhTreeD, PreprocessMultiply, PostprocessMultiply>; +using TestTree = PhTreeD>; class DoubleRng { public: @@ -134,7 +113,7 @@ void SmokeTestBasicOps() { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)._i); q++; diff --git a/phtree/phtree_f_test.cc b/phtree/phtree_f_test.cc new file mode 100644 index 00000000..fe45b4b1 --- /dev/null +++ b/phtree/phtree_f_test.cc @@ -0,0 +1,1005 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree.h" +#include +#include + +using namespace improbable::phtree; + +template +using TestPoint = PhPointF; + +template +using TestTree = PhTreeF; + +class FloatRng { + public: + FloatRng(double minIncl, double maxExcl) + : eng(), rnd{static_cast(minIncl), static_cast(maxExcl)} {} + + float next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(Id& rhs) { + return _i == rhs._i; + } + + Id& operator=(Id const& rhs) = default; + + int _i; +}; + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + + double _distance; + size_t _id; +}; + +bool comparePointDistance(PointDistance& i1, PointDistance& i2) { + return (i1._distance < i2._distance); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = p1[i] - p2[i]; + sum2 += d * d; + } + return sqrt(sum2); +} + +template +double distanceL1(const TestPoint& p1, const TestPoint& p2) { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(p1[i] - p2[i]); + } + return sum; +} + +template +void generateCube(std::vector>& points, size_t N) { + FloatRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + TestPoint point{}; + for (dimension_t d = 0; d < DIM; ++d) { + point[d] = rng.next(); + } + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < 
N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + + Id id(i); + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else { + ASSERT_TRUE(tree.insert(p, id).second); + } + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + if (i % 2 == 0) { + ASSERT_EQ(1, tree.erase(p)); + } else { + auto iter = tree.find(p); + ASSERT_EQ(1, tree.erase(iter)); + } + + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeFTest, SmokeTestBasicOps) { + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(10000); + SmokeTestBasicOps<20>(1000); +} + +TEST(PhTreeFTest, TestDebug) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + using Debug = 
PhTreeDebugHelper; + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_TRUE(tree.insert(p, id).second); + } + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(N, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + tree.clear(); + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); +} + +TEST(PhTreeFTest, TestInsert) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.insert(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + + // try add again + ASSERT_EQ(false, tree.insert(p, id).second); + ASSERT_EQ(i, tree.insert(p, id).first._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + } +} + 
+TEST(PhTreeFTest, TestEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again, this should _not_ replace the existing value + Id id2(-i); + ASSERT_EQ(false, tree.emplace(p, id2).second); + ASSERT_EQ(i, tree.emplace(p, id).first._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + + // Check that the returned value is a reference + tree.emplace(p, id2).first._i++; + ASSERT_EQ(i + 1, tree.emplace(p, id).first._i); + tree.emplace(p, id2).first = id; + ASSERT_EQ(i, tree.emplace(p, id).first._i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + } +} + +TEST(PhTreeFTest, TestSquareBrackets) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(0, tree[p]._i); + ASSERT_EQ(tree.count(p), 1); + if (i % 2 == 0) { + tree[p]._i = i; + } else { + tree[p] = id; + } + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try `add` again + ASSERT_EQ(i, tree[p]._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + 
} + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(i, tree[p]._i); + } +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], (int)i).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeFTest, TestClear) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 100; + std::vector> points; + + ASSERT_TRUE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); + + // try again + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); +} + +TEST(PhTreeFTest, TestFind) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + // test commutativity + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(tree.find(p)->_i, i); + i++; + } + + TestPoint p{1, 1, 10000000}; + auto result = tree.find(p); + ASSERT_EQ(result, tree.end()); + ASSERT_EQ(tree.end(), result); + + auto iter1 = tree.find(points[0]); + auto iter2 = tree.find(points[0]); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + +TEST(PhTreeFTest, TestUpdateWithEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + double delta = 20; + std::vector> points; + populate(tree, points, N); + + for (auto& p : points) { + auto pOld = p; + TestPoint pNew{static_cast(pOld[0] + delta), + 
static_cast(pOld[1] + delta), + static_cast(pOld[2] + delta)}; + int n = tree.erase(pOld); + ASSERT_EQ(1, n); + tree.emplace(pNew, 42); + ASSERT_EQ(1, tree.count(pNew)); + ASSERT_EQ(0, tree.count(pOld)); + p = pNew; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeFTest, TestEraseByIterator) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + auto iter = tree.find(p); + ASSERT_NE(tree.end(), iter); + int count = tree.erase(iter); + ASSERT_EQ(1, count); + ASSERT_EQ(tree.end(), tree.find(p)); + i++; + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + +TEST(PhTreeFTest, TestEraseByIteratorQuery) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + for (size_t i = 0; i < N; ++i) { + auto iter = tree.begin(); + ASSERT_NE(tree.end(), iter); + int count = tree.erase(iter); + ASSERT_EQ(1, count); + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + +TEST(PhTreeFTest, TestExtent) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e); + + auto iter1 = tree.begin(); + auto iter2 = tree.begin(); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + +template +struct FilterEvenId { + [[nodiscard]] constexpr bool IsEntryValid( + const PhPoint& key, const T& value) const { + return value._i % 2 == 0; + } + [[nodiscard]] constexpr bool IsNodeValid( + const PhPoint& prefix, int bits_to_ignore) const { + return true; + } +}; + +TEST(PhTreeFTest, TestExtentFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(FilterEvenId()); + while (qE != tree.end()) 
{ + ASSERT_TRUE(qE->_i > -1); + ASSERT_TRUE(qE->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e * 2); +} + +TEST(PhTreeFTest, TestRangeBasedForLoop) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t num_e1 = 0; + for (auto& x : tree) { + ASSERT_TRUE(x._i > -1); + num_e1++; + } + ASSERT_EQ(N, num_e1); + + size_t num_e2 = 0; + for (auto& x : tree) { + ASSERT_TRUE(x._i > -1); + num_e2++; + } + ASSERT_EQ(N, num_e2); +} + +template +void referenceQuery( + std::vector>& points, + TestPoint& min, + TestPoint& max, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool match = true; + for (dimension_t d = 0; d < DIM; d++) { + match &= p[d] >= min[d] && p[d] <= max[d]; + } + if (match) { + result.insert(i); + } + } +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. +template +void testQuery(TestPoint& min, TestPoint& max, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + result = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x._i, 0); + ASSERT_EQ(referenceResult.count(x._i), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +TEST(PhTreeFTest, TestWindowQuery0) { + const dimension_t dim = 3; + TestPoint p{-10000, -10000, -10000}; + int n = 0; + testQuery(p, p, 10000, n); + ASSERT_EQ(0, n); +} + +TEST(PhTreeFTest, TestWindowQuery1) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + // just read the entry + auto& x = *q; + ASSERT_EQ(i, x._i); + q++; + ASSERT_EQ(q, tree.end()); + n++; + 
} + ASSERT_EQ(N, n); +} + +TEST(PhTreeFTest, TestWindowQueryMany) { + const dimension_t dim = 3; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + int n = 0; + testQuery(min, max, 10000, n); + ASSERT_LE(3, n); + ASSERT_GE(100, n); +} + +TEST(PhTreeFTest, TestWindowQueryAll) { + const dimension_t dim = 3; + const size_t N = 10000; + TestPoint min{-10000, -10000, -10000}; + TestPoint max{10000, 10000, 10000}; + int n = 0; + testQuery(min, max, N, n); + ASSERT_EQ(N, n); +} + +TEST(PhTreeFTest, TestWindowQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + double query_length = 200; + size_t nn = 0; + for (int i = -120; i < 120; i++) { + TestPoint min{ + static_cast(i * 10.), static_cast(i * 9.), static_cast(i * 11.)}; + TestPoint max{static_cast(i * 10 + query_length), + static_cast(i * 9 + query_length), + static_cast(i * 11 + query_length)}; + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { + auto& x = *it; + ASSERT_EQ(referenceResult.count(x._i), 1); + n++; + nn++; + } + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + if (i > -50 && i < 50) { + ASSERT_LE(1, n); + } + ASSERT_GE(100, n); + } + ASSERT_LE(500, nn); + ASSERT_GE(5000, nn); +} + +TEST(PhTreeFTest, TestWindowForEachQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + double query_length = 200; + size_t nn = 0; + for (int i = -120; i < 120; i++) { + TestPoint min{ + static_cast(i * 10.), static_cast(i * 9.), static_cast(i * 11.)}; + TestPoint max{static_cast(i * 10 + query_length), + static_cast(i * 9 + query_length), + static_cast(i * 11 + query_length)}; + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + struct Counter { + 
void operator()(TestPoint key, Id& t) { + ++n_; + ASSERT_EQ(referenceResult.count(t._i), 1); + } + std::set& referenceResult; + size_t n_ = 0; + }; + + size_t n = 0; + Counter callback{referenceResult, 0}; + tree.for_each({min, max}, callback); + n += callback.n_; + nn += callback.n_; + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + if (i > -50 && i < 50) { + ASSERT_LE(1, n); + } + ASSERT_GE(100, n); + } + ASSERT_LE(500, nn); + ASSERT_GE(5000, nn); +} + +TEST(PhTreeFTest, TestWindowQueryIterators) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q1 = tree.begin_query({p, p}); + auto q2 = tree.begin_query({p, p}); + ASSERT_NE(q1, tree.end()); + ASSERT_NE(q2, tree.end()); + ASSERT_EQ(q1, q2); + q1++; + ASSERT_NE(q1, q2); + q2++; + n++; + } + ASSERT_EQ(N, n); +} + +TEST(PhTreeFTest, TestWindowQueryFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + auto qE = tree.begin_query({min, max}, FilterEvenId()); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + ASSERT_TRUE(qE->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_LE(2, num_e); + ASSERT_GE(50, num_e); +} + +template +struct DistanceEuclideanFloat { + double operator()(const TestPoint& v1, const TestPoint& v2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d2 = double(v1[i] - v2[i]); + sum2 += d2 * d2; + } + return sqrt(sum2); + }; +}; + +TEST(PhTreeFTest, TestKnnQuery) { + // deliberately allowing outside of main points range + FloatRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 1000; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + for (size_t round = 0; 
round < 100; round++) { + TestPoint center{rng.next(), rng.next(), rng.next()}; + + // sort points manually + std::vector sorted_data; + for (size_t i = 0; i < points.size(); i++) { + double dist = distance(center, points[i]); + sorted_data.emplace_back(dist, i); + } + std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistance); + + size_t n = 0; + double prevDist = -1; + auto q = tree.begin_knn_query(Nq, center, DistanceEuclideanFloat<3>()); + while (q != tree.end()) { + // just read the entry + auto& e = *q; + ASSERT_EQ(sorted_data[n]._distance, q.distance()); + ASSERT_EQ(sorted_data[n]._id, e._i); + ASSERT_EQ(points[sorted_data[n]._id], q.first()); + ASSERT_EQ(sorted_data[n]._id, q.second()._i); + ASSERT_GE(q.distance(), prevDist); + prevDist = q.distance(); + q++; + n++; + } + ASSERT_EQ(Nq, n); + } +} + +template +struct PhDistanceLongL1 { + double operator()(const TestPoint& v1, const TestPoint& v2) const { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(v1[i] - v2[i]); + } + return sum; + }; +}; + +TEST(PhTreeFTest, TestKnnQueryFilterAndDistanceL1) { + // deliberately allowing outside of main points range + FloatRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 100; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + for (size_t round = 0; round < 100; round++) { + TestPoint center{rng.next(), rng.next(), rng.next()}; + + // sort points manually by L1; skip every 2nd point + std::vector sorted_data; + for (size_t i = 0; i < points.size(); i += 2) { + double dist = distanceL1(center, points[i]); + sorted_data.emplace_back(dist, i); + } + std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistance); + + size_t n = 0; + double prevDist = -1; + auto q = tree.begin_knn_query(Nq, center, PhDistanceLongL1(), FilterEvenId()); + while (q != tree.end()) { + // just read the entry + auto& e = *q; + ASSERT_EQ(sorted_data[n]._distance, q.distance()); + 
// Note that this may fail for larger datasets if several points have the same distance. + ASSERT_EQ(sorted_data[n]._id, e._i); + ASSERT_EQ(points[sorted_data[n]._id], q.first()); + ASSERT_EQ(sorted_data[n]._id, q.second()._i); + ASSERT_GE(q.distance(), prevDist); + prevDist = q.distance(); + q++; + n++; + } + ASSERT_EQ(Nq, n); + } +} + +TEST(PhTreeFTest, TestKnnQueryIterator) { + // deliberately allowing outside of main points range + FloatRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 1000; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + TestPoint center{rng.next(), rng.next(), rng.next()}; + size_t n = 0; + auto q1 = tree.begin_knn_query(Nq, center, DistanceEuclideanFloat<3>()); + auto q2 = tree.begin_knn_query(Nq, center, DistanceEuclideanFloat<3>()); + while (q1 != tree.end()) { + ASSERT_NE(q1, tree.end()); + ASSERT_NE(q2, tree.end()); + ASSERT_EQ(q1, q2); + q1++; + ASSERT_NE(q1, q2); + q2++; + n++; + } + ASSERT_EQ(Nq, n); +} + +TEST(PhTreeFTest, SmokeTestPoint0) { + // Test edge case: empty tree + TestPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree; + ASSERT_EQ(tree.size(), 0); + ASSERT_EQ(tree.find(p), tree.end()); + + auto q_window = tree.begin_query({p, p}); + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclideanFloat<3>()); + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeFTest, SmokeTestPointInfinity) { + // Test infinity. 
+ float positive_infinity = std::numeric_limits::infinity(); + float negative_infinity = -positive_infinity; + PhPointF<3> p_pos{positive_infinity, positive_infinity, positive_infinity}; + PhPointF<3> p_neg{negative_infinity, negative_infinity, negative_infinity}; + PhPointF<3> p{1, 2, 3}; + PhTreeF<3, Id> tree; + tree.emplace(p, Id{1}); + tree.emplace(p_pos, Id{10}); + tree.emplace(p_neg, Id{-10}); + ASSERT_EQ(tree.size(), 3); + ASSERT_EQ(tree[p_neg]._i, -10); + ASSERT_EQ(tree[p]._i, 1); + ASSERT_EQ(tree[p_pos]._i, 10); + + ASSERT_EQ(positive_infinity, positive_infinity); + ASSERT_EQ(negative_infinity, negative_infinity); + ASSERT_GT(positive_infinity, negative_infinity); + + // Note that the tree returns result in z-order, however, since the z-order is based on + // the (unsigned) bit representation, negative values come _after_ positive values. + auto q_window = tree.begin_query({p_neg, p_pos}); + ASSERT_EQ(1, q_window->_i); + ++q_window; + ASSERT_EQ(10, q_window->_i); + ++q_window; + ASSERT_EQ(-10, q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(10, q_extent->_i); + ++q_extent; + ASSERT_EQ(-10, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclideanFloat<3>()); + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_NE(q_knn, tree.end()); + ++q_knn; + ASSERT_NE(q_knn, tree.end()); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree.erase(p_neg)); + ASSERT_EQ(1, tree.erase(p)); + ASSERT_EQ(1, tree.erase(p_pos)); + ASSERT_EQ(0, tree.size()); + ASSERT_EQ(0, tree.erase(p_neg)); + ASSERT_EQ(0, tree.erase(p_pos)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeFTest, SmokeTestTreeAPI) { + std::map mapPtr; + PhTreeF<3, Id*> treePtr; + Id* idPtr = new Id(1); + treePtr.emplace(PhPointF<3>{1, 2, 3}, idPtr); + treePtr.clear(); + delete idPtr; + + std::map mapConst; + 
PhTreeF<3, const Id> treeConst; + treeConst.emplace(PhPointF<3>{1, 2, 3}, Id(1)); +} diff --git a/phtree/phtree_multimap.h b/phtree/phtree_multimap.h new file mode 100644 index 00000000..3bb3c72e --- /dev/null +++ b/phtree/phtree_multimap.h @@ -0,0 +1,739 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_PHTREE_MULTIMAP_H +#define PHTREE_PHTREE_MULTIMAP_H + +#include "common/common.h" +#include "v16/phtree_v16.h" +#include + +namespace improbable::phtree { + +/* + * PH-Tree multi-map main class. + * + * The PhTreeMultiMap is a wrapper around a normal PH-Tree (single value per key). The wrapper uses + * collections to store more than one value per key. + * By default, this multi-map is backed by std::unordered_set. + * + * The API follows mostly the std::unordered_multimap, exceptions are pointed out. + * + * Differences to PhTree + * - This is a multi-map and hence follows the std::unordered_multimap rather than std::map + * - erase() returns an iterator instead of a pairs {iterator, bool) + * - similar to the normal PH-Tree, emplace() returns a reference to the value instead of an + * iterator + * + * For more information please refer to the README of this project. + */ + +namespace { + +/* + * Base class for the internal PH-Tree multi-map iterators. 
+ * + * This base class must be distinct from the other Iterator classes because it must be agnostic of + * the types of the fields that hold iterators. If it knew about these types then we would need + * to provide them for the ==/!= operators, which would then make it impossible to compare + * the generic end() iterator with any specialized iterator. + */ +template +class IteratorBase { + friend PHTREE; + using T = typename PHTREE::ValueType; + + public: + explicit IteratorBase() noexcept : current_value_ptr_{nullptr}, is_finished_{false} {} + + T& operator*() const noexcept { + assert(current_value_ptr_); + return const_cast(*current_value_ptr_); + } + + T* operator->() const noexcept { + assert(current_value_ptr_); + return const_cast(current_value_ptr_); + } + + friend bool operator==( + const IteratorBase& left, const IteratorBase& right) noexcept { + // Note: The following compares pointers to Entry objects (actually: their values T) + // so it should be _fast_ and return 'true' only for identical entries. 
+ static_assert(std::is_pointer_v); + return (left.is_finished_ && right.Finished()) || + (!left.is_finished_ && !right.Finished() && + left.current_value_ptr_ == right.current_value_ptr_); + } + + friend bool operator!=( + const IteratorBase& left, const IteratorBase& right) noexcept { + return !(left == right); + } + + protected: + [[nodiscard]] bool Finished() const noexcept { + return is_finished_; + } + + void SetFinished() noexcept { + is_finished_ = true; + current_value_ptr_ = nullptr; + } + + void SetCurrentValue(const T* current_value_ptr) noexcept { + current_value_ptr_ = current_value_ptr; + } + + private: + const T* current_value_ptr_; + bool is_finished_; +}; + +template +class IteratorNormal : public IteratorBase { + friend PHTREE; + using BucketIterType = typename PHTREE::BucketIterType; + using PhTreeIterEndType = typename PHTREE::EndType; + + public: + explicit IteratorNormal(const PhTreeIterEndType& iter_ph_end) noexcept + : IteratorBase() + , iter_ph_end_{iter_ph_end} + , iter_ph_{iter_ph_end} + , iter_bucket_{} + , filter_{} { + this->SetFinished(); + } + + // Why are we passing two iterators by value + std::move? + // See: https://abseil.io/tips/117 + IteratorNormal( + const PhTreeIterEndType& iter_ph_end, + ITERATOR_PH iter_ph, + BucketIterType iter_bucket, + const FILTER filter = FILTER()) noexcept + : IteratorBase() + , iter_ph_end_{iter_ph_end} + , iter_ph_{std::move(iter_ph)} + , iter_bucket_{std::move(iter_bucket)} + , filter_{filter} { + if (iter_ph_ == iter_ph_end_) { // 'iter_ph' was moved from above; use the member + this->SetFinished(); + return; + } + FindNextElement(); + } + + IteratorNormal& operator++() noexcept { + ++iter_bucket_; + FindNextElement(); + return *this; + } + + IteratorNormal operator++(int) noexcept { + IteratorNormal iterator(*this); + ++(*this); + return iterator; + } + + /* + * Returns the external key (the 'first' part of the key/value pair). 
+ */ + auto first() const { + return iter_ph_.first(); + } + + protected: + auto& GetIteratorOfBucket() const noexcept { + return iter_bucket_; + } + + auto& GetIteratorOfPhTree() const noexcept { + return iter_ph_; + } + + private: + void FindNextElement() { + while (iter_ph_ != iter_ph_end_) { + while (iter_bucket_ != iter_ph_->end()) { + // We filter only entries here, nodes are filtered elsewhere + if (filter_.IsEntryValid(iter_ph_.GetCurrentResult()->GetKey(), *iter_bucket_)) { + this->SetCurrentValue(&(*iter_bucket_)); + return; + } + ++iter_bucket_; + } + ++iter_ph_; + if (iter_ph_ != iter_ph_end_) { + iter_bucket_ = iter_ph_->begin(); + } + } + // finished + this->SetFinished(); + } + + PhTreeIterEndType& iter_ph_end_; + ITERATOR_PH iter_ph_; + BucketIterType iter_bucket_; + FILTER filter_; +}; + +template +class IteratorKnn : public IteratorNormal { + using BucketIterType = typename PHTREE::BucketIterType; + using PhTreeIterEndType = typename PHTREE::EndType; + + public: + IteratorKnn( + const PhTreeIterEndType& iter_ph_end, + const ITERATOR_PH iter_ph, + BucketIterType iter_bucket, + const FILTER filter) noexcept + : IteratorNormal(iter_ph_end, iter_ph, iter_bucket, filter) {} + + [[nodiscard]] double distance() const noexcept { + return this->GetIteratorOfPhTree().distance(); + } +}; + +} // namespace + +/* + * The PhTreeMultiMap class. 
+ */ +template < + dimension_t DIM, + typename T, + typename CONVERTER = ConverterNoOp, + typename BUCKET = std::unordered_set, + bool POINT_KEYS = true, + typename DEFAULT_QUERY_TYPE = QueryPoint> +class PhTreeMultiMap { + friend PhTreeDebugHelper; + using KeyInternal = typename CONVERTER::KeyInternal; + using QueryBox = typename CONVERTER::QueryBoxExternal; + using Key = typename CONVERTER::KeyExternal; + static constexpr dimension_t DimInternal = CONVERTER::DimInternal; + using PHTREE = PhTreeMultiMap; + + public: + using ValueType = T; + using BucketIterType = decltype(std::declval().begin()); + using EndType = decltype(std::declval>().end()); + + explicit PhTreeMultiMap(CONVERTER converter = CONVERTER()) + : tree_{converter}, converter_{converter}, size_{0} {} + + /* + * Attempts to build and insert a key and a value into the tree. + * + * @param key The key for the new entry. + * + * @param __args Arguments used to generate a new value. + * + * @return A pair, whose first element points to the possibly inserted pair, + * and whose second element is a bool that is true if the pair was actually inserted. + * + * This function attempts to build and insert a (key, value) pair into the tree. The PH-Tree is + * effectively a multi-set, so if an entry with the same key/value was already in the tree, it + * returns that entry instead of inserting a new one. + */ + template + std::pair emplace(const Key& key, _Args&&... __args) { + auto& outer_iter = tree_.emplace(converter_.pre(key)).first; + auto bucket_iter = outer_iter.emplace(std::forward<_Args>(__args)...); + size_ += bucket_iter.second ? 1 : 0; + return {const_cast(*bucket_iter.first), bucket_iter.second}; + } + + /* + * The emplace_hint() method uses an iterator as hint for insertion. + * The hint is ignored if it is not useful or is equal to end(). + * + * Iterators should normally not be used after the tree has been modified. 
As an exception to + * this rule, an iterator can be used as hint if it was previously used with at most one call + * to erase() and if no other modifications occurred. + * The following is valid: + * + * // Move value from key1 to key2 + * auto iter = tree.find(key1); + * auto value = iter.second(); // The value may become invalid in erase() + * erase(iter); + * emplace_hint(iter, key2, value); // the iterator can still be used as hint here + */ + template + std::pair emplace_hint(const ITERATOR& iterator, const Key& key, _Args&&... __args) { + auto result_ph = tree_.emplace_hint(iterator.GetIteratorOfPhTree(), converter_.pre(key)); + auto& bucket = result_ph.first; + if (result_ph.second) { + // new bucket + auto result = bucket.emplace(std::forward<_Args>(__args)...); + size_ += result.second; + return {const_cast(*result.first), result.second}; + } else { + // existing bucket -> we can use emplace_hint with iterator + size_t old_size = bucket.size(); + auto result = + bucket.emplace_hint(iterator.GetIteratorOfBucket(), std::forward<_Args>(__args)...); + bool success = old_size < bucket.size(); + size_ += success; + return {const_cast(*result), success}; + } + } + + /* + * See std::unordered_multimap::insert(). + * + * @return a pair consisting of the inserted value (or to the value that prevented the + * insertion if the key/value already existed) and a bool denoting whether the insertion + * took place. + */ + std::pair insert(const Key& key, const T& value) { + return emplace(key, value); + } + + /* + * @return the number of values associated with the provided key, or '0' if there are none. + */ + size_t count(const Key& key) const { + auto iter = tree_.find(converter_.pre(key)); + if (iter != tree_.end()) { + return iter->size(); + } + return 0; + } + + /* + * Estimates the result count of a rectangular window query by counting the sizes of all buckets + * that overlap with the query box. 
This estimate function should be much faster than a normal + * query, especially in trees with many entries per bucket. + * + * @param query_box The query window. + * @param query_type The type of query, such as QueryIntersect or QueryInclude + */ + template + size_t estimate_count(QueryBox query_box, QUERY_TYPE query_type = QUERY_TYPE()) const { + size_t n = 0; + auto counter_lambda = [&](const Key& key, const BUCKET& bucket) { n += bucket.size(); }; + tree_.for_each(query_type(converter_.pre_query(query_box)), counter_lambda); + return n; + } + + /* + * See std::unordered_multimap::find(). + * + * @param key the key to look up + * @return an iterator that points either to the first value associated with the key or + * to {@code end()} if no value was found + */ + auto find(const Key& key) const { + auto outer_iter = tree_.find(converter_.pre(key)); + if (outer_iter == tree_.end()) { + return CreateIterator(tree_.end(), bucket_dummy_end_); + } + auto bucket_iter = outer_iter.second().begin(); + return CreateIterator(outer_iter, bucket_iter); + } + + /* + * See std::unordered_multimap::find(). + * + * @param key the key to look up + * @param value the value to look up + * @return an iterator that points either to the associated value of the key/value pair + * or to {@code end()} if the key/value pair was not found + */ + auto find(const Key& key, const T& value) const { + auto outer_iter = tree_.find(converter_.pre(key)); + if (outer_iter == tree_.end()) { + return CreateIterator(tree_.end(), bucket_dummy_end_); + } + auto bucket_iter = outer_iter.second().find(value); + return CreateIterator(outer_iter, bucket_iter); + } + + /* + * See std::unordered_multimap::erase(). Removes the provided key/value pair if it exists. + * + * @return '1' if the key/value pair was found, otherwise '0'. 
+ */ + size_t erase(const Key& key, const T& value) { + auto iter_outer = tree_.find(converter_.pre(key)); + if (iter_outer != tree_.end()) { + auto& bucket = *iter_outer; + auto result = bucket.erase(value); + if (bucket.empty()) { + tree_.erase(iter_outer); + } + size_ -= result; + return result; + } + return 0; + } + + /* + * See std::map::erase(). Removes any entry located at the provided iterator. + * + * This function uses the iterator to directly erase the entry so it is usually faster than + * erase(key, value). + * + * @return '1' if a value was found, otherwise '0'. + */ + template + size_t erase(const ITERATOR& iterator) { + static_assert( + std::is_convertible_v*>, + "erase(iterator) requires an iterator argument. For erasing by key please use " + "erase(key, value)."); + if (iterator != end()) { + auto& bucket = const_cast(*iterator.GetIteratorOfPhTree()); + size_t old_size = bucket.size(); + bucket.erase(iterator.GetIteratorOfBucket()); + bool success = bucket.size() < old_size; + if (bucket.empty()) { + success &= tree_.erase(iterator.GetIteratorOfPhTree()) > 0; + } + size_ -= success; + return success; + } + return 0; + } + + /* + * This function attempts to remove the 'value' from 'old_key' and reinsert it for 'new_key'. + * + * The relocate will report _success_ in the following cases: + * - the value was removed from the old position and reinserted at the new position + * - the position and new position refer to the same bucket. + * + * The relocate will report_failure_ in the following cases: + * - The value was already present in the new position + * - The value was not present in the old position + * + * This method will _always_ attempt to insert the value at the new position even if the value + * was not found at the old position. + * This method will _not_ remove the value from the old position if it is already present at the + * new position. 
+ * + * @param old_key The old position + * @param new_key The new position + * @param always_erase Setting this flag to 'true' ensures that the value is removed from + * the old position even if it is already present at the new position. This may double the + * execution cost of this method. The default is 'false'. + * @return '1' if a value was found and reinserted, otherwise '0'. + */ + size_t relocate( + const Key& old_key, const Key& new_key, const T& value, bool always_erase = false) { + // Be smart: insert first, if the target-map already contains the entry we can avoid erase() + auto new_key_pre = converter_.pre(new_key); + auto& new_bucket = tree_.emplace(new_key_pre).first; + auto new_result = new_bucket.emplace(value); + if (!new_result.second) { + // Entry is already in correct place -> abort + // Return '1' if old/new refer to the same bucket, otherwise '0' + if (converter_.pre(old_key) == new_key_pre) { + return 1; + } + if (!always_erase) { + // Abort, unless we insist on erase() + return 0; + } + } + + auto old_outer_iter = tree_.find(converter_.pre(old_key)); + if (old_outer_iter == tree_.end()) { + // No entry for old_key -> fail + return 0; + } + + auto old_bucket_iter = old_outer_iter->find(value); + if (old_bucket_iter == old_outer_iter->end()) { + return 0; + } + old_outer_iter->erase(old_bucket_iter); + + // clean up + if (old_outer_iter->empty()) { + tree_.erase(old_outer_iter); + } + return 1; + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * functions must implement the same signature as the default 'FilterNoOp'. + * + * @param callback The callback function to be called for every entry that matches the filter. + * The callback requires the following signature: callback(const PhPointD &, const T &) + * @param filter An optional filter function. 
The filter function allows filtering entries and + * sub-nodes before they are passed to the callback or traversed. Any filter function must + * follow the signature of the default 'FilterNoOp`. + * The default 'FilterNoOp` filter matches all entries. + */ + template + void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { + CallbackWrapper inner_callback{callback, filter, converter_}; + tree_.for_each(inner_callback, WrapFilter(filter)); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum respectively the maximum keys in every dimension. + * @param query_box The query window. + * @param callback The callback function to be called for every entry that matches the query + * and filter. + * The callback requires the following signature: callback(const PhPointD &, const T &) + * @param query_type The type of query, such as QueryIntersect or QueryInclude + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp`. + * The default 'FilterNoOp` filter matches all entries. + */ + template < + typename CALLBACK_FN, + typename FILTER = FilterNoOp, + typename QUERY_TYPE = DEFAULT_QUERY_TYPE> + void for_each( + QueryBox query_box, + CALLBACK_FN& callback, + const FILTER& filter = FILTER(), + QUERY_TYPE query_type = QUERY_TYPE()) const { + CallbackWrapper inner_callback{callback, filter, converter_}; + tree_.for_each( + query_type(converter_.pre_query(query_box)), inner_callback, WrapFilter(filter)); + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * functions must implement the same signature as the default 'FilterNoOp'. 
+ * + * @return an iterator over all (filtered) entries in the tree, + */ + template + auto begin(FILTER filter = FILTER()) const { + auto outer_iter = tree_.begin(WrapFilter(filter)); + if (outer_iter == tree_.end()) { + return CreateIterator(outer_iter, bucket_dummy_end_, filter); + } + auto bucket_iter = outer_iter.second().begin(); + assert(bucket_iter != outer_iter.second().end()); + return CreateIterator(outer_iter, bucket_iter, filter); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum respectively the maximum keys in every dimension. + * @param query_box The query window. + * @param query_type The type of query, such as QueryIntersect or QueryInclude + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp`. + * @return Result iterator. + */ + template + auto begin_query( + const QueryBox& query_box, + FILTER filter = FILTER(), + QUERY_TYPE query_type = QUERY_TYPE()) const { + auto outer_iter = + tree_.begin_query(query_type(converter_.pre_query(query_box)), WrapFilter(filter)); + if (outer_iter == tree_.end()) { + return CreateIterator(outer_iter, bucket_dummy_end_, filter); + } + auto bucket_iter = outer_iter.second().begin(); + assert(bucket_iter != outer_iter.second().end()); + return CreateIterator(outer_iter, bucket_iter, filter); + } + + /* + * Locate nearest neighbors for a given point in space. + * + * NOTE: This method is not (currently) available for box keys. + * + * @param min_results number of entries to be returned. More entries may or may not be returned + * when several entries have the same distance. 
+ * @param center center point + * @param distance_function optional distance function, defaults to euclidean distance + * @param filter optional filter predicate that excludes nodes/entries before their distance is + * calculated. + * @return Result iterator. + */ + template < + typename DISTANCE, + typename FILTER = FilterNoOp, + // Some magic to disable this in case of box keys + bool DUMMY = POINT_KEYS, + typename std::enable_if::type = 0> + auto begin_knn_query( + size_t min_results, + const Key& center, + DISTANCE distance_function = DISTANCE(), + FILTER filter = FILTER()) const { + // We use pre() instead of pre_query() here because, strictly speaking, we want to + // find the nearest neighbors of a (fictional) key, which may as well be a box. + auto outer_iter = tree_.begin_knn_query( + min_results, converter_.pre(center), distance_function, WrapFilter(filter)); + if (outer_iter == tree_.end()) { + return CreateIteratorKnn(outer_iter, bucket_dummy_end_, filter); + } + auto bucket_iter = outer_iter.second().begin(); + assert(bucket_iter != outer_iter.second().end()); + return CreateIteratorKnn(outer_iter, bucket_iter, filter); + } + + /* + * @return An iterator representing the tree's 'end'. + */ + const auto& end() const { + return the_end_; + } + + /* + * Remove all entries from the tree. + */ + void clear() { + tree_.clear(); + size_ = 0; + } + + /* + * @return the number of entries (key/value pairs) in the tree. + */ + [[nodiscard]] size_t size() const { + return size_; + } + + /* + * @return 'true' if the tree is empty, otherwise 'false'. + */ + [[nodiscard]] bool empty() const { + return tree_.empty(); + } + + /* + * @return the converter associated with this tree. 
+ */ + [[nodiscard]] const CONVERTER& converter() const { + return converter_; + } + + private: + // This is used by PhTreeDebugHelper + const auto& GetInternalTree() const { + return tree_; + } + + template + auto CreateIterator( + OUTER_ITER outer_iter, BucketIterType bucket_iter, FILTER filter = FILTER()) const { + return IteratorNormal( + tree_.end(), std::move(outer_iter), std::move(bucket_iter), filter); + } + + template + auto CreateIteratorKnn( + OUTER_ITER outer_iter, BucketIterType bucket_iter, FILTER filter = FILTER()) const { + return IteratorKnn( + tree_.end(), std::move(outer_iter), std::move(bucket_iter), filter); + } + + template + static auto WrapFilter(FILTER filter) { + // We always have two iterators, one that traverses the PH-Tree and one that traverses the + // bucket. Using the FilterWrapper we create a new Filter for the PH-Tree iterator. This new + // filter checks only if nodes are valid. It cannot check whether buckets are valid. + // The original filter is then used when we iterate over the entries of a bucket. At this + // point, we do not need to check IsNodeValid anymore for each entry (see `IteratorNormal`). + struct FilterWrapper { + [[nodiscard]] constexpr bool IsEntryValid( + const KeyInternal& key, const BUCKET& value) const { + // This filter is checked in the Iterator. + return true; + } + [[nodiscard]] constexpr bool IsNodeValid( + const KeyInternal& prefix, int bits_to_ignore) const { + return filter_.IsNodeValid(prefix, bits_to_ignore); + } + FILTER filter_; + }; + return FilterWrapper{filter}; + } + + template + struct CallbackWrapper { + /* + * The CallbackWrapper ensures that we call the callback on each entry of the bucket. + * The vanilla PH-Tree call it only on the bucket itself. 
+ */ + void operator()(const Key& key, const BUCKET& bucket) const { + auto internal_key = converter_.pre(key); + for (auto& entry : bucket) { + if (filter_.IsEntryValid(internal_key, entry)) { + callback_(key, entry); + } + } + } + CALLBACK_FN& callback_; + const FILTER filter_; + const CONVERTER& converter_; + }; + + v16::PhTreeV16 tree_; + CONVERTER converter_; + IteratorNormal the_end_{tree_.end()}; + BucketIterType bucket_dummy_end_; + size_t size_; +}; + +/** + * A PH-Tree multi-map that uses (axis aligned) points as keys. + * The points are defined with 64bit 'double' floating point coordinates. + * + * See 'PhTreeD' for details. + */ +template < + dimension_t DIM, + typename T, + typename CONVERTER = ConverterIEEE, + typename BUCKET = std::unordered_set> +using PhTreeMultiMapD = PhTreeMultiMap; + +template < + dimension_t DIM, + typename T, + typename CONVERTER_BOX, + typename BUCKET = std::unordered_set> +using PhTreeMultiMapBox = PhTreeMultiMap; + +/** + * A PH-Tree multi-map that uses (axis aligned) boxes as keys. + * The boxes are defined with 64bit 'double' floating point coordinates. + * + * See 'PhTreeD' for details. + */ +template < + dimension_t DIM, + typename T, + typename CONVERTER_BOX = ConverterBoxIEEE, + typename BUCKET = std::unordered_set> +using PhTreeMultiMapBoxD = PhTreeMultiMapBox; + +} // namespace improbable::phtree + +#endif // PHTREE_PHTREE_MULTIMAP_H diff --git a/phtree/phtree_multimap_box_d_test.cc b/phtree/phtree_multimap_box_d_test.cc new file mode 100644 index 00000000..09e7b9b2 --- /dev/null +++ b/phtree/phtree_multimap_box_d_test.cc @@ -0,0 +1,908 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree_multimap.h" +#include +#include +#include + +using namespace improbable::phtree; + +// Number of entries that have the same coordinate +static const size_t NUM_DUPL = 4; +static const double WORLD_MIN = -1000; +static const double WORLD_MAX = 1000; +static const double BOX_LEN = 10; + +template +using TestPoint = PhBoxD; + +template +using TestTree = PhTreeMultiMapBoxD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i), data_{0} {}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id& operator=(Id const& rhs) = default; + + int _i; + int data_; +}; + +namespace std { +template <> +struct hash { + size_t operator()(const Id& x) const { + return std::hash{}(x._i); + } +}; +}; // namespace std + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + + double _distance; + int _id; +}; + +bool comparePointDistanceAndId(PointDistance& i1, PointDistance& i2) { + return (i1._distance != i2._distance) ? 
(i1._distance < i2._distance) : (i1._id < i2._id); +} + +template +double distance(const PhPointD& p1, const PhPointD& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = p1[i] - p2[i]; + sum2 += d * d; + } + return sqrt(sum2); +} + +template +void generateCube(std::vector>& points, size_t N, double box_Len = BOX_LEN) { + assert(N % NUM_DUPL == 0); + DoubleRng rng(WORLD_MIN, WORLD_MAX); + auto reference_set = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N / NUM_DUPL; i++) { + // create duplicates, ie. entries with the same coordinates. However, avoid unintentional + // duplicates. + TestPoint key{}; + for (dimension_t d = 0; d < DIM; ++d) { + key.min()[d] = rng.next(); + key.max()[d] = key.min()[d] + box_Len; + } + if (reference_set.count(key) != 0) { + i--; + continue; + } + reference_set.emplace(key, i); + for (size_t dupl = 0; dupl < NUM_DUPL; dupl++) { + auto point = TestPoint(key); + points.push_back(point); + } + } + ASSERT_EQ(reference_set.size(), N / NUM_DUPL); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps(size_t N) { + TestTree tree; + + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_LE(tree.count(p), i % NUM_DUPL); + if (i % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + + Id id(i); + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else { + ASSERT_TRUE(tree.insert(p, id).second); + } + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, 
tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + // With intersection queries we may get multiple results. + size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { + found += (i / NUM_DUPL == (*q)._i / NUM_DUPL); + } + ASSERT_EQ(NUM_DUPL, found); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_NE(tree.find(p), tree.end()); + size_t expected_remaining = (N - i - 1) % NUM_DUPL + 1; + ASSERT_EQ(tree.count(p), expected_remaining); + ASSERT_EQ(i, tree.find(p, id)->_i); + if (i % 3 == 0) { + ASSERT_EQ(1, tree.erase(p, id)); + } else { + auto iter = tree.find(p, id); + ASSERT_EQ(1, tree.erase(iter)); + } + + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p, id)); + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeMMBoxDTest, SmokeTestBasicOps) { + SmokeTestBasicOps<1>(100); + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(1000); + SmokeTestBasicOps<20>(1000); + SmokeTestBasicOps<31>(100); +} + +TEST(PhTreeMMBoxDTest, TestDebug) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + using Debug = PhTreeDebugHelper; + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, 
Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_TRUE(tree.insert(p, id).second); + } + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(N / NUM_DUPL, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + tree.clear(); + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); +} + +TEST(PhTreeMMBoxDTest, TestInsert) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.insert(p, id).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + + // try add again + ASSERT_EQ(false, tree.insert(p, id).second); + ASSERT_EQ(i, tree.insert(p, id).first._i); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + // With intersection queries we may get more than NUM_DUPL results. 
+ size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { + found += (i / NUM_DUPL == (*q)._i / NUM_DUPL); + } + ASSERT_EQ(NUM_DUPL, found); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), NUM_DUPL); + Id id(i); + ASSERT_EQ(i, tree.find(p, id)->_i); + ASSERT_EQ(i / NUM_DUPL, tree.find(p)->_i / NUM_DUPL); + } +} + +TEST(PhTreeMMBoxDTest, TestEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (int i = 0; i < (int)N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again (same `identity`), this should NOT replace the existing value + Id id2(i); + id2.data_ = 42; + ASSERT_EQ(false, tree.emplace(p, id2).second); + ASSERT_EQ(i, tree.find(p, id2)->_i); + ASSERT_EQ(0, tree.find(p, id2)->data_); + ASSERT_EQ(i, tree.emplace(p, id).first._i); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + + // Check that the returned value is a reference + tree.emplace(p, id2).first.data_++; + ASSERT_EQ(1, tree.find(p, id)->data_); + tree.emplace(p, id2).first.data_ = 0; + ASSERT_EQ(0, tree.emplace(p, id).first.data_); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + // With intersection queries we may get more than NUM_DUPL results. 
+ size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { + found += (i / NUM_DUPL == (*q)._i / NUM_DUPL); + } + ASSERT_EQ(NUM_DUPL, found); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(tree.count(p), NUM_DUPL); + ASSERT_EQ(i, tree.find(p, id)->_i); + } +} + +template +void populate( + TestTree& tree, + std::vector>& points, + size_t N, + double box_len = BOX_LEN) { + generateCube(points, N, box_len); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +template +void populate( + TestTree& tree, + std::vector>& points, + size_t N, + double box_len = BOX_LEN) { + generateCube(points, N, box_len); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], (int)i).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeMMBoxDTest, TestClear) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 100; + std::vector> points; + + ASSERT_TRUE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); + + // try again + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); +} + +TEST(PhTreeMMBoxDTest, TestFind) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + // test commutativity + Id id(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_NE(tree.find(p, id), tree.end()); + ASSERT_NE(tree.end(), tree.find(p, id)); + ASSERT_EQ(tree.find(p, id)->_i, i); + i++; + } + + TestPoint p({1, 1, 1}, {2, 2, 10000000}); + auto result = tree.find(p); + ASSERT_EQ(result, tree.end()); + ASSERT_EQ(tree.end(), result); + + auto iter1 = tree.find(points[0]); + auto iter2 = 
tree.find(points[0]); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + +TEST(PhTreeMMBoxDTest, TestUpdateWithEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + double delta = 20; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + TestPoint pNew( + {pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}, + {pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}); + size_t count_new = tree.count(pNew); + size_t count_old = tree.count(pOld); + size_t n = tree.erase(pOld, Id(i)); + ASSERT_EQ(1U, n); + tree.emplace(pNew, Id(i)); + ASSERT_EQ(count_new + 1, tree.count(pNew)); + ASSERT_EQ(count_old - 1, tree.count(pOld)); + p = pNew; + ++i; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeMMBoxDTest, TestUpdateWithEmplaceHint) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 0.1, 1, 10}; + std::vector> points; + populate(tree, points, N); + + int i = 0; + size_t d_n = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + double delta = deltas[d_n]; + PhPointD min{pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}; + PhPointD max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; + TestPoint pNew{min, max}; + auto iter = tree.find(pOld, Id(i)); + size_t n = tree.erase(iter); + ASSERT_EQ(1U, n); + ASSERT_TRUE(tree.emplace_hint(iter, pNew, Id(i)).second); + ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); + auto iterNew = tree.find(pNew, Id(i)); + ASSERT_FALSE(tree.emplace_hint(iterNew, pNew, Id(i)).second); + p = pNew; + ++i; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 0.1, 1, 10}; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + size_t d_n = 0; + for 
(auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + double delta = deltas[d_n]; + PhPointD min{pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}; + PhPointD max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; + TestPoint pNew{min, max}; + ASSERT_EQ(1, tree.relocate(pOld, pNew, Id(i))); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate(pOld, pNew, Id(i))); + } + ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); + p = pNew; + ++i; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeMMBoxDTest, TestEraseByIterator) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + ASSERT_NE(tree.end(), tree.find(p)); + auto iter = tree.find(p, Id(i)); + ASSERT_NE(tree.end(), iter); + size_t count = tree.erase(iter); + ASSERT_EQ(1U, count); + ASSERT_EQ(tree.end(), tree.find(p, Id(i))); + if (tree.size() % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + i++; + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + +TEST(PhTreeMMBoxDTest, TestEraseByIteratorQuery) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + for (size_t i = 0; i < N; ++i) { + auto iter = tree.begin(); + ASSERT_NE(tree.end(), iter); + size_t count = tree.erase(iter); + ASSERT_EQ(1U, count); + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + +TEST(PhTreeMMBoxDTest, TestExtent) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(); + while (qE != tree.end()) { + qE++; + num_e++; + } + ASSERT_EQ(N, num_e); + + auto iter1 = tree.begin(); + auto iter2 = tree.begin(); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + +template +struct FilterEvenId { + [[nodiscard]] constexpr 
bool IsEntryValid(const PhPoint<2 * DIM>& key, const T& value) const { + return value._i % 2 == 0; + } + [[nodiscard]] constexpr bool IsNodeValid( + const PhPoint<2 * DIM>& prefix, int bits_to_ignore) const { + return true; + } +}; + +TEST(PhTreeMMDTest, TestExtentFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(FilterEvenId()); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e * 2); +} + +TEST(PhTreeMMDTest, TestExtentForEachFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + struct Counter { + void operator()(const TestPoint key, const Id& t) { + ++n_; + ASSERT_EQ(points_[t._i], key); + ASSERT_TRUE(t._i % 2 == 0); + } + std::vector>& points_; + size_t n_ = 0; + }; + Counter callback{points, 0}; + tree.for_each(callback, FilterEvenId()); + ASSERT_EQ(N, callback.n_ * 2); +} + +TEST(PhTreeMMBoxDTest, TestRangeBasedForLoop) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t num_e1 = 0; + for (auto& x : tree) { + x.data_ = 42; + num_e1++; + } + ASSERT_EQ(N, num_e1); + + // Check that we really had references and that data_ was changed + size_t num_e2 = 0; + for (auto& x : tree) { + ASSERT_EQ(42, x.data_); + num_e2++; + } + ASSERT_EQ(N, num_e2); +} + +TEST(PhTreeMMDTest, TestEstimateCountIntersect) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + std::vector> points; + populate(tree, points, N); + + // Test small + for (auto& p : points) { + size_t n = tree.estimate_count(p); + ASSERT_LE(NUM_DUPL, n); + // arbitrary upper limit: 10*NUM_DUPL + ASSERT_GE(10, NUM_DUPL); + } + + // Test medium (1/8 of volume), allow variation of 20% 0.8 / 1.2 + double min_2 = WORLD_MIN / 2; + double max_2 = WORLD_MAX / 2; + size_t n_medium 
= tree.estimate_count({{min_2, min_2, min_2}, {max_2, max_2, max_2}}); + ASSERT_LE(N / 8. * 0.8, n_medium); + ASSERT_GE(N / 8. * 1.2, n_medium); + + // Test all + size_t n_all = + tree.estimate_count({{WORLD_MIN, WORLD_MIN, WORLD_MIN}, {WORLD_MAX, WORLD_MAX, WORLD_MAX}}); + ASSERT_EQ(N, n_all); +} + +TEST(PhTreeMMDTest, TestEstimateCountInclude) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + std::vector> points; + populate(tree, points, N); + + // Test small + for (auto& p : points) { + size_t n = tree.estimate_count(p, QueryInclude()); + ASSERT_LE(NUM_DUPL, n); + // arbitrary upper limit: 10*NUM_DUPL + ASSERT_GE(10, NUM_DUPL); + } + + // Test medium (1/8 of volume), allow variation of 20% 0.8 / 1.2 + double min_2 = WORLD_MIN / 2; + double max_2 = WORLD_MAX / 2; + size_t n_medium = + tree.estimate_count({{min_2, min_2, min_2}, {max_2, max_2, max_2}}, QueryInclude()); + ASSERT_LE(N / 8. * 0.8, n_medium); + ASSERT_GE(N / 8. * 1.2, n_medium); + + // Test all + double min_all = WORLD_MIN - BOX_LEN; + double max_all = WORLD_MAX + BOX_LEN; + size_t n_all = tree.estimate_count( + {{min_all, min_all, min_all}, {max_all, max_all, max_all}}, QueryInclude()); + ASSERT_EQ(N, n_all); +} + +template +void referenceQuery( + std::vector>& points, + PhPointD& min, + PhPointD& max, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool match = true; + for (dimension_t d = 0; d < DIM; d++) { + match &= p.max()[d] >= min[d] && p.min()[d] <= max[d]; + } + if (match) { + result.insert(i); + } + } +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. 
+template +void testQuery(PhPointD& min, PhPointD& max, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + result = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x._i, 0); + ASSERT_EQ(referenceResult.count(x._i), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +TEST(PhTreeMMBoxDTest, TestWindowQuery0) { + const dimension_t dim = 3; + PhPointD p{-10000, -10000, -10000}; + int n = 0; + testQuery(p, p, 10000, n); + ASSERT_EQ(0, n); +} + +TEST(PhTreeMMBoxDTest, TestWindowQuery1) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + size_t n = 0; + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + // With intersection queries we may get more than NUM_DUPL results. + size_t found = 0; + for (auto q = tree.begin_query(p); q != tree.end(); ++q) { + found += (i / NUM_DUPL == (*q)._i / NUM_DUPL); + } + ASSERT_EQ(NUM_DUPL, found); + n++; + } + ASSERT_EQ(N, n); +} + +TEST(PhTreeMMBoxDTest, TestWindowQueryMany) { + const dimension_t dim = 3; + PhPointD min{-100, -100, -100}; + PhPointD max{100, 100, 100}; + int n = 0; + testQuery(min, max, 10000, n); + ASSERT_LE(3, n); + ASSERT_GE(100, n); +} + +TEST(PhTreeMMBoxDTest, TestWindowQueryAll) { + const dimension_t dim = 3; + const size_t N = 10000; + PhPointD min{-10000, -10000, -10000}; + PhPointD max{10000, 10000, 10000}; + int n = 0; + testQuery(min, max, N, n); + ASSERT_EQ(N, n); +} + +TEST(PhTreeMMBoxDTest, TestWindowQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + double query_length = 200; + size_t nn = 0; + for (int i = -120; i < 120; i++) { + PhPointD min{i * 10., i * 9., i * 11.}; + PhPointD max{i * 10 + query_length, i * 9 + query_length, i * 
11 + query_length}; + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { + auto& x = *it; + ASSERT_EQ(referenceResult.count(x._i), 1); + n++; + nn++; + } + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + ASSERT_GE(100, n); + } + ASSERT_LE(500, nn); + ASSERT_GE(5000, nn); +} + +TEST(PhTreeMMBoxDTest, TestWindowQueryManyMovingPoint) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N, 100); + + size_t nTotal = 0; + for (int i = -120; i < 120; i++) { + PhPointD min_max{i * 10., i * 9., i * 11.}; + std::set referenceResult; + referenceQuery(points, min_max, min_max, referenceResult); + + int n = 0; + for (auto it = tree.begin_query({min_max, min_max}); it != tree.end(); it++) { + auto& x = *it; + ASSERT_EQ(referenceResult.count(x._i), 1); + n++; + } + ASSERT_EQ(referenceResult.size(), n); + nTotal += n; + + // basic check to ensure healthy queries + ASSERT_GE(N / 10, n); + } + ASSERT_LE(10, nTotal); +} + +TEST(PhTreeMMBoxDTest, TestWindowForEachManyMovingPoint) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N, 100); + + size_t nTotal = 0; + for (int i = -120; i < 120; i++) { + PhPointD min_max{i * 10., i * 9., i * 11.}; + std::set referenceResult; + referenceQuery(points, min_max, min_max, referenceResult); + + struct Counter { + void operator()(const TestPoint& key, const Id& t) { + ++n_; + ASSERT_EQ(referenceResult.count(t._i), 1); + } + std::set& referenceResult; + size_t n_ = 0; + }; + + size_t n = 0; + Counter callback{referenceResult, 0}; + tree.for_each({min_max, min_max}, callback); + n += callback.n_; + ASSERT_EQ(referenceResult.size(), n); + nTotal += n; + + // basic check to ensure healthy queries + ASSERT_GE(N / 10, n); + } + ASSERT_LE(10, nTotal); +} + 
+TEST(PhTreeMMBoxDTest, SmokeTestPointAPI) { + PhBoxD<3> p({1, 2, 3}, {4, 5, 6}); + (void)p; +} + +TEST(PhTreeMMBoxDTest, SmokeTestTreeAPI) { + std::map mapPtr; + PhTreeMultiMapBoxD<3, Id*> treePtr; + Id* idPtr = new Id(1); + treePtr.emplace(PhBoxD<3>({1, 2, 3}, {4, 5, 6}), idPtr); + treePtr.clear(); + delete idPtr; +} diff --git a/phtree/phtree_multimap_d_test.cc b/phtree/phtree_multimap_d_test.cc new file mode 100644 index 00000000..a65f5a50 --- /dev/null +++ b/phtree/phtree_multimap_d_test.cc @@ -0,0 +1,1104 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable::phtree; + +// Number of entries that have the same coordinate +static const size_t NUM_DUPL = 4; +static const double WORLD_MIN = -1000; +static const double WORLD_MAX = 1000; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeMultiMap>; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i), data_{0} {}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id& operator=(Id const& rhs) = default; + + int _i; + int data_; +}; + +namespace std { +template <> +struct hash { + size_t operator()(const Id& x) const { + return std::hash{}(x._i); + } +}; +}; // namespace std + +struct IdHash { + template + std::size_t operator()(std::pair const& v) const { + return std::hash()(v.size()); + } +}; + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + + double _distance; + int _id; +}; + +bool comparePointDistanceAndId(PointDistance& i1, PointDistance& i2) { + return (i1._distance != i2._distance) ? 
(i1._distance < i2._distance) : (i1._id < i2._id); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = p1[i] - p2[i]; + sum2 += d * d; + } + return sqrt(sum2); +} + +template +double distanceL1(const TestPoint& p1, const TestPoint& p2) { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(p1[i] - p2[i]); + } + return sum; +} + +template +void generateCube(std::vector>& points, size_t N) { + assert(N % NUM_DUPL == 0); + DoubleRng rng(WORLD_MIN, WORLD_MAX); + auto reference_set = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N / NUM_DUPL; i++) { + // create duplicates, ie. entries with the same coordinates. However, avoid unintentional + // duplicates. + TestPoint key{}; + for (dimension_t d = 0; d < DIM; ++d) { + key[d] = rng.next(); + } + if (reference_set.count(key) != 0) { + i--; + continue; + } + reference_set.emplace(key, i); + for (size_t dupl = 0; dupl < NUM_DUPL; dupl++) { + auto point = TestPoint(key); + points.push_back(point); + } + } + ASSERT_EQ(reference_set.size(), N / NUM_DUPL); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps(size_t N) { + TestTree tree; + + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_LE(tree.count(p), i % NUM_DUPL); + if (i % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + + Id id(i); + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else { + ASSERT_TRUE(tree.insert(p, id).second); + } + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, 
id).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, (*q)._i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_NE(tree.find(p), tree.end()); + size_t expected_remaining = (N - i - 1) % NUM_DUPL + 1; + ASSERT_EQ(tree.count(p), expected_remaining); + ASSERT_EQ(i, tree.find(p, id)->_i); + if (i % 3 == 0) { + ASSERT_EQ(1, tree.erase(p, id)); + } else { + auto iter = tree.find(p, id); + ASSERT_EQ(1, tree.erase(iter)); + } + + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p, id)); + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeMMDTest, SmokeTestBasicOps) { + SmokeTestBasicOps<1>(10000); + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(10000); + SmokeTestBasicOps<20>(1000); + SmokeTestBasicOps<63>(100); +} + +TEST(PhTreeMMDTest, TestDebug) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + using Debug = PhTreeDebugHelper; + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, 
Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_TRUE(tree.insert(p, id).second); + } + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(N / NUM_DUPL, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + tree.clear(); + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); +} + +TEST(PhTreeMMDTest, TestInsert) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.insert(p, id).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + + // try add again + ASSERT_EQ(false, tree.insert(p, id).second); + ASSERT_EQ(i, tree.insert(p, id).first._i); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, (*q)._i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), NUM_DUPL); + Id id(i); + ASSERT_EQ(i, 
tree.find(p, id)->_i); + ASSERT_EQ(i / NUM_DUPL, tree.find(p)->_i / NUM_DUPL); + } +} + +TEST(PhTreeMMDTest, TestEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (int i = 0; i < (int)N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again (same `identity`), this should NOT replace the existing value + Id id2(i); + id2.data_ = 42; + ASSERT_EQ(false, tree.emplace(p, id2).second); + ASSERT_EQ(i, tree.find(p, id2)->_i); + ASSERT_EQ(0, tree.find(p, id2)->data_); + ASSERT_EQ(i, tree.emplace(p, id).first._i); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + + // Check that the returned value is a reference + tree.emplace(p, id2).first.data_++; + ASSERT_EQ(1, tree.find(p, id)->data_); + tree.emplace(p, id2).first.data_ = 0; + ASSERT_EQ(0, tree.emplace(p, id).first.data_); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, (*q)._i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(tree.count(p), NUM_DUPL); + ASSERT_EQ(i, tree.find(p, id)->_i); + } +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], (int)i).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeMMDTest, TestClear) { + 
const dimension_t dim = 3; + TestTree tree; + size_t N = 100; + std::vector> points; + + ASSERT_TRUE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); + + // try again + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); +} + +TEST(PhTreeMMDTest, TestFind) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + // test commutativity + Id id(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_NE(tree.find(p, id), tree.end()); + ASSERT_NE(tree.end(), tree.find(p, id)); + ASSERT_EQ(tree.find(p, id)->_i, i); + i++; + } + + TestPoint p{1, 1, 10000000}; + auto result = tree.find(p); + ASSERT_EQ(result, tree.end()); + ASSERT_EQ(tree.end(), result); + + auto iter1 = tree.find(points[0]); + auto iter2 = tree.find(points[0]); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + +TEST(PhTreeMMDTest, TestUpdateWithEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + double delta = 20; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + size_t count_new = tree.count(pNew); + size_t count_old = tree.count(pOld); + size_t n = tree.erase(pOld, Id(i)); + ASSERT_EQ(1U, n); + tree.emplace(pNew, Id(i)); + ASSERT_EQ(count_new + 1, tree.count(pNew)); + ASSERT_EQ(count_old - 1, tree.count(pOld)); + p = pNew; + ++i; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeMMDTest, TestUpdateWithEmplaceHint) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 0.1, 1, 10}; + std::vector> points; + populate(tree, points, N); 
+ + int i = 0; + size_t d_n = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + double delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + auto iter = tree.find(pOld, Id(i)); + size_t n = tree.erase(iter); + ASSERT_EQ(1U, n); + ASSERT_TRUE(tree.emplace_hint(iter, pNew, Id(i)).second); + ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); + auto iterNew = tree.find(pNew, Id(i)); + ASSERT_FALSE(tree.emplace_hint(iterNew, pNew, Id(i)).second); + p = pNew; + ++i; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 0.1, 1, 10}; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + size_t d_n = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + double delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + ASSERT_EQ(1, tree.relocate(pOld, pNew, Id(i))); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate(pOld, pNew, Id(i))); + } + ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); + p = pNew; + ++i; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeMMDTest, TestEraseByIterator) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + ASSERT_NE(tree.end(), tree.find(p)); + auto iter = tree.find(p, Id(i)); + ASSERT_NE(tree.end(), iter); + size_t count = tree.erase(iter); + ASSERT_EQ(1U, count); + ASSERT_EQ(tree.end(), tree.find(p, Id(i))); + if (tree.size() % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + i++; + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + +TEST(PhTreeMMDTest, TestEraseByIteratorQuery) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + 
populate(tree, points, N); + + for (size_t i = 0; i < N; ++i) { + auto iter = tree.begin(); + ASSERT_NE(tree.end(), iter); + size_t count = tree.erase(iter); + ASSERT_EQ(1U, count); + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + +TEST(PhTreeMMDTest, TestExtent) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(); + while (qE != tree.end()) { + qE++; + num_e++; + } + ASSERT_EQ(N, num_e); + + auto iter1 = tree.begin(); + auto iter2 = tree.begin(); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + +template +struct FilterEvenId { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint& key, const T& value) const { + return value._i % 2 == 0; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + return true; + } +}; + +TEST(PhTreeMMDTest, TestExtentFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(FilterEvenId()); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e * 2); +} + +TEST(PhTreeMMDTest, TestExtentForEachFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + struct Counter { + void operator()(const TestPoint key, const Id& t) { + ++n_; + ASSERT_EQ(points_[t._i], key); + ASSERT_TRUE(t._i % 2 == 0); + } + std::vector>& points_; + size_t n_ = 0; + }; + Counter callback{points, 0}; + tree.for_each(callback, FilterEvenId()); + ASSERT_EQ(N, callback.n_ * 2); +} + +TEST(PhTreeMMDTest, TestRangeBasedForLoop) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t num_e1 = 0; + for (auto& x : tree) { + x.data_ = 42; + num_e1++; + } + ASSERT_EQ(N, num_e1); + + // Check that we 
really had references and that data_ was changed + size_t num_e2 = 0; + for (auto& x : tree) { + ASSERT_EQ(42, x.data_); + num_e2++; + } + ASSERT_EQ(N, num_e2); +} + +TEST(PhTreeMMDTest, TestEstimateCountIntersect) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + std::vector> points; + populate(tree, points, N); + + // Test small + for (auto& p : points) { + size_t n = tree.estimate_count({p, p}); + ASSERT_LE(NUM_DUPL, n); + // arbitrary upper limit: 10*NUM_DUPL + ASSERT_GE(10, NUM_DUPL); + } + + // Test medium (1/8 of volume), allow variation of 20% 0.8 / 1.2 + double min_2 = WORLD_MIN / 2; + double max_2 = WORLD_MAX / 2; + size_t n_medium = tree.estimate_count({{min_2, min_2, min_2}, {max_2, max_2, max_2}}); + ASSERT_LE(N / 8. * 0.8, n_medium); + ASSERT_GE(N / 8. * 1.2, n_medium); + + // Test all + size_t n_all = + tree.estimate_count({{WORLD_MIN, WORLD_MIN, WORLD_MIN}, {WORLD_MAX, WORLD_MAX, WORLD_MAX}}); + ASSERT_EQ(N, n_all); +} + +template +void referenceQuery( + std::vector>& points, + TestPoint& min, + TestPoint& max, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool match = true; + for (dimension_t d = 0; d < DIM; d++) { + match &= p[d] >= min[d] && p[d] <= max[d]; + } + if (match) { + result.insert(i); + } + } +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. 
+template +void testQuery(TestPoint& min, TestPoint& max, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + result = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x._i, 0); + ASSERT_EQ(referenceResult.count(x._i), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +TEST(PhTreeMMDTest, TestWindowQuery0) { + const dimension_t dim = 3; + TestPoint p{-10000, -10000, -10000}; + int n = 0; + testQuery(p, p, 10000, n); + ASSERT_EQ(0, n); +} + +TEST(PhTreeMMDTest, TestWindowQuery1) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + auto& x = *q; + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, x._i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + n++; + } + ASSERT_EQ(N, n); +} + +TEST(PhTreeMMDTest, TestWindowQueryMany) { + const dimension_t dim = 3; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + int n = 0; + testQuery(min, max, 10000, n); + ASSERT_LE(3, n); + ASSERT_GE(100, n); +} + +TEST(PhTreeMMDTest, TestWindowQueryAll) { + const dimension_t dim = 3; + const size_t N = 10000; + TestPoint min{-10000, -10000, -10000}; + TestPoint max{10000, 10000, 10000}; + int n = 0; + testQuery(min, max, N, n); + ASSERT_EQ(N, n); +} + +TEST(PhTreeMMDTest, TestWindowQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + double query_length = 200; + size_t nn = 0; + for (int i = -120; i < 120; i++) { + TestPoint min{i * 10., i * 9., i * 11.}; + TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + std::set 
referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { + auto& x = *it; + ASSERT_EQ(referenceResult.count(x._i), 1); + n++; + nn++; + } + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + ASSERT_GE(100, n); + } + ASSERT_LE(500, nn); + ASSERT_GE(5000, nn); +} + +TEST(PhTreeMMDTest, TestWindowForEachQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + double query_length = 200; + size_t nn = 0; + for (int i = -120; i < 120; i++) { + TestPoint min{i * 10., i * 9., i * 11.}; + TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + struct Counter { + void operator()(const TestPoint& key, const Id& t) { + ++n_; + ASSERT_EQ(referenceResult.count(t._i), 1); + } + std::set& referenceResult; + size_t n_ = 0; + }; + + size_t n = 0; + Counter callback{referenceResult, 0}; + tree.for_each({min, max}, callback); + n += callback.n_; + nn += callback.n_; + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + ASSERT_GE(100, n); + } + ASSERT_LE(500, nn); + ASSERT_GE(5000, nn); +} + +TEST(PhTreeMMDTest, TestWindowQueryIterators) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q1 = tree.begin_query({p, p}); + auto q2 = tree.begin_query({p, p}); + ASSERT_NE(q1, tree.end()); + ASSERT_NE(q2, tree.end()); + ASSERT_EQ(q1, q2); + q1++; + ASSERT_NE(q1, q2); + q2++; + n++; + } + ASSERT_EQ(N, n); +} + +TEST(PhTreeMMDTest, TestWindowQueryFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int 
num_e = 0; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + auto qE = tree.begin_query({min, max}, FilterEvenId()); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + ASSERT_TRUE(qE->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_LE(2, num_e); + ASSERT_GE(50, num_e); +} + +TEST(PhTreeMMDTest, TestKnnQuery) { + // deliberately allowing outside of main points range + DoubleRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 1000; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + for (size_t round = 0; round < 100; round++) { + TestPoint center{rng.next(), rng.next(), rng.next()}; + + // sort points manually + std::vector sorted_data; + for (size_t i = 0; i < points.size(); i++) { + double dist = distance(center, points[i]); + sorted_data.emplace_back(dist, i); + } + std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistanceAndId); + + size_t n = 0; + double prevDist = -1; + auto q = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); + while (q != tree.end()) { + // just read the entry + auto& e = *q; + ASSERT_EQ(sorted_data[n]._distance, q.distance()); + ASSERT_EQ(sorted_data[n]._id / NUM_DUPL, e._i / NUM_DUPL); + ASSERT_EQ(points[sorted_data[n]._id], q.first()); + ASSERT_EQ(sorted_data[n]._id / NUM_DUPL, q->_i / NUM_DUPL); + ASSERT_GE(q.distance(), prevDist); + prevDist = q.distance(); + q++; + n++; + } + ASSERT_EQ(Nq * NUM_DUPL, n); + } +} + +template +struct PhDistanceLongL1 { + double operator()(const TestPoint& v1, const TestPoint& v2) const { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(v1[i] - v2[i]); + } + return sum; + }; +}; + +TEST(PhTreeMMDTest, TestKnnQueryFilterAndDistanceL1) { + // deliberately allowing outside of main points range + DoubleRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 100; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + 
+ for (size_t round = 0; round < 100; round++) { + TestPoint center{rng.next(), rng.next(), rng.next()}; + + // sort points manually by L1; skip every 2nd point + std::vector sorted_data; + for (size_t i = 0; i < points.size(); i += 2) { + double dist = distanceL1(center, points[i]); + sorted_data.emplace_back(dist, i); + } + std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistanceAndId); + + std::vector sorted_results; + size_t n = 0; + double prevDist = -1; + auto q = tree.begin_knn_query(Nq, center, PhDistanceLongL1(), FilterEvenId()); + while (q != tree.end()) { + // just read the entry + auto& e = *q; + sorted_results.emplace_back(q.distance(), e._i); + if (sorted_data[n]._id == e._i) { + ASSERT_EQ(points[sorted_data[n]._id], q.first()); + ASSERT_EQ(sorted_data[n]._id, q->_i); + } + + ASSERT_GE(q.distance(), prevDist); + prevDist = q.distance(); + q++; + n++; + } + std::sort(sorted_results.begin(), sorted_results.end(), comparePointDistanceAndId); + + for (size_t i = 0; i < n; ++i) { + auto& r = sorted_results[i]; + ASSERT_EQ(sorted_data[i]._distance, r._distance); + ASSERT_EQ(sorted_data[i]._id, r._id); + } + ASSERT_EQ(Nq * NUM_DUPL / 2, n); + } +} + +TEST(PhTreeMMDTest, TestKnnQueryIterator) { + // deliberately allowing outside of main points range + DoubleRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 1000; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + TestPoint center{rng.next(), rng.next(), rng.next()}; + size_t n = 0; + auto q1 = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); + auto q2 = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); + while (q1 != tree.end()) { + ASSERT_NE(q1, tree.end()); + ASSERT_NE(q2, tree.end()); + ASSERT_EQ(q1, q2); + q1++; + ASSERT_NE(q1, q2); + q2++; + n++; + } + ASSERT_EQ(Nq * NUM_DUPL, n); +} + +TEST(PhTreeMMDTest, SmokeTestPoint0) { + // Test edge case: empty tree + TestPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree; + 
ASSERT_EQ(tree.size(), 0); + ASSERT_EQ(tree.find(p), tree.end()); + + auto q_window = tree.begin_query({p, p}); + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(0, tree.erase(p, Id(-1))); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeMMDTest, SmokeTestPointInfinity) { + // Test inifnity. + double positive_infinity = std::numeric_limits::infinity(); + double negative_infinity = -positive_infinity; + PhPointD<3> p_pos{positive_infinity, positive_infinity, positive_infinity}; + PhPointD<3> p_neg{negative_infinity, negative_infinity, negative_infinity}; + PhPointD<3> p{1, 2, 3}; + TestTree<3, Id> tree; + tree.emplace(p, Id{1}); + tree.emplace(p_pos, Id{10}); + tree.emplace(p_neg, Id{-10}); + ASSERT_EQ(tree.size(), 3); + ASSERT_EQ(tree.find(p_neg, Id(-10))->_i, -10); + ASSERT_EQ(tree.find(p, Id(1))->_i, 1); + ASSERT_EQ(tree.find(p_pos, Id(10))->_i, 10); + + ASSERT_EQ(positive_infinity, positive_infinity); + ASSERT_EQ(negative_infinity, negative_infinity); + ASSERT_GT(positive_infinity, negative_infinity); + + // Note that the tree returns result in z-order, however, since the z-order is based on + // the (unsigned) bit representation, negative values come _after_ positive values. 
+ auto q_window = tree.begin_query({p_neg, p_pos}); + ASSERT_EQ(1, q_window->_i); + ++q_window; + ASSERT_EQ(10, q_window->_i); + ++q_window; + ASSERT_EQ(-10, q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(10, q_extent->_i); + ++q_extent; + ASSERT_EQ(-10, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_NE(q_knn, tree.end()); + ++q_knn; + ASSERT_NE(q_knn, tree.end()); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree.erase(p_neg, Id(-10))); + ASSERT_EQ(1, tree.erase(p, Id(1))); + ASSERT_EQ(1, tree.erase(p_pos, Id(10))); + ASSERT_EQ(0, tree.size()); + ASSERT_EQ(0, tree.erase(p_neg, Id(-10))); + ASSERT_EQ(0, tree.erase(p_pos, Id(10))); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeMMDTest, SmokeTestTreeAPI) { + std::map mapPtr; + PhTreeMultiMapD<3, Id*> treePtr; + Id* idPtr = new Id(1); + treePtr.emplace(PhPointD<3>{1, 2, 3}, idPtr); + treePtr.clear(); + delete idPtr; +} diff --git a/phtree/phtree_test.cc b/phtree/phtree_test.cc index 8143d012..4efc0348 100644 --- a/phtree/phtree_test.cc +++ b/phtree/phtree_test.cc @@ -24,7 +24,7 @@ template using TestPoint = PhPoint; template -using TestTree = PhTree, PrePostNoOp>; +using TestTree = PhTree; class IntRng { public: @@ -39,15 +39,57 @@ class IntRng { std::uniform_int_distribution rnd; }; +static int default_construct_count_ = 0; +static int construct_count_ = 0; +static int copy_construct_count_ = 0; +static int move_construct_count_ = 0; +static int copy_assign_count_ = 0; +static int move_assign_count_ = 0; +static int destruct_count_ = 0; + +static void reset_id_counters() { + default_construct_count_ = 0; + construct_count_ = 0; + copy_construct_count_ = 0; + move_construct_count_ = 0; + copy_assign_count_ = 0; + move_assign_count_ = 0; + 
destruct_count_ = 0; +} + struct Id { - Id() = default; + Id() : _i{0} { + ++default_construct_count_; + } + + explicit Id(const int i) : _i{i} { + ++construct_count_; + }; + + Id(const Id& other) { + ++copy_construct_count_; + _i = other._i; + } + + Id(Id&& other) noexcept { + ++move_construct_count_; + _i = other._i; + } - explicit Id(const int i) : _i(i){}; + bool operator==(const Id& rhs) { + ++copy_assign_count_; + return _i == rhs._i; + } - bool operator==(Id& rhs) { + bool operator==(Id&& rhs) { + ++move_assign_count_; return _i == rhs._i; } + ~Id() { + ++destruct_count_; + } + Id& operator=(Id const& rhs) = default; int _i; @@ -90,7 +132,10 @@ void generateCube(std::vector>& points, size_t N) { points.reserve(N); for (size_t i = 0; i < N; i++) { - auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + auto point = TestPoint(); + for (dimension_t d = 0; d < DIM; ++d) { + point[d] = rng.next(); + } if (refTree.count(point) != 0) { i--; continue; @@ -99,14 +144,14 @@ void generateCube(std::vector>& points, size_t N) { refTree.emplace(point, i); points.push_back(point); } - assert(refTree.size() == N); - assert(points.size() == N); + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); } template -void SmokeTestBasicOps() { +void SmokeTestBasicOps(size_t N) { + reset_id_counters(); TestTree tree; - size_t N = 10000; std::vector> points; generateCube(points, N); @@ -122,7 +167,7 @@ void SmokeTestBasicOps() { Id id(i); if (i % 2 == 0) { - ASSERT_TRUE(tree.emplace(p, id).second); + ASSERT_TRUE(tree.emplace(p, i).second); } else { ASSERT_TRUE(tree.insert(p, id).second); } @@ -143,7 +188,7 @@ void SmokeTestBasicOps() { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)._i); q++; @@ -175,13 +220,34 @@ void SmokeTestBasicOps() { ASSERT_EQ(0, tree.size()); ASSERT_TRUE(tree.empty()); PhTreeDebugHelper::CheckConsistency(tree); + + 
ASSERT_EQ(construct_count_ + copy_construct_count_ + move_construct_count_, destruct_count_); + // The following assertions exist only as sanity checks and may need adjusting. + // There is nothing fundamentally wrong if a change in the implementation violates + // any of the following assertions, as long as performance/memory impact is observed. + ASSERT_EQ(0, default_construct_count_); + ASSERT_EQ(0, copy_assign_count_); + ASSERT_EQ(0, move_assign_count_); + // Only insert() should cause a copy, emplace() should not. + ASSERT_GE(construct_count_ / 2., copy_construct_count_); + if (DIM > 3 && DIM < 8) { + // as expected the sparse array map does a _lot_ of copying (shifting entries around) + ASSERT_GE(construct_count_ * 7, move_construct_count_); + } else if (DIM == 1) { + // small node require a lot of copying/moving + ASSERT_GE(construct_count_ * 3, move_construct_count_); + } else { + ASSERT_GE(construct_count_ * 2, move_construct_count_); + } } TEST(PhTreeTest, SmokeTestBasicOps) { - SmokeTestBasicOps<3>(); - SmokeTestBasicOps<6>(); - SmokeTestBasicOps<10>(); - SmokeTestBasicOps<20>(); + SmokeTestBasicOps<1>(100); + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(10000); + SmokeTestBasicOps<20>(10000); + SmokeTestBasicOps<63>(100); } TEST(PhTreeTest, TestDebug) { @@ -245,7 +311,7 @@ TEST(PhTreeTest, TestInsert) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)._i); q++; @@ -292,7 +358,7 @@ TEST(PhTreeTest, TestEmplace) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)._i); q++; @@ -336,7 +402,7 @@ TEST(PhTreeTest, TestSquareBrackets) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); 
ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)._i); q++; @@ -445,6 +511,72 @@ TEST(PhTreeTest, TestUpdateWithEmplace) { tree.clear(); } +TEST(PhTreeTest, TestUpdateWithEmplaceHint) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + int delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + auto iter = tree.find(pOld); + int n = tree.erase(iter); + ASSERT_EQ(1, n); + tree.emplace_hint(iter, pNew, 42); + ASSERT_EQ(1, tree.count(pNew)); + if (delta != 0.0) { + ASSERT_EQ(0, tree.count(pOld)); + } + p = pNew; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeTest, TestEraseByIterator) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + auto iter = tree.find(p); + ASSERT_NE(tree.end(), iter); + int count = tree.erase(iter); + ASSERT_EQ(1, count); + ASSERT_EQ(tree.end(), tree.find(p)); + i++; + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + +TEST(PhTreeTest, TestEraseByIteratorQuery) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + for (size_t i = 0; i < N; ++i) { + auto iter = tree.begin(); + ASSERT_NE(tree.end(), iter); + int count = tree.erase(iter); + ASSERT_EQ(1, count); + } + + ASSERT_EQ(0, tree.erase(tree.end())); +} + TEST(PhTreeTest, TestExtent) { const dimension_t dim = 3; TestTree tree; @@ -468,7 +600,7 @@ TEST(PhTreeTest, TestExtent) { } template -struct PhFilterEvenId { +struct FilterEvenId { [[nodiscard]] constexpr bool IsEntryValid(const PhPoint& key, const T& value) const { return value._i % 2 == 0; } @@ -485,7 +617,7 @@ TEST(PhTreeTest, TestExtentFilter) { populate(tree, points, N); int num_e = 0; - auto qE = 
tree.begin(PhFilterEvenId()); + auto qE = tree.begin(FilterEvenId()); while (qE != tree.end()) { ASSERT_TRUE(qE->_i > -1); ASSERT_TRUE(qE->_i % 2 == 0); @@ -535,8 +667,9 @@ void referenceQuery( } } +// We use 'int&' because gtest does not compile with assertions in non-void functions. template -int testQuery(TestPoint& min, TestPoint& max, size_t N) { +void testQuery(TestPoint& min, TestPoint& max, size_t N, int& result) { TestTree tree; std::vector> points; populate(tree, points, N); @@ -544,21 +677,22 @@ int testQuery(TestPoint& min, TestPoint& max, size_t N) { std::set referenceResult; referenceQuery(points, min, max, referenceResult); - size_t n = 0; - for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + result = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { auto& x = *it; - assert(x._i >= 0); - assert(referenceResult.count(x._i) == 1); - n++; + ASSERT_GE(x._i, 0); + ASSERT_EQ(referenceResult.count(x._i), 1); + result++; } - assert(referenceResult.size() == n); - return n; + ASSERT_EQ(referenceResult.size(), result); } TEST(PhTreeTest, TestWindowQuery0) { const dimension_t dim = 3; TestPoint p{-10000, -10000, -10000}; - ASSERT_EQ(0, testQuery(p, p, 10000)); + int n = 0; + testQuery(p, p, 10000, n); + ASSERT_EQ(0, n); } TEST(PhTreeTest, TestWindowQuery1) { @@ -571,7 +705,7 @@ TEST(PhTreeTest, TestWindowQuery1) { int n = 0; for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); // just read the entry auto& x = *q; @@ -587,7 +721,8 @@ TEST(PhTreeTest, TestWindowQueryMany) { const dimension_t dim = 3; TestPoint min{-100, -100, -100}; TestPoint max{100, 100, 100}; - int n = testQuery(min, max, 10000); + int n = 0; + testQuery(min, max, 10000, n); ASSERT_LE(3, n); ASSERT_GE(100, n); } @@ -597,7 +732,9 @@ TEST(PhTreeTest, TestWindowQueryAll) { const size_t N = 10000; TestPoint min{-10000, -10000, -10000}; TestPoint 
max{10000, 10000, 10000}; - ASSERT_EQ(N, testQuery(min, max, N)); + int n = 0; + testQuery(min, max, N, n); + ASSERT_EQ(N, n); } TEST(PhTreeTest, TestWindowQueryManyMoving) { @@ -616,7 +753,7 @@ TEST(PhTreeTest, TestWindowQueryManyMoving) { referenceQuery(points, min, max, referenceResult); size_t n = 0; - for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { auto& x = *it; ASSERT_EQ(referenceResult.count(x._i), 1); n++; @@ -630,7 +767,48 @@ TEST(PhTreeTest, TestWindowQueryManyMoving) { } ASSERT_GE(100, n); } - ASSERT_LE(3, 500); + ASSERT_LE(500, nn); + ASSERT_GE(5000, nn); +} + +TEST(PhTreeTest, TestWindowForEachManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int query_length = 200; + size_t nn = 0; + for (int i = -120; i < 120; i++) { + TestPoint min{i * 10, i * 9, i * 11}; + TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + struct Counter { + void operator()(TestPoint key, Id& t) { + ++n_; + ASSERT_EQ(referenceResult.count(t._i), 1); + } + std::set& referenceResult; + size_t n_ = 0; + }; + + size_t n = 0; + Counter callback{referenceResult, 0}; + tree.for_each({min, max}, callback); + n += callback.n_; + nn += callback.n_; + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + if (i > -50 && i < 50) { + ASSERT_LE(1, n); + } + ASSERT_GE(100, n); + } + ASSERT_LE(500, nn); ASSERT_GE(5000, nn); } @@ -644,8 +822,8 @@ TEST(PhTreeTest, TestWindowQueryIterators) { int n = 0; for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q1 = tree.begin_query(p, p); - auto q2 = tree.begin_query(p, p); + auto q1 = tree.begin_query({p, p}); + auto q2 = tree.begin_query({p, p}); ASSERT_NE(q1, tree.end()); ASSERT_NE(q2, tree.end()); ASSERT_EQ(q1, 
q2); @@ -667,7 +845,7 @@ TEST(PhTreeTest, TestWindowQueryFilter) { int num_e = 0; TestPoint min{-100, -100, -100}; TestPoint max{100, 100, 100}; - auto qE = tree.begin_query(min, max, PhFilterEvenId()); + auto qE = tree.begin_query({min, max}, FilterEvenId()); while (qE != tree.end()) { ASSERT_TRUE(qE->_i > -1); ASSERT_TRUE(qE->_i % 2 == 0); @@ -702,7 +880,7 @@ TEST(PhTreeTest, TestKnnQuery) { size_t n = 0; double prevDist = -1; - auto q = tree.begin_knn_query(Nq, center); + auto q = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); while (q != tree.end()) { // just read the entry auto& e = *q; @@ -754,8 +932,7 @@ TEST(PhTreeTest, TestKnnQueryFilterAndDistanceL1) { size_t n = 0; double prevDist = -1; - auto q = - tree.begin_knn_query(Nq, center, PhDistanceLongL1(), PhFilterEvenId()); + auto q = tree.begin_knn_query(Nq, center, PhDistanceLongL1(), FilterEvenId()); while (q != tree.end()) { // just read the entry ASSERT_EQ(sorted_data[n]._distance, q.distance()); @@ -783,8 +960,8 @@ TEST(PhTreeTest, TestKnnQueryIterator) { TestPoint center{rng.next(), rng.next(), rng.next()}; size_t n = 0; - auto q1 = tree.begin_knn_query(Nq, center); - auto q2 = tree.begin_knn_query(Nq, center); + auto q1 = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); + auto q2 = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); while (q1 != tree.end()) { ASSERT_NE(q1, tree.end()); ASSERT_NE(q2, tree.end()); @@ -804,13 +981,13 @@ TEST(PhTreeTest, SmokeTestPoint0) { ASSERT_EQ(tree.size(), 0); ASSERT_EQ(tree.find(p), tree.end()); - auto q_window = tree.begin_query(p, p); + auto q_window = tree.begin_query({p, p}); ASSERT_EQ(q_window, tree.end()); auto q_extent = tree.begin(); ASSERT_EQ(q_extent, tree.end()); - auto q_knn = tree.begin_knn_query(10, p); + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); ASSERT_EQ(q_knn, tree.end()); ASSERT_EQ(0, tree.erase(p)); @@ -832,7 +1009,7 @@ TEST(PhTreeTest, SmokeTestPoint1) { ASSERT_EQ(tree.find(p).second()._i, 1); 
ASSERT_EQ(tree[p]._i, 1); - auto q_window = tree.begin_query(p, p); + auto q_window = tree.begin_query({p, p}); ASSERT_EQ(1, q_window->_i); ++q_window; ASSERT_EQ(q_window, tree.end()); @@ -842,7 +1019,7 @@ TEST(PhTreeTest, SmokeTestPoint1) { ++q_extent; ASSERT_EQ(q_extent, tree.end()); - auto q_knn = tree.begin_knn_query(10, p); + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); ASSERT_EQ(1, q_knn->_i); ++q_knn; ASSERT_EQ(q_knn, tree.end()); diff --git a/phtree/phtree_test_const_values.cc b/phtree/phtree_test_const_values.cc index 7eeec951..50a2c4b1 100644 --- a/phtree/phtree_test_const_values.cc +++ b/phtree/phtree_test_const_values.cc @@ -24,19 +24,19 @@ template using TestPoint = PhPoint; template -using TestTree = PhTree, PrePostNoOp>; +using TestTree = PhTree; class IntRng { public: - IntRng(std::int32_t minIncl, std::int32_t maxExcl) : eng(7), rnd{minIncl, maxExcl} {} + IntRng(int minIncl, int maxExcl) : eng(7), rnd{minIncl, maxExcl} {} - std::int32_t next() { + int next() { return rnd(eng); } private: std::default_random_engine eng; - std::uniform_int_distribution rnd; + std::uniform_int_distribution rnd; }; struct Id { @@ -99,8 +99,8 @@ void generateCube(std::vector>& points, size_t N) { refTree.emplace(point, i); points.push_back(point); } - assert(refTree.size() == N); - assert(points.size() == N); + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); } template @@ -143,7 +143,7 @@ void SmokeTestBasicOps() { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)._i); q++; @@ -245,7 +245,7 @@ TEST(PhTreeTestConst, TestInsert) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)._i); q++; @@ -286,7 +286,7 @@ TEST(PhTreeTestConst, TestEmplace) { for (size_t i = 0; i < N; i++) { 
TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)._i); q++; @@ -319,7 +319,7 @@ TEST(PhTreeTestConst, TestSquareBrackets) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(0, (*q)._i); q++; @@ -444,7 +444,7 @@ TEST(PhTreeTestConst, TestExtent) { } template -struct PhFilterEvenId { +struct FilterEvenId { [[nodiscard]] constexpr bool IsEntryValid(const PhPoint& key, const T& value) const { return value._i % 2 == 0; } @@ -461,7 +461,7 @@ TEST(PhTreeTestConst, TestExtentFilter) { populate(tree, points, N); int num_e = 0; - auto qE = tree.begin(PhFilterEvenId()); + auto qE = tree.begin(FilterEvenId()); while (qE != tree.end()) { ASSERT_TRUE(qE->_i > -1); ASSERT_TRUE(qE->_i % 2 == 0); @@ -511,8 +511,9 @@ void referenceQuery( } } +// We use 'int&' because gtest does not compile with assertions in non-void functions. 
template -int testQuery(TestPoint& min, TestPoint& max, size_t N) { +void testQuery(TestPoint& min, TestPoint& max, size_t N, int& result) { TestTree tree; std::vector> points; populate(tree, points, N); @@ -520,21 +521,22 @@ int testQuery(TestPoint& min, TestPoint& max, size_t N) { std::set referenceResult; referenceQuery(points, min, max, referenceResult); - size_t n = 0; - for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + result = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { auto& x = *it; - assert(x._i >= 0); - assert(referenceResult.count(x._i) == 1); - n++; + ASSERT_GE(x._i, 0); + ASSERT_EQ(referenceResult.count(x._i), 1); + result++; } - assert(referenceResult.size() == n); - return n; + ASSERT_EQ(referenceResult.size(), result); } TEST(PhTreeTestConst, TestWindowQuery0) { const dimension_t dim = 3; TestPoint p{-10000, -10000, -10000}; - ASSERT_EQ(0, testQuery(p, p, 10000)); + int n = 0; + testQuery(p, p, 10000, n); + ASSERT_EQ(0, n); } TEST(PhTreeTestConst, TestWindowQuery1) { @@ -547,7 +549,7 @@ TEST(PhTreeTestConst, TestWindowQuery1) { int n = 0; for (size_t i = 0; i < N; i++) { PhPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); // just read the entry auto& x = *q; @@ -563,7 +565,8 @@ TEST(PhTreeTestConst, TestWindowQueryMany) { const dimension_t dim = 3; TestPoint min{-100, -100, -100}; TestPoint max{100, 100, 100}; - int n = testQuery(min, max, 10000); + int n = 0; + testQuery(min, max, 10000, n); ASSERT_LE(3, n); ASSERT_GE(100, n); } @@ -573,7 +576,9 @@ TEST(PhTreeTestConst, TestWindowQueryAll) { const size_t N = 10000; TestPoint min{-10000, -10000, -10000}; TestPoint max{10000, 10000, 10000}; - ASSERT_EQ(N, testQuery(min, max, N)); + int n = 0; + testQuery(min, max, N, n); + ASSERT_EQ(N, n); } TEST(PhTreeTestConst, TestWindowQueryManyMoving) { @@ -592,7 +597,7 @@ TEST(PhTreeTestConst, TestWindowQueryManyMoving) { 
referenceQuery(points, min, max, referenceResult); size_t n = 0; - for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { auto& x = *it; ASSERT_EQ(referenceResult.count(x._i), 1); n++; @@ -606,7 +611,7 @@ TEST(PhTreeTestConst, TestWindowQueryManyMoving) { } ASSERT_GE(100, n); } - ASSERT_LE(3, 500); + ASSERT_LE(500, nn); ASSERT_GE(5000, nn); } @@ -620,8 +625,8 @@ TEST(PhTreeTestConst, TestWindowQueryIterators) { int n = 0; for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q1 = tree.begin_query(p, p); - auto q2 = tree.begin_query(p, p); + auto q1 = tree.begin_query({p, p}); + auto q2 = tree.begin_query({p, p}); ASSERT_NE(q1, tree.end()); ASSERT_NE(q2, tree.end()); ASSERT_EQ(q1, q2); @@ -643,7 +648,7 @@ TEST(PhTreeTestConst, TestWindowQueryFilter) { int num_e = 0; TestPoint min{-100, -100, -100}; TestPoint max{100, 100, 100}; - auto qE = tree.begin_query(min, max, PhFilterEvenId()); + auto qE = tree.begin_query({min, max}, FilterEvenId()); while (qE != tree.end()) { ASSERT_TRUE(qE->_i > -1); ASSERT_TRUE(qE->_i % 2 == 0); @@ -678,7 +683,7 @@ TEST(PhTreeTestConst, TestKnnQuery) { size_t n = 0; double prevDist = -1; - auto q = tree.begin_knn_query(Nq, center); + auto q = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); while (q != tree.end()) { // just read the entry auto& e = *q; diff --git a/phtree/phtree_test_ptr_values.cc b/phtree/phtree_test_ptr_values.cc index 1c374da5..6fb12f9a 100644 --- a/phtree/phtree_test_ptr_values.cc +++ b/phtree/phtree_test_ptr_values.cc @@ -24,7 +24,7 @@ template using TestPoint = PhPoint; template -using TestTree = PhTree, PrePostNoOp>; +using TestTree = PhTree; class IntRng { public: @@ -99,8 +99,8 @@ void generateCube(std::vector>& points, size_t N) { refTree.emplace(point, i); points.push_back(point); } - assert(refTree.size() == N); - assert(points.size() == N); + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), 
N); } template @@ -143,7 +143,7 @@ void SmokeTestBasicOps() { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)->_i); q++; @@ -254,7 +254,7 @@ TEST(PhTreeTestPtr, TestInsert) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)->_i); q++; @@ -308,7 +308,7 @@ TEST(PhTreeTestPtr, TestEmplace) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)->_i); q++; @@ -359,7 +359,7 @@ TEST(PhTreeTestPtr, TestSquareBrackets) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); ASSERT_EQ(i, (*q)->_i); q++; @@ -582,8 +582,9 @@ void referenceQuery( } } +// We use 'int&' because gtest does not compile with assertions in non-void functions. 
template -int testQuery(TestPoint& min, TestPoint& max, size_t N) { +void testQuery(TestPoint& min, TestPoint& max, size_t N, int& result) { TestTree tree; std::vector> points; std::vector values; @@ -592,22 +593,23 @@ int testQuery(TestPoint& min, TestPoint& max, size_t N) { std::set referenceResult; referenceQuery(points, min, max, referenceResult); - size_t n = 0; - for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + result = 0; + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { auto x = *it; - assert(x->_i >= 0); - assert(referenceResult.count(x->_i) == 1); - n++; + ASSERT_GE(x->_i, 0); + ASSERT_EQ(referenceResult.count(x->_i), 1); + result++; } - assert(referenceResult.size() == n); + ASSERT_EQ(referenceResult.size(), result); depopulate(values); - return n; } TEST(PhTreeTestPtr, TestWindowQuery0) { const dimension_t dim = 3; TestPoint p{-10000, -10000, -10000}; - ASSERT_EQ(0, testQuery(p, p, 10000)); + int n = 0; + testQuery(p, p, 10000, n); + ASSERT_EQ(0, n); } TEST(PhTreeTestPtr, TestWindowQuery1) { @@ -621,7 +623,7 @@ TEST(PhTreeTestPtr, TestWindowQuery1) { int n = 0; for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q = tree.begin_query(p, p); + auto q = tree.begin_query({p, p}); ASSERT_NE(q, tree.end()); // just read the entry auto x = *q; @@ -638,7 +640,8 @@ TEST(PhTreeTestPtr, TestWindowQueryMany) { const dimension_t dim = 3; TestPoint min{-100, -100, -100}; TestPoint max{100, 100, 100}; - int n = testQuery(min, max, 10000); + int n = 0; + testQuery(min, max, 10000, n); ASSERT_LE(3, n); ASSERT_GE(100, n); } @@ -648,7 +651,9 @@ TEST(PhTreeTestPtr, TestWindowQueryAll) { const size_t N = 10000; TestPoint min{-10000, -10000, -10000}; TestPoint max{10000, 10000, 10000}; - ASSERT_EQ(N, testQuery(min, max, N)); + int n = 0; + testQuery(min, max, N, n); + ASSERT_EQ(N, n); } TEST(PhTreeTestPtr, TestWindowQueryManyMoving) { @@ -668,7 +673,7 @@ TEST(PhTreeTestPtr, TestWindowQueryManyMoving) { 
referenceQuery(points, min, max, referenceResult); size_t n = 0; - for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + for (auto it = tree.begin_query({min, max}); it != tree.end(); it++) { auto x = *it; ASSERT_EQ(referenceResult.count(x->_i), 1); n++; @@ -682,7 +687,7 @@ TEST(PhTreeTestPtr, TestWindowQueryManyMoving) { } ASSERT_GE(100, n); } - ASSERT_LE(3, 500); + ASSERT_LE(500, nn); ASSERT_GE(5000, nn); depopulate(values); } @@ -698,8 +703,8 @@ TEST(PhTreeTestPtr, TestWindowQueryIterators) { int n = 0; for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); - auto q1 = tree.begin_query(p, p); - auto q2 = tree.begin_query(p, p); + auto q1 = tree.begin_query({p, p}); + auto q2 = tree.begin_query({p, p}); ASSERT_NE(q1, tree.end()); ASSERT_NE(q2, tree.end()); ASSERT_EQ(q1, q2); @@ -723,7 +728,7 @@ TEST(PhTreeTestPtr, TestWindowQueryFilter) { int num_e = 0; TestPoint min{-100, -100, -100}; TestPoint max{100, 100, 100}; - auto qE = tree.begin_query(min, max, PhFilterEvenId()); + auto qE = tree.begin_query({min, max}, PhFilterEvenId()); while (qE != tree.end()) { ASSERT_TRUE((*qE)->_i > -1); ASSERT_TRUE((*qE)->_i % 2 == 0); @@ -760,7 +765,7 @@ TEST(PhTreeTestPtr, TestKnnQuery) { size_t n = 0; double prevDist = -1; - auto q = tree.begin_knn_query(Nq, center); + auto q = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>()); while (q != tree.end()) { // just read the entry auto& e = *q; diff --git a/phtree/phtree_test_unique_ptr_values.cc b/phtree/phtree_test_unique_ptr_values.cc new file mode 100644 index 00000000..e5b204cd --- /dev/null +++ b/phtree/phtree_test_unique_ptr_values.cc @@ -0,0 +1,184 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree.h" +#include +#include + +using namespace improbable::phtree; + +template +using TestPoint = PhPoint; + +template +using TestTree = PhTree; + +class IntRng { + public: + IntRng(int minIncl, int maxExcl) : eng(7), rnd{minIncl, maxExcl} {} + + int next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_int_distribution rnd; +}; + +struct IdObj { + IdObj() = default; + + explicit IdObj(const int i) : _i(i){}; + + bool operator==(IdObj& rhs) { + return _i == rhs._i; + } + + IdObj& operator=(IdObj const& rhs) = default; + + int _i; +}; + +using Id = std::unique_ptr; + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + + double _distance; + size_t _id; +}; + +bool comparePointDistance(PointDistance& i1, PointDistance& i2) { + return (i1._distance < i2._distance); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = p1[i] - p2[i]; + sum2 += d * d; + } + return sqrt(sum2); +} + +template +double distanceL1(const TestPoint& p1, const TestPoint& p2) { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(p1[i] - p2[i]); + } + return sum; +} + +template +void generateCube(std::vector>& points, size_t N) { + IntRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) != 0) 
{ + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, std::make_unique(i)).second); + } else { + Id id = std::make_unique(i); + ASSERT_TRUE(tree.emplace(p, std::move(id)).second); + } + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again + ASSERT_FALSE(tree.emplace(p, std::make_unique(i)).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)->_i); + q++; + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(1, tree.erase(p)); + + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + 
+TEST(PhTreeTestUniquePtr, SmokeTestBasicOps) { + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(1000); + SmokeTestBasicOps<20>(100); +} diff --git a/phtree/testing/gtest_main/BUILD b/phtree/testing/gtest_main/BUILD index 83ffea6e..0d591976 100644 --- a/phtree/testing/gtest_main/BUILD +++ b/phtree/testing/gtest_main/BUILD @@ -9,7 +9,6 @@ cc_library( ], deps = [ "@gtest", - "@spdlog", ], alwayslink = 1, ) diff --git a/phtree/testing/gtest_main/gtest_main.cc b/phtree/testing/gtest_main/gtest_main.cc index 1dfcd1f4..1e11ab41 100644 --- a/phtree/testing/gtest_main/gtest_main.cc +++ b/phtree/testing/gtest_main/gtest_main.cc @@ -15,16 +15,8 @@ */ #include -#include -#include -#include int main(int argc, char** argv) { - auto console_sink = std::make_shared(); - spdlog::set_default_logger( - std::make_shared("", spdlog::sinks_init_list({console_sink}))); - spdlog::set_level(spdlog::level::trace); - testing::InitGoogleMock(&argc, argv); return RUN_ALL_TESTS(); } diff --git a/phtree/v16/BUILD b/phtree/v16/BUILD index 1717296f..b44b14a1 100644 --- a/phtree/v16/BUILD +++ b/phtree/v16/BUILD @@ -5,14 +5,16 @@ cc_library( srcs = [ ], hdrs = [ - "debug_helper.h", + "debug_helper_v16.h", + "entry.h", + "for_each.h", + "for_each_hc.h", + "iterator_base.h", + "iterator_full.h", + "iterator_hc.h", + "iterator_knn_hs.h", + "iterator_simple.h", "node.h", - "ph_entry.h", - "ph_iterator_base.h", - "ph_iterator_full.h", - "ph_iterator_hc.h", - "ph_iterator_knn_hs.h", - "ph_iterator_simple.h", "phtree_v16.h", ], visibility = [ diff --git a/phtree/v16/CMakeLists.txt b/phtree/v16/CMakeLists.txt index b4bb6e43..1aa65630 100644 --- a/phtree/v16/CMakeLists.txt +++ b/phtree/v16/CMakeLists.txt @@ -2,13 +2,13 @@ cmake_minimum_required(VERSION 3.14) target_sources(phtree PRIVATE - debug_helper.h + debug_helper_v16.h node.h - ph_entry.h - ph_iterator_base.h - ph_iterator_full.h - ph_iterator_hc.h - ph_iterator_knn_hs.h - ph_iterator_simple.h + entry.h + 
iterator_base.h + iterator_full.h + iterator_hc.h + iterator_knn_hs.h + iterator_simple.h phtree_v16.h ) diff --git a/phtree/v16/debug_helper.h b/phtree/v16/debug_helper_v16.h similarity index 84% rename from phtree/v16/debug_helper.h rename to phtree/v16/debug_helper_v16.h index a5ac8fc0..12c53d9f 100644 --- a/phtree/v16/debug_helper.h +++ b/phtree/v16/debug_helper_v16.h @@ -17,21 +17,25 @@ #ifndef PHTREE_V16_DEBUG_HELPER_H #define PHTREE_V16_DEBUG_HELPER_H -#include "../common/ph_common.h" -#include "../common/ph_tree_debug_helper.h" #include "node.h" +#include "../common/common.h" +#include "../common/debug_helper.h" #include "phtree_v16.h" #include namespace improbable::phtree::v16 { -template POST> +template class PhTreeV16; -template +template class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { + using Key = PhPoint; + using Node = Node; + using Entry = Entry; + public: - DebugHelperV16(const Node& root, size_t size) : root_{root}, size_{size} {} + DebugHelperV16(const Node& root, size_t size) : root_{root}, size_{size} {} /* * Depending on the detail parameter this returns: @@ -54,7 +58,7 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { ToStringPlain(os, root_); break; case Enum::tree: - ToStringTree(os, 0, root_, PhPoint(), true); + ToStringTree(os, 0, root_, Key(), true); break; } return os.str(); @@ -79,9 +83,9 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { } private: - void ToStringPlain(std::ostringstream& os, const Node& node) const { + void ToStringPlain(std::ostringstream& os, const Node& node) const { for (auto& it : node.Entries()) { - const PhEntry& o = it.second; + const Entry& o = it.second; // inner node? 
if (o.IsNode()) { ToStringPlain(os, o.GetNode()); @@ -95,8 +99,8 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { void ToStringTree( std::ostringstream& sb, bit_width_t current_depth, - const Node& node, - const PhPoint& prefix, + const Node& node, + const Key& prefix, bool printValue) const { std::string ind = "*"; for (bit_width_t i = 0; i < current_depth; ++i) { @@ -107,11 +111,11 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { // for a leaf node, the existence of a sub just indicates that the value exists. if (node.GetInfixLen() > 0) { - bit_mask_t mask = MAX_MASK << node.GetInfixLen(); + bit_mask_t mask = MAX_MASK << node.GetInfixLen(); mask = ~mask; mask <<= node.GetPostfixLen() + 1; for (dimension_t i = 0; i < DIM; ++i) { - sb << ToBinary(prefix[i] & mask) << ","; + sb << ToBinary(prefix[i] & mask) << ","; } } current_depth += node.GetInfixLen(); @@ -138,7 +142,7 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { } } - const Node& root_; + const Node& root_; const size_t size_; }; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/ph_entry.h b/phtree/v16/entry.h similarity index 52% rename from phtree/v16/ph_entry.h rename to phtree/v16/entry.h index fa32e7ea..75468508 100644 --- a/phtree/v16/ph_entry.h +++ b/phtree/v16/entry.h @@ -14,11 +14,11 @@ * limitations under the License. */ -#ifndef PHTREE_V16_PH_ENTRY_H -#define PHTREE_V16_PH_ENTRY_H +#ifndef PHTREE_V16_ENTRY_H +#define PHTREE_V16_ENTRY_H -#include "../common/ph_common.h" #include "node.h" +#include "../../phtree/common/common.h" #include #include #include @@ -27,7 +27,7 @@ namespace improbable::phtree::v16 { -template +template class Node; /* @@ -37,42 +37,39 @@ class Node; * - A prefix/child-node pair, where prefix is the prefix of the child node and the * child node is contained in a unique_ptr. 
*/ -template -class PhEntry { +template +class Entry { + using Key = PhPoint; using Value = std::remove_const_t; + using Node = Node; public: - PhEntry() : kd_key_(), value_{std::in_place_type, T{}} {} + Entry() : kd_key_(), value_{std::in_place_type, T{}} {} /* * Construct entry with existing node. */ - PhEntry(const PhPoint& k, std::unique_ptr>&& node) + Entry(const Key& k, std::unique_ptr&& node) : kd_key_{k} - , value_{std::in_place_type>>, - std::forward>>(node)} {} + , value_{std::in_place_type>, std::forward>(node)} { + } /* * Construct entry with a new node. */ - PhEntry(bit_width_t infix_len, bit_width_t postfix_len) + Entry(bit_width_t infix_len, bit_width_t postfix_len) : kd_key_() - , value_{std::in_place_type>>, - std::make_unique>(infix_len, postfix_len)} {} - - /* - * Construct entry with existing value T. - */ - PhEntry(const PhPoint& k, const T& v) : kd_key_{k}, value_{std::in_place_type, v} {} + , value_{std::in_place_type>, + std::make_unique(infix_len, postfix_len)} {} /* - * Construct entry with new T value. + * Construct entry with new T or moved T. */ template - explicit PhEntry(const PhPoint& k, _Args&&... __args) + explicit Entry(const Key& k, _Args&&... 
__args) : kd_key_{k}, value_{std::in_place_type, std::forward<_Args>(__args)...} {} - [[nodiscard]] const PhPoint& GetKey() const { + [[nodiscard]] const Key& GetKey() const { return kd_key_; } @@ -81,7 +78,7 @@ class PhEntry { } [[nodiscard]] bool IsNode() const { - return std::holds_alternative>>(value_); + return std::holds_alternative>(value_); } [[nodiscard]] T& GetValue() const { @@ -89,15 +86,27 @@ class PhEntry { return const_cast(std::get(value_)); } - [[nodiscard]] Node& GetNode() const { + [[nodiscard]] Node& GetNode() const { + assert(IsNode()); + return *std::get>(value_); + } + + void ReplaceNodeWithDataFromEntry(Entry&& other) { assert(IsNode()); - return *std::get>>(value_); + kd_key_ = other.GetKey(); + + // 'value_' points indirectly to 'entry' so we have to remove `entity's` content before + // assigning anything to `value_` here. Otherwise the assignment would destruct the previous + // content and, by reachability, `entity's` content. + auto old_node = std::get>(value_).release(); + value_ = std::move(other.value_); + delete old_node; } private: - PhPoint kd_key_; - std::variant>> value_; + Key kd_key_; + std::variant> value_; }; } // namespace improbable::phtree::v16 -#endif // PHTREE_V16_PH_ENTRY_H +#endif // PHTREE_V16_ENTRY_H diff --git a/phtree/v16/for_each.h b/phtree/v16/for_each.h new file mode 100644 index 00000000..5eb2fcb7 --- /dev/null +++ b/phtree/v16/for_each.h @@ -0,0 +1,74 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_FOR_EACH_H +#define PHTREE_V16_FOR_EACH_H + +#include "iterator_simple.h" +#include "../common/common.h" + +namespace improbable::phtree::v16 { + +/* + * Iterates over the whole tree. Entries and child nodes that are rejected by the Filter are not + * traversed or returned. + */ +template +class ForEach { + static constexpr dimension_t DIM = CONVERT::DimInternal; + using KeyExternal = typename CONVERT::KeyExternal; + using KeyInternal = typename CONVERT::KeyInternal; + using SCALAR = typename CONVERT::ScalarInternal; + using Entry = Entry; + using Node = Node; + + public: + ForEach(const CONVERT& converter, CALLBACK_FN& callback, FILTER filter) + : converter_{converter}, callback_{callback}, filter_(std::move(filter)) {} + + void run(const Entry& root) { + assert(root.IsNode()); + TraverseNode(root.GetKey(), root.GetNode()); + } + + private: + void TraverseNode(const KeyInternal& key, const Node& node) { + auto iter = node.Entries().begin(); + auto end = node.Entries().end(); + for (; iter != end; ++iter) { + const auto& child = iter->second; + const auto& child_key = child.GetKey(); + if (child.IsNode()) { + const auto& child_node = child.GetNode(); + if (filter_.IsNodeValid(key, node.GetPostfixLen() + 1)) { + TraverseNode(child_key, child_node); + } + } else { + T& value = child.GetValue(); + if (filter_.IsEntryValid(key, value)) { + callback_(converter_.post(child_key), value); + } + } + } + } + + CONVERT converter_; + CALLBACK_FN& callback_; + FILTER filter_; +}; +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_FOR_EACH_H diff --git a/phtree/v16/for_each_hc.h b/phtree/v16/for_each_hc.h new file mode 100644 index 00000000..dbfdefbf --- /dev/null +++ b/phtree/v16/for_each_hc.h @@ -0,0 +1,189 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * 
you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_FOR_EACH_HC_H +#define PHTREE_V16_FOR_EACH_HC_H + +#include "iterator_simple.h" +#include "../common/common.h" + +namespace improbable::phtree::v16 { + +/* + * The HC (hyper cube) traversal uses `hypercube navigation`, i.e. filtering of quadrants by their + * binary hypercube address. In effect it compares the node's volume (box) with the query volume + * (box) to calculate two bit masks, mask_lower and mask_upper. These can be used as the number of + * the lowest and highest quadrant that overlaps with the query box. They can also be used to tell + * for any quadrant whether it overlaps with the query, simply by comparing the quadrant's ID with + * the two masks, see TraverseNode(). + * + * For details see "Efficient Z-Ordered Traversal of Hypercube Indexes" by T. Zäschke, M.C. Norrie, + * 2017. 
+ */ +template +class ForEachHC { + static constexpr dimension_t DIM = CONVERT::DimInternal; + using KeyExternal = typename CONVERT::KeyExternal; + using KeyInternal = typename CONVERT::KeyInternal; + using SCALAR = typename CONVERT::ScalarInternal; + using Entry = Entry; + using Node = Node; + + public: + ForEachHC( + const KeyInternal& range_min, + const KeyInternal& range_max, + const CONVERT& converter, + CALLBACK_FN& callback, + FILTER filter) + : range_min_{range_min} + , range_max_{range_max} + , converter_{converter} + , callback_{callback} + , filter_(std::move(filter)) {} + + void run(const Entry& root) { // `root` must be a node entry; traversal starts at its node. + assert(root.IsNode()); + TraverseNode(root.GetKey(), root.GetNode()); + } + + private: + void TraverseNode(const KeyInternal& key, const Node& node) { // `key` is passed to CalcLimits() as the prefix of `node`. + hc_pos_t mask_lower = 0; + hc_pos_t mask_upper = 0; + CalcLimits(node.GetPostfixLen(), key, mask_lower, mask_upper); + auto iter = node.Entries().lower_bound(mask_lower); + auto end = node.Entries().end(); + for (; iter != end && iter->first <= mask_upper; ++iter) { + auto child_hc_pos = iter->first; + // Use bit-mask magic to check whether we are in a valid quadrant. + // -> See paper referenced in class description. + if (((child_hc_pos | mask_lower) & mask_upper) == child_hc_pos) { + const auto& child = iter->second; + const auto& child_key = child.GetKey(); + if (child.IsNode()) { + const auto& child_node = child.GetNode(); + if (CheckNode(child_key, child_node)) { + TraverseNode(child_key, child_node); + } + } else { + T& value = child.GetValue(); + if (IsInRange(child_key, range_min_, range_max_) && + ApplyFilter(child_key, value)) { + callback_(converter_.post(child_key), value); + } + } + } + } + } + + bool CheckNode(const KeyInternal& key, const Node& node) const { + // Check if the node overlaps with the query box and is accepted by the filter. + // An infix with len=0 implies that at least part of the child node overlaps with the query, + // otherwise the bit mask checking would have returned 'false'. 
+ if (node.GetInfixLen() > 0) { + // Mask for comparing the prefix with the query boundaries. + assert(node.GetPostfixLen() + 1 < MAX_BIT_WIDTH); + SCALAR comparison_mask = MAX_MASK << (node.GetPostfixLen() + 1); + for (dimension_t dim = 0; dim < DIM; ++dim) { + SCALAR prefix = key[dim] & comparison_mask; + if (prefix > range_max_[dim] || prefix < (range_min_[dim] & comparison_mask)) { + return false; + } + } + } + return ApplyFilter(key, node); + } + + [[nodiscard]] bool ApplyFilter(const KeyInternal& key, const Node& node) const { + return filter_.IsNodeValid(key, node.GetPostfixLen() + 1); + } + + [[nodiscard]] bool ApplyFilter(const KeyInternal& key, const T& value) const { + return filter_.IsEntryValid(key, value); + } + + void CalcLimits( + bit_width_t postfix_len, + const KeyInternal& prefix, + hc_pos_t& lower_limit, + hc_pos_t& upper_limit) { + // Create limits for the local node. There is a lower and an upper limit. Each limit + // consists of a series of DIM bits, one for each dimension. + // For the lower limit, a '1' indicates that the 'lower' half of this dimension does + // not need to be queried. + // For the upper limit, a '0' indicates that the 'higher' half does not need to be + // queried. 
+ // + // || lower_limit=0 || lower_limit=1 || upper_limit = 0 || upper_limit = 1 + // =============||====================================================================== + // query lower || YES NO + // ============ || ===================================================================== + // query higher || NO YES + // + assert(postfix_len < MAX_BIT_WIDTH); + bit_mask_t maskHcBit = bit_mask_t(1) << postfix_len; + bit_mask_t maskVT = MAX_MASK << postfix_len; + constexpr hc_pos_t ONE = 1; + // to prevent problems with signed long when using 64 bit + if (postfix_len < MAX_BIT_WIDTH - 1) { + for (dimension_t i = 0; i < DIM; ++i) { + lower_limit <<= 1; + upper_limit <<= 1; + SCALAR nodeBisection = (prefix[i] | maskHcBit) & maskVT; + if (range_min_[i] >= nodeBisection) { + //==> set to 1 if lower value should not be queried + lower_limit |= ONE; + } + if (range_max_[i] >= nodeBisection) { + // Leave 0 if higher value should not be queried. + upper_limit |= ONE; + } + } + } else { + // Special treatment for the leading (sign) bit of signed scalars, postfix_len is MAX_BIT_WIDTH - 1. + // The problem (difference) here is that a '1' at the leading bit does indicate a + // LOWER value, opposed to indicating a HIGHER value as in the remaining bits. + // The hypercube assumes that a leading '0' indicates a lower value. + // Solution: We leave HC as it is. + for (dimension_t i = 0; i < DIM; ++i) { + lower_limit <<= 1; + upper_limit <<= 1; + if (range_min_[i] < 0) { + // The minimum is negative, so values starting with '1' (negative) must be searched + //==> set upper_limit to 1. It stays 0 (skip negative values) if the minimum is >= 0. + upper_limit |= ONE; + } + if (range_max_[i] < 0) { + // Leave 0 if values starting with '0' (non-negative) may still match the query. + // If maximum is negative, we do not need to search non-negative values + //(starting with '0'). 
+ //--> lower_limit = '1' + lower_limit |= ONE; + } + } + } + } + + const KeyInternal range_min_; + const KeyInternal range_max_; + CONVERT converter_; + CALLBACK_FN& callback_; + FILTER filter_; +}; +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_FOR_EACH_HC_H diff --git a/phtree/v16/iterator_base.h b/phtree/v16/iterator_base.h new file mode 100644 index 00000000..8f9ae71b --- /dev/null +++ b/phtree/v16/iterator_base.h @@ -0,0 +1,156 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_ITERATOR_BASE_H +#define PHTREE_V16_ITERATOR_BASE_H + +#include "entry.h" +#include "../common/common.h" + +namespace improbable::phtree::v16 { + +template +class PhTreeV16; + +/* + * Base class for all PH-Tree iterators. 
+ */ +template +class IteratorBase { + protected: + static constexpr dimension_t DIM = CONVERT::DimInternal; + using KeyInternal = typename CONVERT::KeyInternal; + using SCALAR = typename CONVERT::ScalarInternal; + using Entry = Entry; + friend PhTreeV16; + + public: + explicit IteratorBase(const CONVERT& converter) + : current_result_{nullptr} + , current_node_{} + , parent_node_{} + , is_finished_{false} + , converter_{converter} + , filter_{FILTER()} {} + + explicit IteratorBase(const CONVERT& converter, FILTER filter) + : current_result_{nullptr} + , current_node_{} + , parent_node_{} + , is_finished_{false} + , converter_{converter} + , filter_(std::move(filter)) {} + + T& operator*() const { + assert(current_result_); + return current_result_->GetValue(); + } + + T* operator->() const { + assert(current_result_); + return &current_result_->GetValue(); + } + + template + friend bool operator==( + const IteratorBase& left, + const IteratorBase& right) { + // Note: The following compares pointers to Entry objects so it should be + // a) fast (i.e. not comparing contents of entries) + // b) return `false` when comparing apparently identical entries from different PH-Trees (as + // intended) + return (left.is_finished_ && right.Finished()) || + (!left.is_finished_ && !right.Finished() && + left.current_result_ == right.GetCurrentResult()); + } + + template + friend bool operator!=( + const IteratorBase& left, + const IteratorBase& right) { + return !(left == right); + } + + auto first() const { + return converter_.post(current_result_->GetKey()); + } + + T& second() const { + return current_result_->GetValue(); + } + + [[nodiscard]] bool Finished() const { + return is_finished_; + } + + const Entry* GetCurrentResult() const { + return current_result_; + } + + protected: + void SetFinished() { + is_finished_ = true; + current_result_ = nullptr; + } + + [[nodiscard]] bool ApplyFilter(const Entry& entry) const { + return entry.IsNode() + ? 
filter_.IsNodeValid(entry.GetKey(), entry.GetNode().GetPostfixLen() + 1) + : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); + } + + void SetCurrentResult(const Entry* current_result) { + current_result_ = current_result; + } + + void SetCurrentNodeEntry(const Entry* current_node) { + assert(!current_node || current_node->IsNode()); + current_node_ = current_node; + } + + void SetParentNodeEntry(const Entry* parent_node) { + assert(!parent_node || parent_node->IsNode()); + parent_node_ = parent_node; + } + + auto post(const KeyInternal& point) { + return converter_.post(point); + } + + private: + /* + * The parent entry contains the parent node. The parent node is the node ABOVE the current node + * which contains the current entry. + */ + const Entry* GetCurrentNodeEntry() const { + return current_node_; + } + + const Entry* GetParentNodeEntry() const { + return parent_node_; + } + + const Entry* current_result_; + const Entry* current_node_; + const Entry* parent_node_; + bool is_finished_; + const CONVERT& converter_; // not owned: the referenced converter must outlive the iterator + FILTER filter_; +}; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_ITERATOR_BASE_H diff --git a/phtree/v16/ph_iterator_full.h b/phtree/v16/iterator_full.h similarity index 66% rename from phtree/v16/ph_iterator_full.h rename to phtree/v16/iterator_full.h index ec8a6ee3..c6075f72 100644 --- a/phtree/v16/ph_iterator_full.h +++ b/phtree/v16/iterator_full.h @@ -14,41 +14,38 @@ * limitations under the License. 
*/ -#ifndef PHTREE_V16_PH_ITERATOR_FULL_H -#define PHTREE_V16_PH_ITERATOR_FULL_H +#ifndef PHTREE_V16_ITERATOR_FULL_H +#define PHTREE_V16_ITERATOR_FULL_H -#include "../common/ph_common.h" -#include "ph_iterator_base.h" +#include "iterator_base.h" +#include "../common/common.h" namespace improbable::phtree::v16 { -template +template class Node; -template -class PhEntry; +template +class IteratorFull : public IteratorBase { + static constexpr dimension_t DIM = CONVERT::DimInternal; + using SCALAR = typename CONVERT::ScalarInternal; + using Node = Node; + using Entry = typename IteratorBase::Entry; -template < - dimension_t DIM, - typename T, - typename KEY, - PhPostprocessor POST, - typename FILTER> -class PhIteratorFull : public PhIteratorBase { public: - PhIteratorFull(const PhEntry& root, FILTER filter) - : PhIteratorBase(filter), stack_{}, stack_size_{0} { + IteratorFull(const Entry& root, const CONVERT& converter, FILTER filter) + : IteratorBase(converter, filter), stack_{}, stack_size_{0} { PrepareAndPush(root.GetNode()); FindNextElement(); } - PhIteratorFull& operator++() { + IteratorFull& operator++() { FindNextElement(); return *this; } - PhIteratorFull operator++(int) { - PhIteratorFull iterator(*this); + IteratorFull operator++(int) { + IteratorFull iterator(*this); ++(*this); return iterator; } @@ -56,7 +53,7 @@ class PhIteratorFull : public PhIteratorBase { private: void FindNextElement() { while (!IsEmpty()) { - EntryIteratorC* p = &Peek(); + auto* p = &Peek(); while (*p != PeekEnd()) { auto& candidate = (*p)->second; ++(*p); @@ -76,7 +73,7 @@ class PhIteratorFull : public PhIteratorBase { this->SetFinished(); } - EntryIteratorC& PrepareAndPush(const Node& node) { + auto& PrepareAndPush(const Node& node) { assert(stack_size_ < stack_.size() - 1); // No '&' because this is a temp value stack_[stack_size_].first = node.Entries().cbegin(); @@ -85,17 +82,17 @@ class PhIteratorFull : public PhIteratorBase { return stack_[stack_size_ - 1].first; } - 
EntryIteratorC& Peek() { + auto& Peek() { assert(stack_size_ > 0); return stack_[stack_size_ - 1].first; } - EntryIteratorC& PeekEnd() { + auto& PeekEnd() { assert(stack_size_ > 0); return stack_[stack_size_ - 1].second; } - EntryIteratorC& Pop() { + auto& Pop() { assert(stack_size_ > 0); return stack_[--stack_size_].first; } @@ -104,10 +101,13 @@ class PhIteratorFull : public PhIteratorBase { return stack_size_ == 0; } - std::array, EntryIteratorC>, MAX_BIT_WIDTH> stack_; + std::array< + std::pair, EntryIteratorC>, + MAX_BIT_WIDTH> + stack_; size_t stack_size_; }; } // namespace improbable::phtree::v16 -#endif // PHTREE_V16_PH_ITERATOR_FULL_H +#endif // PHTREE_V16_ITERATOR_FULL_H diff --git a/phtree/v16/ph_iterator_hc.h b/phtree/v16/iterator_hc.h similarity index 75% rename from phtree/v16/ph_iterator_hc.h rename to phtree/v16/iterator_hc.h index df8ab351..03bd6552 100644 --- a/phtree/v16/ph_iterator_hc.h +++ b/phtree/v16/iterator_hc.h @@ -14,22 +14,19 @@ * limitations under the License. */ -#ifndef PHTREE_V16_PH_ITERATOR_HC_H -#define PHTREE_V16_PH_ITERATOR_HC_H +#ifndef PHTREE_V16_ITERATOR_HC_H +#define PHTREE_V16_ITERATOR_HC_H -#include "../common/ph_common.h" -#include "ph_iterator_simple.h" +#include "iterator_simple.h" +#include "../common/common.h" namespace improbable::phtree::v16 { -template +template class Node; -template -class PhEntry; - namespace { -template +template class NodeIterator; } // namespace @@ -44,20 +41,21 @@ class NodeIterator; * For details see "Efficient Z-Ordered Traversal of Hypercube Indexes" by T. Zäschke, M.C. Norrie, * 2017. 
*/ -template < - dimension_t DIM, - typename T, - typename KEY, - PhPostprocessor POST, - typename FILTER> -class PhIteratorHC : public PhIteratorBase { +template +class IteratorHC : public IteratorBase { + static constexpr dimension_t DIM = CONVERT::DimInternal; + using KeyInternal = typename CONVERT::KeyInternal; + using SCALAR = typename CONVERT::ScalarInternal; + using Entry = typename IteratorBase::Entry; + public: - PhIteratorHC( - const PhEntry& root, - const PhPoint& range_min, - const PhPoint& range_max, + IteratorHC( + const Entry& root, + const KeyInternal& range_min, + const KeyInternal& range_max, + const CONVERT& converter, FILTER filter) - : PhIteratorBase(filter) + : IteratorBase(converter, filter) , stack_size_{0} , range_min_{range_min} , range_max_{range_max} { @@ -65,13 +63,13 @@ class PhIteratorHC : public PhIteratorBase { FindNextElement(); } - PhIteratorHC& operator++() { + IteratorHC& operator++() { FindNextElement(); return *this; } - PhIteratorHC operator++(int) { - PhIteratorHC iterator(*this); + IteratorHC operator++(int) { + IteratorHC iterator(*this); ++(*this); return iterator; } @@ -81,7 +79,7 @@ class PhIteratorHC : public PhIteratorBase { assert(!this->Finished()); while (!IsEmpty()) { auto* p = &Peek(); - const PhEntry* current_result = nullptr; + const Entry* current_result = nullptr; while ((current_result = p->Increment(range_min_, range_max_))) { if (this->ApplyFilter(*current_result)) { if (current_result->IsNode()) { @@ -99,9 +97,9 @@ class PhIteratorHC : public PhIteratorBase { this->SetFinished(); } - auto& PrepareAndPush(const PhEntry& entry) { + auto& PrepareAndPush(const Entry& entry) { assert(stack_size_ < stack_.size() - 1); - NodeIterator& ni = stack_[stack_size_++]; + auto& ni = stack_[stack_size_++]; ni.init(range_min_, range_max_, entry.GetNode(), entry.GetKey()); return ni; } @@ -120,23 +118,23 @@ class PhIteratorHC : public PhIteratorBase { return stack_size_ == 0; } - std::array, MAX_BIT_WIDTH> stack_; + 
std::array, MAX_BIT_WIDTH> stack_; size_t stack_size_; - const PhPoint range_min_; - const PhPoint range_max_; + const KeyInternal range_min_; + const KeyInternal range_max_; }; namespace { -template +template class NodeIterator { + using Key = PhPoint; + using Entry = Entry; + using Node = Node; + public: NodeIterator() : iter_{}, node_{nullptr}, mask_lower_{0}, mask_upper_(0) {} - void init( - const PhPoint& range_min, - const PhPoint& range_max, - const Node& node, - const PhPoint& prefix) { + void init(const Key& range_min, const Key& range_max, const Node& node, const Key& prefix) { node_ = &node; CalcLimits(node.GetPostfixLen(), range_min, range_max, prefix); iter_ = node.Entries().lower_bound(mask_lower_); @@ -146,7 +144,7 @@ class NodeIterator { * Advances the cursor. * @return TRUE iff a matching element was found. */ - const PhEntry* Increment(const PhPoint& range_min, const PhPoint& range_max) { + const Entry* Increment(const Key& range_min, const Key& range_max) { while (iter_ != node_->Entries().end() && iter_->first <= mask_upper_) { if (IsPosValid(iter_->first)) { const auto* be = &iter_->second; @@ -160,10 +158,7 @@ class NodeIterator { return nullptr; } - bool CheckEntry( - const PhEntry& candidate, - const PhPoint& range_min, - const PhPoint& range_max) const { + bool CheckEntry(const Entry& candidate, const Key& range_min, const Key& range_max) const { if (candidate.IsValue()) { return IsInRange(candidate.GetKey(), range_min, range_max); } @@ -176,11 +171,11 @@ class NodeIterator { } // Mask for comparing the prefix with the query boundaries. 
- assert(node.GetPostfixLen() + 1 < MAX_BIT_WIDTH); - scalar_t comparison_mask = MAX_MASK << (node.GetPostfixLen() + 1); + assert(node.GetPostfixLen() + 1 < MAX_BIT_WIDTH); + SCALAR comparison_mask = MAX_MASK << (node.GetPostfixLen() + 1); auto& key = candidate.GetKey(); for (dimension_t dim = 0; dim < DIM; ++dim) { - scalar_t in = key[dim] & comparison_mask; + SCALAR in = key[dim] & comparison_mask; if (in > range_max[dim] || in < (range_min[dim] & comparison_mask)) { return false; } @@ -194,10 +189,7 @@ class NodeIterator { } void CalcLimits( - bit_width_t postfix_len, - const PhPoint& range_min, - const PhPoint& range_max, - const PhPoint& prefix) { + bit_width_t postfix_len, const Key& range_min, const Key& range_max, const Key& prefix) { // create limits for the local node. there is a lower and an upper limit. Each limit // consists of a series of DIM bit, one for each dimension. // For the lower limit, a '1' indicates that the 'lower' half of this dimension does @@ -211,18 +203,18 @@ class NodeIterator { // ============ || ===================================================================== // query higher || NO YES // - assert(postfix_len < MAX_BIT_WIDTH); - bit_mask_t maskHcBit = bit_mask_t(1) << postfix_len; - bit_mask_t maskVT = MAX_MASK << postfix_len; + assert(postfix_len < MAX_BIT_WIDTH); + bit_mask_t maskHcBit = bit_mask_t(1) << postfix_len; + bit_mask_t maskVT = MAX_MASK << postfix_len; hc_pos_t lower_limit = 0; hc_pos_t upper_limit = 0; constexpr hc_pos_t ONE = 1; // to prevent problems with signed long when using 64 bit - if (postfix_len < 63) { + if (postfix_len < MAX_BIT_WIDTH - 1) { for (dimension_t i = 0; i < DIM; ++i) { lower_limit <<= 1; upper_limit <<= 1; - scalar_t nodeBisection = (prefix[i] | maskHcBit) & maskVT; + SCALAR nodeBisection = (prefix[i] | maskHcBit) & maskVT; if (range_min[i] >= nodeBisection) { //==> set to 1 if lower value should not be queried lower_limit |= ONE; @@ -261,12 +253,12 @@ class NodeIterator { } private: - 
EntryIteratorC iter_; - const Node* node_; + EntryIteratorC iter_; + const Node* node_; hc_pos_t mask_lower_; hc_pos_t mask_upper_; }; } // namespace } // namespace improbable::phtree::v16 -#endif // PHTREE_V16_PH_ITERATOR_HC_H +#endif // PHTREE_V16_ITERATOR_HC_H diff --git a/phtree/v16/ph_iterator_knn_hs.h b/phtree/v16/iterator_knn_hs.h similarity index 65% rename from phtree/v16/ph_iterator_knn_hs.h rename to phtree/v16/iterator_knn_hs.h index 79f9a3e7..30a1dd6d 100644 --- a/phtree/v16/ph_iterator_knn_hs.h +++ b/phtree/v16/iterator_knn_hs.h @@ -14,12 +14,11 @@ * limitations under the License. */ -#ifndef PHTREE_V16_PH_QUERY_KNN_HS_H -#define PHTREE_V16_PH_QUERY_KNN_HS_H +#ifndef PHTREE_V16_QUERY_KNN_HS_H +#define PHTREE_V16_QUERY_KNN_HS_H -#include "../common/ph_common.h" -#include "ph_iterator_base.h" -#include +#include "iterator_base.h" +#include "../common/common.h" #include namespace improbable::phtree::v16 { @@ -33,36 +32,38 @@ namespace improbable::phtree::v16 { */ namespace { -template -using PhEntryDist = std::pair*>; +template +using EntryDist = std::pair*>; -template -struct ComparePhEntryDistByDistance { - bool operator()(const PhEntryDist& left, const PhEntryDist& right) const { +template +struct CompareEntryDistByDistance { + bool operator()(const ENTRY& left, const ENTRY& right) const { return left.first > right.first; }; }; } // namespace -template < - dimension_t DIM, - typename T, - typename KEY, - PhPostprocessor POST, - typename DISTANCE, - typename FILTER> -class PhIteratorKnnHS : public PhIteratorBase { +template +class IteratorKnnHS : public IteratorBase { + static constexpr dimension_t DIM = CONVERT::DimInternal; + using KeyExternal = typename CONVERT::KeyExternal; + using KeyInternal = typename CONVERT::KeyInternal; + using SCALAR = typename CONVERT::ScalarInternal; + using Entry = typename IteratorBase::Entry; + using EntryDist = EntryDist; + public: - explicit PhIteratorKnnHS( - const PhEntry& root, + explicit IteratorKnnHS( + const 
Entry& root, size_t min_results, - const PhPoint& center, + const KeyInternal& center, + const CONVERT& converter, DISTANCE dist, FILTER filter) - : PhIteratorBase(filter) + : IteratorBase(converter, filter) , center_{center} - , center_post_{POST(center)} - , current_distance_{DBL_MAX} + , center_post_{converter.post(center)} + , current_distance_{std::numeric_limits::max()} , num_found_results_(0) , num_requested_results_(min_results) , distance_(std::move(dist)) { @@ -80,13 +81,13 @@ class PhIteratorKnnHS : public PhIteratorBase { return current_distance_; } - PhIteratorKnnHS& operator++() { + IteratorKnnHS& operator++() { FindNextElement(); return *this; } - PhIteratorKnnHS operator++(int) { - PhIteratorKnnHS iterator(*this); + IteratorKnnHS operator++(int) { + IteratorKnnHS iterator(*this); ++(*this); return iterator; } @@ -117,7 +118,7 @@ class PhIteratorKnnHS : public PhIteratorBase { double d = DistanceToNode(e2.GetKey(), sub.GetPostfixLen() + 1); queue_.emplace(d, &e2); } else { - double d = distance_(center_post_, POST(e2.GetKey())); + double d = distance_(center_post_, this->post(e2.GetKey())); queue_.emplace(d, &e2); } } @@ -125,36 +126,33 @@ class PhIteratorKnnHS : public PhIteratorBase { } } this->SetFinished(); - current_distance_ = DBL_MAX; + current_distance_ = std::numeric_limits::max(); } - double DistanceToNode(const PhPoint& prefix, int bits_to_ignore) { - assert(bits_to_ignore < MAX_BIT_WIDTH); - scalar_t mask_min = MAX_MASK << bits_to_ignore; - scalar_t mask_max = ~mask_min; - PhPoint buf; + double DistanceToNode(const KeyInternal& prefix, int bits_to_ignore) { + assert(bits_to_ignore < MAX_BIT_WIDTH); + SCALAR mask_min = MAX_MASK << bits_to_ignore; + SCALAR mask_max = ~mask_min; + KeyInternal buf; // The following calculates the point inside of the node that is closest to center_. // If center is inside the node this returns center_, otherwise it finds a point on the // node's surface. 
for (dimension_t i = 0; i < DIM; ++i) { // if center_[i] is outside the node, return distance to closest edge, // otherwise return center_[i] itself (assume possible distance=0) - scalar_t min = prefix[i] & mask_min; - scalar_t max = prefix[i] | mask_max; + SCALAR min = prefix[i] & mask_min; + SCALAR max = prefix[i] | mask_max; buf[i] = min > center_[i] ? min : (max < center_[i] ? max : center_[i]); } - return distance_(center_post_, POST(buf)); + return distance_(center_post_, this->post(buf)); } private: - const PhPoint center_; + const KeyInternal center_; // center after post processing == the external representation - const KEY center_post_; + const KeyExternal center_post_; double current_distance_; - std::priority_queue< - PhEntryDist, - std::vector>, - ComparePhEntryDistByDistance> + std::priority_queue, CompareEntryDistByDistance> queue_; int num_found_results_; int num_requested_results_; @@ -163,4 +161,4 @@ class PhIteratorKnnHS : public PhIteratorBase { } // namespace improbable::phtree::v16 -#endif // PHTREE_V16_PH_QUERY_KNN_HS_H +#endif // PHTREE_V16_QUERY_KNN_HS_H diff --git a/phtree/v16/iterator_simple.h b/phtree/v16/iterator_simple.h new file mode 100644 index 00000000..dd8839f2 --- /dev/null +++ b/phtree/v16/iterator_simple.h @@ -0,0 +1,68 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef PHTREE_V16_ITERATOR_SIMPLE_H +#define PHTREE_V16_ITERATOR_SIMPLE_H + +#include "iterator_base.h" +#include "../common/common.h" + +namespace improbable::phtree::v16 { + +template +class IteratorSimple : public IteratorBase { + static constexpr dimension_t DIM = CONVERT::DimInternal; + using SCALAR = typename CONVERT::ScalarInternal; + using Entry = typename IteratorBase::Entry; + + public: + explicit IteratorSimple(const CONVERT& converter) : IteratorBase(converter) { + this->SetFinished(); + } + + explicit IteratorSimple( + const Entry* current_result, + const Entry* current_node, + const Entry* parent_node, + const CONVERT& converter) // by reference: the base class keeps a reference, a by-value copy would dangle + : IteratorBase(converter) { + if (current_result) { + this->SetCurrentResult(current_result); + this->SetCurrentNodeEntry(current_node); + this->SetParentNodeEntry(parent_node); + } else { + this->SetFinished(); + } + } + + IteratorSimple& operator++() { + this->SetFinished(); + return *this; + } + + IteratorSimple operator++(int) { + IteratorSimple iterator(*this); + ++(*this); + return iterator; + } +}; + +template +using IteratorEnd = IteratorSimple; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_ITERATOR_SIMPLE_H diff --git a/phtree/v16/node.h b/phtree/v16/node.h index a9693c55..7f550c59 100644 --- a/phtree/v16/node.h +++ b/phtree/v16/node.h @@ -17,9 +17,9 @@ #ifndef PHTREE_V16_NODE_H #define PHTREE_V16_NODE_H -#include "../common/ph_common.h" -#include "../common/ph_tree_stats.h" -#include "ph_entry.h" +#include "entry.h" +#include "../common/common.h" +#include "../common/tree_stats.h" #include "phtree_v16.h" #include @@ -36,18 +36,16 @@ namespace improbable::phtree::v16 { * - 'std::map` is the least efficient for small node sizes but scales best with larger nodes and * dimensionality. Remember that n_max = 2^DIM. 
*/ -template +template using EntryMap = typename std::conditional< DIM <= 3, - array_map, (1 << DIM)>, - typename std:: - conditional>, std::map>>:: - type>::type; + array_map, + typename std::conditional, std::map>::type>::type; -template -using EntryIterator = decltype(EntryMap().begin()); -template -using EntryIteratorC = decltype(EntryMap().cbegin()); +template +using EntryIterator = decltype(EntryMap().begin()); +template +using EntryIteratorC = decltype(EntryMap().cbegin()); namespace { @@ -64,16 +62,16 @@ namespace { * @param child_node The node to be removed from the parent node. * @param parent_node Current owner of the child node. */ -template -void MergeIntoParent( - const PhPoint& prefix_of_child_in_parent, Node& child_node, Node& parent) { +template +void MergeIntoParent(Node& child_node, Node& parent) { assert(child_node.GetEntryCount() == 1); // At this point we have found an entry that needs to be removed. We also know that we need to // remove the child node because it contains at most one other entry and it is not the root // node. - auto& entry = child_node.Entries().begin()->second; + auto map_entry = child_node.Entries().begin(); + auto& entry = map_entry->second; - auto hc_pos_in_parent = CalcPosInArray(prefix_of_child_in_parent, parent.GetPostfixLen()); + auto hc_pos_in_parent = CalcPosInArray(entry.GetKey(), parent.GetPostfixLen()); auto& parent_entry = parent.Entries().find(hc_pos_in_parent)->second; if (entry.IsNode()) { @@ -85,19 +83,13 @@ void MergeIntoParent( // Now move the single entry into the parent, the position in the parent is the same as the // child_node. - // We need the double 'move' here because moving anything into the parent_entry causes the - // destructors to be called first on child_node and everything referenced from it. If we were - // not moving the single child entry away first, it would be destructed by the destructor of - // child_node. 
- // TODO This is really bad, we are calling the copy constructor twice here.... (try 'const'...) - auto temporary_entry = std::move(entry); - parent_entry = std::move(temporary_entry); + parent_entry.ReplaceNodeWithDataFromEntry(std::move(entry)); } } // namespace /* * A node of the PH-Tree. It contains up to 2^DIM entries, each entry being either a leaf with data - * of type T or a child node (both are of the variant type PhEntry). + * of type T or a child node (both are of the variant type Entry). * * The keys (coordinates) of all entries of a node have the same prefix, where prefix refers to the * first 'n' bits of their keys. 'n' is equivalent to "n = w - GetPostLen() - 1", where 'w' is the @@ -114,22 +106,25 @@ void MergeIntoParent( * A node always has at least two entries, except for the root node which can have fewer entries. * None of the functions in this class are recursive, see Emplace(). */ -template +template class Node { + using Key = PhPoint; + using Entry = Entry; + public: Node(bit_width_t infix_len, bit_width_t postfix_len) : postfix_len_(postfix_len), infix_len_(infix_len), entries_{} { - assert(infix_len_ < MAX_BIT_WIDTH); + assert(infix_len_ < MAX_BIT_WIDTH); assert(infix_len >= 0); } // Nodes should never be copied! - Node(const Node&) = delete; - Node(Node&&) = delete; - Node& operator=(const Node&) = delete; - Node& operator=(Node&&) = delete; + Node(const Node&) = delete; + Node(Node&&) = delete; + Node& operator=(const Node&) = delete; + Node& operator=(Node&&) = delete; - [[nodiscard]] node_size_t GetEntryCount() const { + [[nodiscard]] auto GetEntryCount() const { return entries_.size(); } @@ -169,24 +164,16 @@ class Node { * @param __args Constructor arguments for creating a value T that can be inserted for the key. */ template - PhEntry* Emplace(bool& is_inserted, const PhPoint& key, _Args&&... __args) { + Entry* Emplace(bool& is_inserted, const Key& key, _Args&&... 
__args) { hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); - - // We do find() _and_ emplace() here. Why? - // We tried using only emplace(), but that requires either PhEntry to be constructed - // beforehand, which is expensive, or we use the following, which is apparently even more - // expensive: - // std::piecewise_construct, - // std::forward_as_tuple(pos), - // std::forward_as_tuple(key, std::forward<_Args>(__args)...)); - // - auto entry = entries_.find(hc_pos); - if (entry == entries_.end()) { + auto emplace_result = entries_.try_emplace(hc_pos, key, std::forward<_Args>(__args)...); + auto& entry = emplace_result.first->second; + // Return if emplace succeed, i.e. there was no entry. + if (emplace_result.second) { is_inserted = true; - T t{std::forward<_Args>(__args)...}; - return &entries_.emplace(hc_pos, PhEntry{key, t}).first->second; + return &entry; } - return HandleCollision(entry->second, is_inserted, key, std::forward<_Args>(__args)...); + return HandleCollision(entry, is_inserted, key, std::forward<_Args>(__args)...); } /* @@ -196,7 +183,7 @@ class Node { * @param parent parent node * @return The sub node or null. */ - const PhEntry* Find(const PhPoint& key) const { + const Entry* Find(const Key& key) const { hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); const auto& entry = entries_.find(hc_pos); if (entry != entries_.end() && DoesEntryMatch(entry->second, key)) { @@ -215,7 +202,7 @@ class Node { * @param found This is and output parameter and will be set to 'true' if a value was removed. * @return A child node if the provided key leads to a child node. 
*/ - Node* Erase(const PhPoint& key, Node* parent, bool& found) { + Node* Erase(const Key& key, Node* parent, bool& found) { hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); auto it = entries_.find(hc_pos); if (it != entries_.end() && DoesEntryMatch(it->second, key)) { @@ -226,28 +213,28 @@ class Node { found = true; if (parent && GetEntryCount() == 1) { - MergeIntoParent(key, *this, *parent); + MergeIntoParent(*this, *parent); // WARNING: (this) is deleted here, do not refer to it beyond this point. } } return nullptr; } - EntryMap& Entries() { + auto& Entries() { return entries_; } - const EntryMap& Entries() const { + const auto& Entries() const { return entries_; } void GetStats(PhTreeStats& stats, bit_width_t current_depth = 0) const { - node_size_t num_children = entries_.size(); + size_t num_children = entries_.size(); ++stats.n_nodes_; ++stats.infix_hist_[GetInfixLen()]; ++stats.node_depth_hist_[current_depth]; - ++stats.node_size_log_hist_[32 - CountLeadingZeros(num_children)]; + ++stats.node_size_log_hist_[32 - CountLeadingZeros(std::uint32_t(num_children))]; stats.n_total_children_ += num_children; current_depth += GetInfixLen(); @@ -287,25 +274,24 @@ class Node { } void SetInfixLen(bit_width_t newInfLen) { - assert(newInfLen < MAX_BIT_WIDTH); + assert(newInfLen < MAX_BIT_WIDTH); assert(newInfLen >= 0); infix_len_ = newInfLen; } private: template - PhEntry& WriteValue(hc_pos_t hc_pos, const PhPoint& new_key, _Args&&... __args) { - return entries_.emplace(hc_pos, PhEntry{new_key, std::forward<_Args>(__args)...}) - .first->second; + auto& WriteValue(hc_pos_t hc_pos, const Key& new_key, _Args&&... 
__args) { + return entries_.try_emplace(hc_pos, new_key, std::forward<_Args>(__args)...).first->second; } - void WriteEntry(hc_pos_t hc_pos, PhEntry&& entry) { + void WriteEntry(hc_pos_t hc_pos, Entry&& entry) { if (entry.IsNode()) { auto& node = entry.GetNode(); bit_width_t new_subnode_infix_len = postfix_len_ - node.postfix_len_ - 1; node.SetInfixLen(new_subnode_infix_len); } - entries_.emplace(hc_pos, std::move(entry)); + entries_.try_emplace(hc_pos, std::move(entry)); } /* @@ -316,18 +302,15 @@ class Node { * inserted by this function. * @param new_key The key of the entry to be inserted * @param __args The constructor arguments for a new value T of a the new entry to be inserted - * @return A PhEntry that may contain a child node, a newly created entry or an existing entry. + * @return A Entry that may contain a child node, a newly created entry or an existing entry. * A child node indicates that no entry was inserted, but the caller should try inserting into * the child node. A newly created entry (indicated by is_inserted=true) indicates successful * insertion. An existing entry (indicated by is_inserted=false) indicates that there is already * an entry with the exact same key as new_key, so insertion has failed. */ template - PhEntry* HandleCollision( - PhEntry& existing_entry, - bool& is_inserted, - const PhPoint& new_key, - _Args&&... __args) { + auto* HandleCollision( + Entry& existing_entry, bool& is_inserted, const Key& new_key, _Args&&... __args) { assert(!is_inserted); // We have two entries in the same location (local pos). // Now we need to compare the keys. @@ -361,17 +344,17 @@ class Node { } template - PhEntry* InsertSplit( - PhEntry& current_entry, - const PhPoint& new_key, + auto* InsertSplit( + Entry& current_entry, + const Key& new_key, bit_width_t max_conflicting_bits, _Args&&... 
__args) { - const PhPoint current_key = current_entry.GetKey(); + const auto current_key = current_entry.GetKey(); // determine length of infix bit_width_t new_local_infix_len = GetPostfixLen() - max_conflicting_bits; bit_width_t new_postfix_len = max_conflicting_bits - 1; - auto new_sub_node = std::make_unique>(new_local_infix_len, new_postfix_len); + auto new_sub_node = std::make_unique(new_local_infix_len, new_postfix_len); hc_pos_t pos_sub_1 = CalcPosInArray(new_key, new_postfix_len); hc_pos_t pos_sub_2 = CalcPosInArray(current_key, new_postfix_len); @@ -395,11 +378,11 @@ class Node { * @return 'true' iff the relevant part of the key matches (prefix for nodes, whole key for * other entries). */ - bool DoesEntryMatch(const PhEntry& entry, const PhPoint& key) const { + bool DoesEntryMatch(const Entry& entry, const Key& key) const { if (entry.IsNode()) { - const Node& sub = entry.GetNode(); + const auto& sub = entry.GetNode(); if (sub.GetInfixLen() > 0) { - const bit_mask_t mask = MAX_MASK << (sub.GetPostfixLen() + 1); + const bit_mask_t mask = MAX_MASK << (sub.GetPostfixLen() + 1); return KeyEquals(entry.GetKey(), key, mask); } return true; @@ -417,7 +400,7 @@ class Node { // The number of bits between this node and the parent node. For 64bit keys possible values // range from 0 to 62. bit_width_t infix_len_; - EntryMap entries_; + EntryMap entries_; }; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/ph_iterator_base.h b/phtree/v16/ph_iterator_base.h deleted file mode 100644 index 4b8529a6..00000000 --- a/phtree/v16/ph_iterator_base.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PHTREE_V16_PH_ITERATOR_BASE_H -#define PHTREE_V16_PH_ITERATOR_BASE_H - -#include "../common/ph_common.h" -#include "ph_entry.h" - -namespace improbable::phtree::v16 { - -/* - * Base class for all PH-Tree iterators. - */ -template < - dimension_t DIM, - typename T, - typename KEY, - PhPostprocessor POST, - typename FILTER = PhFilterNoOp> -class PhIteratorBase { - public: - PhIteratorBase() : current_result_{nullptr}, is_finished_{false}, filter_{FILTER()} {} - - explicit PhIteratorBase(FILTER filter) - : current_result_{nullptr}, is_finished_{false}, filter_(std::move(filter)) {} - - T& operator*() const { - assert(current_result_); - return current_result_->GetValue(); - } - - T* operator->() const { - assert(current_result_); - return &current_result_->GetValue(); - } - - template - friend bool operator==( - const PhIteratorBase& left, - const PhIteratorBase& right) { - // Note: The following compares pointers to PhEntry objects so it should be - // a) fast (i.e.
not comparing contents of entries) - // b) return `false` when comparing apparently identical entries from different PH-Trees (as - // intended) - return (left.is_finished_ && right.Finished()) || - (!left.is_finished_ && !right.Finished() && - left.current_result_ == right.GetCurrentResult()); - } - - template - friend bool operator!=( - const PhIteratorBase& left, - const PhIteratorBase& right) { - return !(left == right); - } - - KEY first() const { - return POST(current_result_->GetKey()); - } - - T& second() const { - return current_result_->GetValue(); - } - - [[nodiscard]] bool Finished() const { - return is_finished_; - } - - const PhEntry* GetCurrentResult() const { - return current_result_; - } - - protected: - void SetFinished() { - is_finished_ = true; - current_result_ = nullptr; - } - - [[nodiscard]] bool ApplyFilter(const PhEntry& entry) const { - return entry.IsNode() - ? filter_.IsNodeValid(entry.GetKey(), entry.GetNode().GetPostfixLen() + 1) - : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); - } - - void SetCurrentResult(const PhEntry* current_result) { - current_result_ = current_result; - } - - private: - const PhEntry* current_result_; - bool is_finished_; - FILTER filter_; -}; - -} // namespace improbable::phtree::v16 - -#endif // PHTREE_V16_PH_ITERATOR_BASE_H diff --git a/phtree/v16/ph_iterator_simple.h b/phtree/v16/ph_iterator_simple.h deleted file mode 100644 index 790b0a24..00000000 --- a/phtree/v16/ph_iterator_simple.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PHTREE_V16_PH_ITERATOR_SIMPLE_H -#define PHTREE_V16_PH_ITERATOR_SIMPLE_H - -#include "../common/ph_common.h" -#include "ph_iterator_base.h" - -namespace improbable::phtree::v16 { - -template POST> -class PhTreeV16; - -template -class PhEntry; - -template POST> -class PhIteratorSimple : public PhIteratorBase { - public: - PhIteratorSimple() : PhIteratorBase() { - this->SetFinished(); - } - - explicit PhIteratorSimple(const PhEntry* e) : PhIteratorBase() { - this->SetCurrentResult(e); - } - - PhIteratorSimple& operator++() { - FindNextElement(); - return *this; - } - - PhIteratorSimple operator++(int) { - PhIteratorSimple iterator(*this); - ++(*this); - return iterator; - } - - protected: - void FindNextElement() { - this->SetFinished(); - } -}; - -template POST> -using PhIteratorEnd = PhIteratorSimple; - -} // namespace improbable::phtree::v16 - -#endif // PHTREE_V16_PH_ITERATOR_SIMPLE_H diff --git a/phtree/v16/phtree_v16.h b/phtree/v16/phtree_v16.h index d8e78517..a09351fe 100644 --- a/phtree/v16/phtree_v16.h +++ b/phtree/v16/phtree_v16.h @@ -14,15 +14,17 @@ * limitations under the License. 
*/ -#ifndef PHTREE_V16_PHTREEV16_H -#define PHTREE_V16_PHTREEV16_H +#ifndef PHTREE_V16_PHTREE_V16_H +#define PHTREE_V16_PHTREE_V16_H -#include "debug_helper.h" +#include "debug_helper_v16.h" +#include "for_each.h" +#include "for_each_hc.h" +#include "iterator_full.h" +#include "iterator_hc.h" +#include "iterator_knn_hs.h" +#include "iterator_simple.h" #include "node.h" -#include "ph_iterator_full.h" -#include "ph_iterator_hc.h" -#include "ph_iterator_knn_hs.h" -#include "ph_iterator_simple.h" namespace improbable::phtree::v16 { @@ -43,21 +45,35 @@ namespace improbable::phtree::v16 { * - T. Zaeschke: "The PH-Tree Revisited", (2015) * - T. Zaeschke, M.C. Norrie: "Efficient Z-Ordered Traversal of Hypercube Indexes" (BTW 2017). * - * @tparam T Value type. - * @tparam DIM Dimensionality. This is the number of dimensions of the space to index. + * @tparam T Value type. + * @tparam DIM Dimensionality. This is the number of dimensions of the space to index. + * @tparam CONVERT A converter class with a 'pre()' and a 'post()' function. 'pre()' translates + * external KEYs into the internal PhPoint type. 'post()' + * translates the PhPoint back to the external KEY type. 
*/ -template < - dimension_t DIM, - typename T, - typename KEY = PhPoint, - PhPostprocessor POST = PrePostNoOp> +template > class PhTreeV16 { friend PhTreeDebugHelper; + using ScalarExternal = typename CONVERT::ScalarExternal; + using ScalarInternal = typename CONVERT::ScalarInternal; + using Key = typename CONVERT::KeyInternal; + using Node = Node; + using Entry = Entry; public: static_assert(!std::is_reference::value, "Reference type value are not supported."); + static_assert(std::is_signed::value, "ScalarInternal must be a signed type"); + static_assert( + std::is_integral::value, "ScalarInternal must be an integral type"); + static_assert( + std::is_arithmetic::value, "ScalarExternal must be an arithmetic type"); + static_assert(DIM >= 1 && DIM <= 63, "This PH-Tree supports between 1 and 63 dimensions"); - PhTreeV16() : num_entries_{0}, root_{0, MAX_BIT_WIDTH - 1} {} + PhTreeV16(CONVERT& converter = ConverterNoOp()) + : num_entries_{0} + , root_{0, MAX_BIT_WIDTH - 1} + , the_end_{converter} + , converter_{converter} {} /* * Attempts to build and insert a key and a value into the tree. @@ -74,7 +90,7 @@ class PhTreeV16 { * entry instead of inserting a new one. */ template - std::pair emplace(const PhPoint& key, _Args&&... __args) { + std::pair emplace(const Key& key, _Args&&... __args) { auto* current_entry = &root_; bool is_inserted = false; while (current_entry->IsNode()) { @@ -85,13 +101,63 @@ class PhTreeV16 { return {current_entry->GetValue(), is_inserted}; } + /* + * The emplace_hint() method uses an iterator as hint for insertion. + * The hint is ignored if it is not useful or is equal to end(). + * + * Iterators should normally not be used after the tree has been modified. As an exception to + * this rule, an iterator can be used as hint if it was previously used with at most one call + * to erase() and if no other modifications occurred. 
+ * The following is valid: + * + * // Move value from key1 to key2 + * auto iter = tree.find(key1); + * auto value = iter.second(); // The value may become invalid in erase() + * erase(iter); + * emplace_hint(iter, key2, value); // the iterator can still be used as hint here + */ + template + std::pair emplace_hint(const ITERATOR& iterator, const Key& key, _Args&&... __args) { + // This function can be used to insert a value close to a known value + // or close to a recently removed value. The hint can only be used if the new key is + // inside one of the nodes provided by the hint iterator. + // The idea behind using the 'parent' is twofold: + // - The 'parent' node is one level above the iterator position, it therefore is spatially + // larger and has a better probability of containing the new position, allowing for + // fast track emplace. + // - Using 'parent' allows a scenario where the iterator was previously used with + // erase(iterator). This is safe because erase() will never erase the 'parent' node. + + if (!iterator.GetParentNodeEntry()) { + // No hint available, use standard emplace() + return emplace(key, std::forward<_Args>(__args)...); + } + + auto* parent_entry = iterator.GetParentNodeEntry(); + if (NumberOfDivergingBits(key, parent_entry->GetKey()) > + parent_entry->GetNode().GetPostfixLen() + 1) { + // replace higher up in the tree + return emplace(key, std::forward<_Args>(__args)...); + } + + // replace in node + auto* current_entry = parent_entry; + bool is_inserted = false; + while (current_entry->IsNode()) { + current_entry = + current_entry->GetNode().Emplace(is_inserted, key, std::forward<_Args>(__args)...); + } + num_entries_ += is_inserted; + return {current_entry->GetValue(), is_inserted}; + } + /* * See std::map::insert(). * * @return a pair consisting of the inserted element (or to the element that prevented the * insertion) and a bool denoting whether the insertion took place. 
*/ - std::pair insert(const PhPoint& key, const T& value) { + std::pair insert(const Key& key, const T& value) { return emplace(key, value); } @@ -99,7 +165,7 @@ class PhTreeV16 { * @return the value stored at position 'key'. If no such value exists, one is added to the tree * and returned. */ - T& operator[](const PhPoint& key) { + T& operator[](const Key& key) { return emplace(key).first; } @@ -108,7 +174,7 @@ class PhTreeV16 { * * @return '1', if a value is associated with the provided key, otherwise '0'. */ - size_t count(const PhPoint& key) const { + size_t count(const Key& key) const { if (empty()) { return 0; } @@ -127,20 +193,21 @@ class PhTreeV16 { * @return an iterator that points either to the associated value or to {@code end()} if the key * was found */ - PhIteratorSimple find(const PhPoint& key) const { + auto find(const Key& key) const { if (empty()) { - return {}; + return IteratorSimple(converter_); } - auto* current_entry = &root_; + const Entry* current_entry = &root_; + const Entry* current_node = nullptr; + const Entry* parent_node = nullptr; while (current_entry && current_entry->IsNode()) { + parent_node = current_node; + current_node = current_entry; current_entry = current_entry->GetNode().Find(key); } - if (current_entry) { - return PhIteratorSimple(current_entry); - } - return {}; + return IteratorSimple(current_entry, current_node, parent_node, converter_); } /* @@ -148,9 +215,9 @@ class PhTreeV16 { * * @return '1' if a value was found, otherwise '0'. */ - size_t erase(const PhPoint& key) { + size_t erase(const Key& key) { auto* current_node = &root_.GetNode(); - Node* parent_node = nullptr; + Node* parent_node = nullptr; bool found = false; while (current_node) { auto* child_node = current_node->Erase(key, parent_node, found); @@ -161,36 +228,110 @@ class PhTreeV16 { return found; } + /* + * See std::map::erase(). Removes any value at the given iterator location. 
+ * + * + * + * WARNING + * While this is guaranteed to work correctly, only iterators returned from find() + * will result in erase(iterator) being faster than erase(key). + * Iterators returned from other functions may be optimized in a future version. + * + * @return '1' if a value was found, otherwise '0'. + */ + template + size_t erase(const ITERATOR& iterator) { + if (iterator.Finished()) { + return 0; + } + if (!iterator.GetParentNodeEntry()) { + // Why may there be no parent? + // - we are in the root node + // - the iterator did not set this value + // In either case, we need to start searching from the top. + return erase(iterator.GetCurrentResult()->GetKey()); + } + bool found = false; + assert(iterator.GetCurrentNodeEntry() && iterator.GetCurrentNodeEntry()->IsNode()); + iterator.GetCurrentNodeEntry()->GetNode().Erase( + iterator.GetCurrentResult()->GetKey(), + &iterator.GetParentNodeEntry()->GetNode(), + found); + + num_entries_ -= found; + return found; + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * functions must implement the same signature as the default 'FilterNoOp'. + * + * @param callback The callback function to be called for every entry that matches the query. + * The callback requires the following signature: callback(const PhPointD &, const T &) + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp`. + */ + template + void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { + ForEach(converter_, callback, filter).run(root_); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum respectively the maximum keys in every dimension. 
+ * @param query_box The query window. + * @param callback The callback function to be called for every entry that matches the query. + * The callback requires the following signature: callback(const PhPoint &, const T &) + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp`. + */ + template + void for_each( + const PhBox& query_box, + CALLBACK_FN& callback, + FILTER filter = FILTER()) const { + ForEachHC( + query_box.min(), query_box.max(), converter_, callback, filter) + .run(root_); + } + /* * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter - * functions must implement the same signature as the default 'PhFilterNoOp'. + * functions must implement the same signature as the default 'FilterNoOp'. * * @return an iterator over all (filtered) entries in the tree, */ - template > + template auto begin(FILTER filter = FILTER()) const { - return PhIteratorFull(root_, filter); + return IteratorFull(root_, converter_, filter); } /* * Performs a rectangular window query. The parameters are the min and max keys which * contain the minimum respectively the maximum keys in every dimension. - * @param min Minimum values - * @param max Maximum values + * @param query_box The query window. * @param filter An optional filter function. The filter function allows filtering entries and * sub-nodes before they are returned or traversed. Any filter function must follow the - * signature of the default 'PhFilterNoOp`. + * signature of the default 'FilterNoOp`. * @return Result iterator. 
*/ - template > - auto begin_query( - const PhPoint& min, const PhPoint& max, FILTER filter = FILTER()) const { - return PhIteratorHC(root_, min, max, filter); + template + auto begin_query(const PhBox& query_box, FILTER filter = FILTER()) const { + return IteratorHC( + root_, query_box.min(), query_box.max(), converter_, filter); } /* * Locate nearest neighbors for a given point in space. + * + * Example for distance function: auto fn = DistanceEuclidean + * auto iter = tree.begin_knn_query> + * * @param min_results number of entries to be returned. More entries may or may not be returned * when several entries have the same distance. * @param center center point @@ -199,16 +340,14 @@ class PhTreeV16 { * calculated. * @return Result iterator. */ - template < - typename DISTANCE = PhDistanceLongEuclidean, - typename FILTER = PhFilterNoOp> + template auto begin_knn_query( size_t min_results, - const PhPoint& center, + const Key& center, DISTANCE distance_function = DISTANCE(), FILTER filter = FILTER()) const { - return PhIteratorKnnHS( - root_, min_results, center, distance_function, filter); + return IteratorKnnHS( + root_, min_results, center, converter_, distance_function, filter); } /* @@ -223,7 +362,7 @@ class PhTreeV16 { */ void clear() { num_entries_ = 0; - root_ = PhEntry(0, MAX_BIT_WIDTH - 1); + root_ = Entry(0, MAX_BIT_WIDTH - 1); } /* @@ -252,10 +391,11 @@ class PhTreeV16 { size_t num_entries_; // Contract: root_ contains a Node with 0 or more entries (the root node is the only Node // that is allowed to have less than two entries. 
- PhEntry root_; - PhIteratorEnd the_end_; + Entry root_; + IteratorEnd the_end_; + CONVERT converter_; }; } // namespace improbable::phtree::v16 -#endif // PHTREE_V16_PHTREEV16_H +#endif // PHTREE_V16_PHTREE_V16_H diff --git a/tools/bazel b/tools/bazel index 43883f99..bda59d1e 100755 --- a/tools/bazel +++ b/tools/bazel @@ -12,13 +12,13 @@ source "${TOOLS_DIR}"/../ci/includes/bazel.sh REQUIRED_BAZEL_VERSION="$(getBazelVersion)" BAZEL_INSTALLATION_DIR="${HOME}/.bazel_installations/${REQUIRED_BAZEL_VERSION}" if isLinux; then - REQUIRED_BAZEL_SHA256="4df79462c6c3ecdeeee7af99fc269b52ab1aa4828ef3bc359c1837d3fafeeee7" + REQUIRED_BAZEL_SHA256="1a64c807716e10c872f1618852d95f4893d81667fe6e691ef696489103c9b460" REQUIRED_BAZEL_SHA256CMD="sha256sum" DOWNLOAD_CMD="wget -q --no-clobber -O bazel" BAZEL_EXE="bazel-${REQUIRED_BAZEL_VERSION}-linux-x86_64" - if [[ "$(lsb_release -cs)" != "bionic" ]]; then - echo -e "\033[0;33mWarning: You don't seem to be running Ubuntu 18.04, which is the supported Linux distribution. Continuing anyway, but your mileage might vary.\033[0m" + if [[ !("$(lsb_release -cs)" =~ ^(bionic|focal)$) ]]; then + echo -e "\033[0;33mWarning: You don't seem to be running Ubuntu 18.04 or 20.04, which is the supported Linux distribution. Continuing anyway, but your mileage might vary.\033[0m" fi if which clang-9 1>/dev/null; then @@ -28,12 +28,12 @@ if isLinux; then echo -e "\033[0;33mWarning: You don't seem to have clang-9 correctly installed. Please check README.md to ensure your compiler is set up correctly. 
Continuing with whatever compiler bazel detects, your mileage might vary.\033[0m" fi elif isMacOS; then - REQUIRED_BAZEL_SHA256="3eca4c96cfda97a9d5f8d3d0dec4155a5cc5ff339b10d3f35213c398bf13881e" + REQUIRED_BAZEL_SHA256="23ea93e5ef9f381babd7f658bbbf12d366510f0a719e816fea90bcbe9737aef2" REQUIRED_BAZEL_SHA256CMD="shasum -a 256" DOWNLOAD_CMD="wget -q --no-clobber -O bazel" BAZEL_EXE="bazel-${REQUIRED_BAZEL_VERSION}-darwin-x86_64" else - REQUIRED_BAZEL_SHA256="cc7b3ff6f4bfd6bc2121a80656afec66ee57713e8b88e9d2fb58b4eddf271268" + REQUIRED_BAZEL_SHA256="d526f04197a1362d95f1d214d2aa51cde527fae01c5752ccea1167475c6f1c60" REQUIRED_BAZEL_SHA256CMD="sha256sum" DOWNLOAD_CMD="curl -L -s -o bazel.exe" # Windows does not have an installer but retrieves the executable directly. diff --git a/tools/build_rules/http.bzl b/tools/build_rules/http.bzl index a3cfa6e6..26e5ba2f 100644 --- a/tools/build_rules/http.bzl +++ b/tools/build_rules/http.bzl @@ -1,6 +1,3 @@ -# The following is a fork of the upstream bazel rules from tools/build_defs/repo/http.bzl that fixes -# netrc file lookup on Windows. - # Copyright 2016 The Bazel Authors. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,11 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """Rules for downloading files and archives over HTTP. - ### Setup - To use these rules, load them in your `WORKSPACE` file as follows: - ```python load( "@bazel_tools//tools/build_defs/repo:http.bzl", @@ -28,15 +22,12 @@ load( "http_jar", ) ``` - These rules are improved versions of the native http rules and will eventually replace the native rules. """ load( - # BEGIN IMPROBABLE EDIT: use upstream utils.bzl - "@bazel_tools//tools/build_defs/repo:utils.bzl", - # END IMPROBABLE EDIT + ":utils.bzl", "patch", "read_netrc", "update_attrs", @@ -44,28 +35,49 @@ load( "workspace_and_buildfile", ) +# Shared between http_jar, http_file and http_archive. 
+_AUTH_PATTERN_DOC = """An optional dict mapping host names to custom authorization patterns. +If a URL's host name is present in this dict the value will be used as a pattern when +generating the authorization header for the http request. This enables the use of custom +authorization schemes used in a lot of common cloud storage providers. +The pattern currently supports 2 tokens: <login> and +<password>, which are replaced with their equivalent value +in the netrc file for the same host name. After formatting, the result is set +as the value for the Authorization field of the HTTP request. +Example attribute and netrc for a http download to an oauth2 enabled API using a bearer token: +
+auth_patterns = {
+    "storage.cloudprovider.com": "Bearer <password>"
+}
+
+netrc: +
+machine storage.cloudprovider.com
+        password RANDOM-TOKEN
+
+The final HTTP request would have the following header: +
+Authorization: Bearer RANDOM-TOKEN
+
+""" + def _get_auth(ctx, urls): """Given the list of URLs obtain the correct auth dict.""" if ctx.attr.netrc: netrc = read_netrc(ctx, ctx.attr.netrc) - return use_netrc(netrc, urls) - - if "HOME" in ctx.os.environ: - if not ctx.os.name.startswith("windows"): - netrcfile = "%s/.netrc" % (ctx.os.environ["HOME"],) - if ctx.execute(["test", "-f", netrcfile]).return_code == 0: - netrc = read_netrc(ctx, netrcfile) - return use_netrc(netrc, urls) + return use_netrc(netrc, urls, ctx.attr.auth_patterns) - # BEGIN IMPROBABLE EDIT: lookup netrc file on Windows - if "USERPROFILE" in ctx.os.environ: - if ctx.os.name.startswith("windows"): - netrcfile = "%s/.netrc" % (ctx.os.environ["USERPROFILE"],) - if ctx.execute(["test", "-f", netrcfile]).return_code == 0: - netrc = read_netrc(ctx, netrcfile) - return use_netrc(netrc, urls) + if "HOME" in ctx.os.environ and not ctx.os.name.startswith("windows"): + netrcfile = "%s/.netrc" % (ctx.os.environ["HOME"]) + if ctx.execute(["test", "-f", netrcfile]).return_code == 0: + netrc = read_netrc(ctx, netrcfile) + return use_netrc(netrc, urls, ctx.attr.auth_patterns) - # END IMPROBABLE EDIT + if "USERPROFILE" in ctx.os.environ and ctx.os.name.startswith("windows"): + netrcfile = "%s/.netrc" % (ctx.os.environ["USERPROFILE"]) + if ctx.path(netrcfile).exists: + netrc = read_netrc(ctx, netrcfile) + return use_netrc(netrc, urls, ctx.attr.auth_patterns) return {} @@ -100,7 +112,6 @@ def _http_archive_impl(ctx): _HTTP_FILE_BUILD = """ package(default_visibility = ["//visibility:public"]) - filegroup( name = "file", srcs = ["{}"], @@ -138,21 +149,17 @@ def _http_file_impl(ctx): _HTTP_JAR_BUILD = """ load("@rules_java//java:defs.bzl", "java_import") - package(default_visibility = ["//visibility:public"]) - java_import( name = 'jar', jars = ['downloaded.jar'], visibility = ['//visibility:public'], ) - filegroup( name = 'file', srcs = ['downloaded.jar'], visibility = ['//visibility:public'], ) - """ def _http_jar_impl(ctx): @@ -178,10 +185,8 @@ 
_http_archive_attrs = { "url": attr.string( doc = """A URL to a file that will be made available to Bazel. - This must be a file, http or https URL. Redirections are followed. Authentication is not supported. - This parameter is to simplify the transition from the native http_archive rule. More flexibility can be achieved by the urls parameter that allows to specify alternative URLs to fetch from. @@ -190,13 +195,11 @@ to specify alternative URLs to fetch from. "urls": attr.string_list( doc = """A list of URLs to a file that will be made available to Bazel. - Each entry must be a file, http or https URL. Redirections are followed. Authentication is not supported.""", ), "sha256": attr.string( doc = """The expected SHA-256 of the file downloaded. - This must match the SHA-256 of the file downloaded. _It is a security risk to omit the SHA-256 as remote files can change._ At best omitting this field will make your build non-hermetic. It is optional to make development @@ -205,27 +208,26 @@ easier but should be set before shipping.""", "netrc": attr.string( doc = "Location of the .netrc file to use for authentication", ), + "auth_patterns": attr.string_dict( + doc = _AUTH_PATTERN_DOC, + ), "canonical_id": attr.string( doc = """A canonical id of the archive downloaded. - If specified and non-empty, bazel will not take the archive from cache, unless it was added to the cache by a request with the same canonical id. """, ), "strip_prefix": attr.string( doc = """A directory prefix to strip from the extracted files. - Many archives contain a top-level directory that contains all of the useful files in archive. Instead of needing to specify this prefix over and over in the `build_file`, this field can be used to strip it from all of the extracted files. 
- For example, suppose you are using `foo-lib-latest.zip`, which contains the directory `foo-lib-1.2.3/` under which there is a `WORKSPACE` file and are `src/`, `lib/`, and `test/` directories that contain the actual code you wish to build. Specify `strip_prefix = "foo-lib-1.2.3"` to use the `foo-lib-1.2.3` directory as your top-level directory. - Note that if there are files outside of this directory, they will be discarded and inaccessible (e.g., a top-level license file). This includes files/directories that start with the prefix but are not in the directory @@ -234,7 +236,6 @@ match a directory in the archive, Bazel will return an error.""", ), "type": attr.string( doc = """The archive type of the downloaded file. - By default, the archive type is determined from the file extension of the URL. If the file has no extension, you can explicitly specify one of the following: `"zip"`, `"jar"`, `"war"`, `"tar"`, `"tar.gz"`, `"tgz"`, @@ -312,26 +313,21 @@ http_archive = repository_rule( doc = """Downloads a Bazel repository as a compressed archive file, decompresses it, and makes its targets available for binding. - It supports the following file extensions: `"zip"`, `"jar"`, `"war"`, `"tar"`, `"tar.gz"`, `"tgz"`, `"tar.xz"`, and `tar.bz2`. - Examples: Suppose the current repository contains the source code for a chat program, rooted at the directory `~/chat-app`. It needs to depend on an SSL library which is available from http://example.com/openssl.zip. 
This `.zip` file contains the following directory structure: - ``` WORKSPACE src/ openssl.cc openssl.h ``` - In the local repository, the user creates a `openssl.BUILD` file which contains the following target definition: - ```python cc_library( name = "openssl-lib", @@ -339,13 +335,10 @@ Examples: hdrs = ["src/openssl.h"], ) ``` - Targets in the `~/chat-app` repository can depend on this target if the following lines are added to `~/chat-app/WORKSPACE`: - ```python load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - http_archive( name = "my_ssl", urls = ["http://example.com/openssl.zip"], @@ -353,7 +346,6 @@ Examples: build_file = "@//:openssl.BUILD", ) ``` - Then targets would specify `@my_ssl//:openssl-lib` as a dependency. """, ) @@ -368,7 +360,6 @@ _http_file_attrs = { ), "sha256": attr.string( doc = """The expected SHA-256 of the file downloaded. - This must match the SHA-256 of the file downloaded. _It is a security risk to omit the SHA-256 as remote files can change._ At best omitting this field will make your build non-hermetic. It is optional to make development @@ -376,7 +367,6 @@ easier but should be set before shipping.""", ), "canonical_id": attr.string( doc = """A canonical id of the archive downloaded. - If specified and non-empty, bazel will not take the archive from cache, unless it was added to the cache by a request with the same canonical id. """, @@ -384,13 +374,15 @@ unless it was added to the cache by a request with the same canonical id. "urls": attr.string_list( mandatory = True, doc = """A list of URLs to a file that will be made available to Bazel. - Each entry must be a file, http or https URL. Redirections are followed. 
Authentication is not supported.""", ), "netrc": attr.string( doc = "Location of the .netrc file to use for authentication", ), + "auth_patterns": attr.string_dict( + doc = _AUTH_PATTERN_DOC, + ), } http_file = repository_rule( @@ -399,22 +391,18 @@ http_file = repository_rule( doc = """Downloads a file from a URL and makes it available to be used as a file group. - Examples: Suppose you need to have a debian package for your custom rules. This package is available from http://example.com/package.deb. Then you can add to your WORKSPACE file: - ```python load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_file") - http_file( name = "my_deb", urls = ["http://example.com/package.deb"], sha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", ) ``` - Targets would specify `@my_deb//file` as a dependency to depend on this file. """, ) @@ -425,7 +413,6 @@ _http_jar_attrs = { ), "canonical_id": attr.string( doc = """A canonical id of the archive downloaded. - If specified and non-empty, bazel will not take the archive from cache, unless it was added to the cache by a request with the same canonical id. """, @@ -442,6 +429,9 @@ unless it was added to the cache by a request with the same canonical id. "netrc": attr.string( doc = "Location of the .netrc file to use for authentication", ), + "auth_patterns": attr.string_dict( + doc = _AUTH_PATTERN_DOC, + ), } http_jar = repository_rule( @@ -449,29 +439,22 @@ http_jar = repository_rule( attrs = _http_jar_attrs, doc = """Downloads a jar from a URL and makes it available as java_import - Downloaded files must have a .jar extension. - Examples: Suppose the current repository contains the source code for a chat program, rooted at the directory `~/chat-app`. It needs to depend on an SSL library which is available from `http://example.com/openssl-0.2.jar`. 
- Targets in the `~/chat-app` repository can depend on this target if the following lines are added to `~/chat-app/WORKSPACE`: - ```python load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_jar") - http_jar( name = "my_ssl", url = "http://example.com/openssl-0.2.jar", sha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", ) ``` - Targets would specify @my_ssl//jar as a dependency to depend on this jar. - You may also reference files on the current system (localhost) by using "file:///path/to/file" if you are on Unix-based systems. If you're on Windows, use "file:///c:/path/to/file". In both examples, note the three slashes (`/`) -- the first two slashes belong to `file://` and the third diff --git a/tools/build_rules/utils.bzl b/tools/build_rules/utils.bzl new file mode 100644 index 00000000..b2a70051 --- /dev/null +++ b/tools/build_rules/utils.bzl @@ -0,0 +1,322 @@ +# Copyright 2018 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utils for manipulating external repositories, once fetched. +### Setup +These utilities are intended to be used by other repository rules. They +can be loaded as follows. +```python +load( + "@bazel_tools//tools/build_defs/repo:utils.bzl", + "workspace_and_buildfile", + "patch", + "update_attrs", +) +``` +""" + +def workspace_and_buildfile(ctx): + """Utility function for writing WORKSPACE and, if requested, a BUILD file. 
+ This rule is intended to be used in the implementation function of a + repository rule. + It assumes the parameters `name`, `build_file`, `build_file_content`, + `workspace_file`, and `workspace_file_content` to be + present in `ctx.attr`; the latter four possibly with value None. + Args: + ctx: The repository context of the repository rule calling this utility + function. + """ + if ctx.attr.build_file and ctx.attr.build_file_content: + ctx.fail("Only one of build_file and build_file_content can be provided.") + + if ctx.attr.workspace_file and ctx.attr.workspace_file_content: + ctx.fail("Only one of workspace_file and workspace_file_content can be provided.") + + if ctx.attr.workspace_file: + ctx.file("WORKSPACE", ctx.read(ctx.attr.workspace_file)) + elif ctx.attr.workspace_file_content: + ctx.file("WORKSPACE", ctx.attr.workspace_file_content) + else: + ctx.file("WORKSPACE", "workspace(name = \"{name}\")\n".format(name = ctx.name)) + + if ctx.attr.build_file: + ctx.file("BUILD.bazel", ctx.read(ctx.attr.build_file)) + elif ctx.attr.build_file_content: + ctx.file("BUILD.bazel", ctx.attr.build_file_content) + +def _is_windows(ctx): + return ctx.os.name.lower().find("windows") != -1 + +def _use_native_patch(patch_args): + """If patch_args only contains -p options, we can use the native patch implementation.""" + for arg in patch_args: + if not arg.startswith("-p"): + return False + return True + +def patch(ctx, patches = None, patch_cmds = None, patch_cmds_win = None, patch_tool = None, patch_args = None): + """Implementation of patching an already extracted repository. + This rule is intended to be used in the implementation function of + a repository rule. If the parameters `patches`, `patch_tool`, + `patch_args`, `patch_cmds` and `patch_cmds_win` are not specified + then they are taken from `ctx.attr`. + Args: + ctx: The repository context of the repository rule calling this utility + function. + patches: The patch files to apply. 
List of strings, Labels, or paths. + patch_cmds: Bash commands to run for patching, passed one at a + time to bash -c. List of strings + patch_cmds_win: Powershell commands to run for patching, passed + one at a time to powershell /c. List of strings. If the + boolean value of this parameter is false, patch_cmds will be + used and this parameter will be ignored. + patch_tool: Path of the patch tool to execute for applying + patches. String. + patch_args: Arguments to pass to the patch tool. List of strings. + """ + bash_exe = ctx.os.environ["BAZEL_SH"] if "BAZEL_SH" in ctx.os.environ else "bash" + powershell_exe = ctx.os.environ["BAZEL_POWERSHELL"] if "BAZEL_POWERSHELL" in ctx.os.environ else "powershell.exe" + + if patches == None and hasattr(ctx.attr, "patches"): + patches = ctx.attr.patches + if patches == None: + patches = [] + + if patch_cmds == None and hasattr(ctx.attr, "patch_cmds"): + patch_cmds = ctx.attr.patch_cmds + if patch_cmds == None: + patch_cmds = [] + + if patch_cmds_win == None and hasattr(ctx.attr, "patch_cmds_win"): + patch_cmds_win = ctx.attr.patch_cmds_win + if patch_cmds_win == None: + patch_cmds_win = [] + + if patch_tool == None and hasattr(ctx.attr, "patch_tool"): + patch_tool = ctx.attr.patch_tool + if not patch_tool: + patch_tool = "patch" + native_patch = True + else: + native_patch = False + + if patch_args == None and hasattr(ctx.attr, "patch_args"): + patch_args = ctx.attr.patch_args + if patch_args == None: + patch_args = [] + + if len(patches) > 0 or len(patch_cmds) > 0: + ctx.report_progress("Patching repository") + + if native_patch and _use_native_patch(patch_args): + if patch_args: + strip = int(patch_args[-1][2:]) + else: + strip = 0 + for patchfile in patches: + ctx.patch(patchfile, strip) + else: + for patchfile in patches: + command = "{patchtool} {patch_args} < {patchfile}".format( + patchtool = patch_tool, + patchfile = ctx.path(patchfile), + patch_args = " ".join([ + "'%s'" % arg + for arg in patch_args + ]), + ) + st 
= ctx.execute([bash_exe, "-c", command]) + if st.return_code: + fail("Error applying patch %s:\n%s%s" % + (str(patchfile), st.stderr, st.stdout)) + + if _is_windows(ctx) and patch_cmds_win: + for cmd in patch_cmds_win: + st = ctx.execute([powershell_exe, "/c", cmd]) + if st.return_code: + fail("Error applying patch command %s:\n%s%s" % + (cmd, st.stdout, st.stderr)) + else: + for cmd in patch_cmds: + st = ctx.execute([bash_exe, "-c", cmd]) + if st.return_code: + fail("Error applying patch command %s:\n%s%s" % + (cmd, st.stdout, st.stderr)) + +def update_attrs(orig, keys, override): + """Utility function for altering and adding the specified attributes to a particular repository rule invocation. + This is used to make a rule reproducible. + Args: + orig: dict of actually set attributes (either explicitly or implicitly) + by a particular rule invocation + keys: complete set of attributes defined on this rule + override: dict of attributes to override or add to orig + Returns: + dict of attributes with the keys from override inserted/updated + """ + result = {} + for key in keys: + if getattr(orig, key) != None: + result[key] = getattr(orig, key) + result["name"] = orig.name + result.update(override) + return result + +def maybe(repo_rule, name, **kwargs): + """Utility function for only adding a repository if it's not already present. + This is to implement safe repositories.bzl macro documented in + https://docs.bazel.build/versions/master/skylark/deploying.html#dependencies. + Args: + repo_rule: repository rule function. + name: name of the repository to create. + **kwargs: remaining arguments that are passed to the repo_rule function. + Returns: + Nothing, defines the repository when needed as a side-effect. + """ + if not native.existing_rule(name): + repo_rule(name = name, **kwargs) + +def read_netrc(ctx, filename): + """Utility function to parse at least a basic .netrc file. 
+ Args: + ctx: The repository context of the repository rule calling this utility + function. + filename: the name of the .netrc file to read + Returns: + dict mapping a machine names to a dict with the information provided + about them + """ + contents = ctx.read(filename) + + # Parse the file. This is mainly a token-based update of a simple state + # machine, but we need to keep the line structure to correctly determine + # the end of a `macdef` command. + netrc = {} + currentmachinename = None + currentmachine = {} + macdef = None + currentmacro = "" + cmd = None + for line in contents.splitlines(): + if line.startswith("#"): + # Comments start with #. Ignore these lines. + continue + elif macdef: + # as we're in a macro, just determine if we reached the end. + if line: + currentmacro += line + "\n" + else: + # reached end of macro, add it + currentmachine[macdef] = currentmacro + macdef = None + currentmacro = "" + else: + # Essentially line.split(None) which starlark does not support. + tokens = [ + w.strip() + for w in line.split(" ") + if len(w.strip()) > 0 + ] + for token in tokens: + if cmd: + # we have a command that expects another argument + if cmd == "machine": + # a new machine definition was provided, so save the + # old one, if present + if not currentmachinename == None: + netrc[currentmachinename] = currentmachine + currentmachine = {} + currentmachinename = token + elif cmd == "macdef": + macdef = "macdef %s" % (token,) + # a new macro definition; the documentation says + # "its contents begin with the next .netrc line [...]", + # so should there really be tokens left in the current + # line, they're not part of the macro. 
+ + else: + currentmachine[cmd] = token + cmd = None + elif token in [ + "machine", + "login", + "password", + "account", + "macdef", + ]: + # command takes one argument + cmd = token + elif token == "default": + # defines the default machine; again, store old machine + if not currentmachinename == None: + netrc[currentmachinename] = currentmachine + + # We use the empty string for the default machine, as that + # can never be a valid hostname ("default" could be, in the + # default search domain). + currentmachinename = "" + currentmachine = {} + else: + fail("Unexpected token '%s' while reading %s" % + (token, filename)) + if not currentmachinename == None: + netrc[currentmachinename] = currentmachine + return netrc + +def use_netrc(netrc, urls, patterns): + """Compute an auth dict from a parsed netrc file and a list of URLs. + Args: + netrc: a netrc file already parsed to a dict, e.g., as obtained from + read_netrc + urls: a list of URLs. + patterns: optional dict of url to authorization patterns + Returns: + dict suitable as auth argument for ctx.download; more precisely, the dict + will map all URLs where the netrc file provides login and password to a + dict containing the corresponding login, password and optional authorization pattern, + as well as the mapping of "type" to "basic" or "pattern". + """ + auth = {} + for url in urls: + schemerest = url.split("://", 1) + if len(schemerest) < 2: + continue + if not (schemerest[0] in ["http", "https"]): + # For other protocols, bazel currently does not support + # authentication. So ignore them. 
+ continue + host = schemerest[1].split("/")[0].split(":")[0] + if not host in netrc: + continue + authforhost = netrc[host] + if host in patterns: + auth_dict = { + "type": "pattern", + "pattern": patterns[host], + } + + if "login" in authforhost: + auth_dict["login"] = authforhost["login"] + + if "password" in authforhost: + auth_dict["password"] = authforhost["password"] + + auth[url] = auth_dict + elif "login" in authforhost and "password" in authforhost: + auth[url] = { + "type": "basic", + "login": authforhost["login"], + "password": authforhost["password"], + } + + return auth