Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix quadtree spatial join OOMs on large numbers of input polygons #1381

Merged
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#=============================================================================
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -23,6 +23,17 @@ add_library(cuspatial_benchmark_common OBJECT

target_compile_features(cuspatial_benchmark_common PUBLIC cxx_std_17 cuda_std_17)

set_target_properties(cuspatial_benchmark_common
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${CUSPATIAL_BINARY_DIR}/benchmarks>"
INSTALL_RPATH "\$ORIGIN/../../../lib"
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON
INTERFACE_POSITION_INDEPENDENT_CODE ON
)

target_link_libraries(cuspatial_benchmark_common
PUBLIC benchmark::benchmark
cudf::cudftestutil
Expand All @@ -43,6 +54,10 @@ function(ConfigureBench CMAKE_BENCH_NAME)
set_target_properties(${CMAKE_BENCH_NAME}
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${CUSPATIAL_BINARY_DIR}/benchmarks>"
INSTALL_RPATH "\$ORIGIN/../../../lib"
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON
)
target_link_libraries(${CMAKE_BENCH_NAME} PRIVATE benchmark::benchmark_main cuspatial_benchmark_common)
install(
Expand All @@ -61,7 +76,11 @@ function(ConfigureNVBench CMAKE_BENCH_NAME)
${CMAKE_BENCH_NAME}
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${CUSPATIAL_BINARY_DIR}/benchmarks>"
INSTALL_RPATH "\$ORIGIN/../../../lib"
)
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON
)
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved
target_link_libraries(
${CMAKE_BENCH_NAME} PRIVATE cuspatial_benchmark_common nvbench::main
)
Expand Down
105 changes: 63 additions & 42 deletions cpp/include/cuspatial/detail/join/quadtree_point_in_polygon.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@
#include <cuspatial/range/multipolygon_range.cuh>
#include <cuspatial/traits.hpp>

#include <rmm/cuda_device.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
#include <rmm/resource_ref.hpp>

#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/scan.h>

#include <cstdint>

namespace cuspatial {
namespace detail {

Expand All @@ -57,7 +57,7 @@ struct compute_poly_and_point_indices {
using IndexType = iterator_value_type<QuadOffsetsIterator>;

inline thrust::tuple<IndexType, IndexType> __device__
operator()(IndexType const global_index) const
operator()(std::uint64_t const global_index) const
{
auto const [quad_poly_index, local_point_index] =
get_quad_and_local_point_indices(global_index, point_offsets_begin, point_offsets_end);
Expand Down Expand Up @@ -118,16 +118,22 @@ std::pair<rmm::device_uvector<IndexType>, rmm::device_uvector<IndexType>> quadtr

auto num_poly_quad_pairs = std::distance(poly_indices_first, poly_indices_last);

auto quad_lengths_iter =
thrust::make_permutation_iterator(quadtree.length_begin(), quad_indices_first);
auto quad_lengths_iter = thrust::make_transform_iterator(
thrust::make_permutation_iterator(quadtree.length_begin(), quad_indices_first),
cuda::proclaim_return_type<std::uint64_t>([] __device__(IndexType const& i) -> std::uint64_t {
return static_cast<std::uint64_t>(i);
}));
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved

auto quad_offsets_iter =
thrust::make_permutation_iterator(quadtree.offset_begin(), quad_indices_first);

// Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction)
auto point_xys_iter = thrust::make_permutation_iterator(points_first, point_indices_first);

// Compute a "local" set of zero-based point offsets from number of points in each quadrant
// Use `num_poly_quad_pairs + 1` as the length so that the last element produced by
// `inclusive_scan` is the total number of points to be tested against any polygon.
rmm::device_uvector<IndexType> local_point_offsets(num_poly_quad_pairs + 1, stream);
rmm::device_uvector<std::uint64_t> local_point_offsets(num_poly_quad_pairs + 1, stream);
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved

// inclusive scan of quad_lengths_iter
thrust::inclusive_scan(rmm::exec_policy(stream),
Expand All @@ -136,52 +142,67 @@ std::pair<rmm::device_uvector<IndexType>, rmm::device_uvector<IndexType>> quadtr
local_point_offsets.begin() + 1);

// Ensure local point offsets starts at 0
IndexType init{0};
std::uint64_t init{0};
local_point_offsets.set_element_async(0, init, stream);

// The last element is the total number of points to test against any polygon.
auto num_total_points = local_point_offsets.back_element(stream);

// Allocate the output polygon and point index pair vectors
rmm::device_uvector<IndexType> poly_indices(num_total_points, stream);
rmm::device_uvector<IndexType> point_indices(num_total_points, stream);

auto poly_and_point_indices =
thrust::make_zip_iterator(poly_indices.begin(), point_indices.begin());
// Process in chunks of *at most* 25% of the free available device memory to avoid OOM'ing
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved
// when allocating additional output space.
auto max_chunk_size =
static_cast<std::uint64_t>(rmm::percent_of_free_device_memory(25) / (sizeof(IndexType) * 2));
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved

// Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction)
auto point_xys_iter = thrust::make_permutation_iterator(points_first, point_indices_first);
auto num_chunks = std::uint64_t{num_total_points / max_chunk_size + 1};

// Compute the combination of polygon and point index pairs. For each polygon/quadrant pair,
// enumerate pairs of (poly_index, point_index) for each point in each quadrant.
//
// In Python pseudocode:
// ```
// pp_pairs = []
// for polygon, quadrant in pq_pairs:
// for point in quadrant:
// pp_pairs.append((polygon, point))
// ```
//
auto global_to_poly_and_point_indices = detail::make_counting_transform_iterator(
0,
detail::compute_poly_and_point_indices{quad_offsets_iter,
local_point_offsets.begin(),
local_point_offsets.end(),
poly_indices_first});

// Compute the number of intersections by removing (poly, point) pairs that don't intersect
auto num_intersections = thrust::distance(
poly_and_point_indices,
thrust::copy_if(rmm::exec_policy(stream),
global_to_poly_and_point_indices,
global_to_poly_and_point_indices + num_total_points,
poly_and_point_indices,
detail::test_poly_point_intersection{point_xys_iter, polygons}));
// Allocate the output polygon and point index pair vectors
rmm::device_uvector<IndexType> poly_indices(0, stream);
rmm::device_uvector<IndexType> point_indices(0, stream);

std::uint64_t num_intersections = 0;
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved

for (std::uint64_t chunk_index = 0, chunk_offset = 0; chunk_index < num_chunks; ++chunk_index) {
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved
auto chunk_size = std::min(max_chunk_size, num_total_points - (chunk_index * max_chunk_size));

poly_indices.resize(num_intersections + chunk_size, stream);
point_indices.resize(num_intersections + chunk_size, stream);

// Compute the combination of polygon and point index pairs. For each polygon/quadrant pair,
// enumerate pairs of (poly_index, point_index) for each point in each quadrant.
//
// In Python pseudocode:
// ```
// pp_pairs = []
// for polygon, quadrant in pq_pairs:
// for point in quadrant:
// pp_pairs.append((polygon, point))
// ```
//
auto global_to_poly_and_point_indices = detail::make_counting_transform_iterator(
chunk_offset,
detail::compute_poly_and_point_indices{quad_offsets_iter,
local_point_offsets.begin(),
local_point_offsets.end(),
poly_indices_first});

auto poly_and_point_indices =
thrust::make_zip_iterator(poly_indices.begin(), point_indices.begin()) + num_intersections;

// Compute the number of intersections by removing (poly, point) pairs that don't intersect
num_intersections += thrust::distance(
poly_and_point_indices,
thrust::copy_if(rmm::exec_policy(stream),
global_to_poly_and_point_indices,
global_to_poly_and_point_indices + chunk_size,
poly_and_point_indices,
detail::test_poly_point_intersection{point_xys_iter, polygons}));

chunk_offset += chunk_size;
}

poly_indices.resize(num_intersections, stream);
poly_indices.shrink_to_fit(stream);
point_indices.resize(num_intersections, stream);
poly_indices.shrink_to_fit(stream);
point_indices.shrink_to_fit(stream);

return std::pair{std::move(poly_indices), std::move(point_indices)};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -74,6 +74,7 @@ template <typename GeometryIterator, typename PartIterator, typename VecIterator
class multilinestring_range;

template <typename GeometryIterator, typename PartIterator, typename VecIterator>
CUSPATIAL_HOST_DEVICE
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved
multilinestring_range<GeometryIterator, PartIterator, VecIterator>::multilinestring_range(
GeometryIterator geometry_begin,
GeometryIterator geometry_end,
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/cuspatial/geometry/box.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -40,9 +40,9 @@ class alignas(sizeof(Vertex)) box {

private:
/**
* @brief Output stream operator for `vec_2d<T>` for human-readable formatting
* @brief Output stream operator for `box<T>` for human-readable formatting
*/
friend std::ostream& operator<<(std::ostream& os, cuspatial::box<T> const& b)
friend std::ostream& operator<<(std::ostream& os, cuspatial::box<T, Vertex> const& b)
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved
{
return os << "{" << b.v1 << ", " << b.v2 << "}";
}
Expand Down
7 changes: 5 additions & 2 deletions cpp/include/cuspatial_test/geometry_generator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@

#include <ranger/ranger.hpp>

#include <cmath>

namespace cuspatial {
namespace test {

Expand Down Expand Up @@ -399,8 +401,9 @@ auto generate_multipoint_array(multipoint_generator_parameter<T> params,
std::size_t{0},
params.num_points_per_multipoints);

auto engine_x = deterministic_engine(params.num_points());
auto engine_y = deterministic_engine(2 * params.num_points());
auto golden_ratio = (1 + std::sqrt(T{5})) / 2;
auto engine_x = deterministic_engine(golden_ratio * params.num_points());
auto engine_y = deterministic_engine((1 / golden_ratio) * params.num_points());
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved

auto x_dist = make_uniform_dist(params.lower_left.x, params.upper_right.x);
auto y_dist = make_uniform_dist(params.lower_left.y, params.upper_right.y);
Expand Down
11 changes: 11 additions & 0 deletions cpp/include/cuspatial_test/vector_factories.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,17 @@ class multipoint_array {
_geometry_offsets.begin(), _geometry_offsets.end(), _coordinates.begin(), _coordinates.end()};
}

/**
* @brief Copy the offset arrays to host.
*/
auto to_host() const
{
auto geometry_offsets = cuspatial::test::to_host<geometry_t>(_geometry_offsets);
auto coordinate_offsets = cuspatial::test::to_host<coord_t>(_coordinates);

return std::tuple{geometry_offsets, coordinate_offsets};
}

trxcllnt marked this conversation as resolved.
Show resolved Hide resolved
/// Release ownership
auto release() { return std::pair{std::move(_geometry_offsets), std::move(_coordinates)}; }

Expand Down
5 changes: 4 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#=============================================================================
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -158,6 +158,9 @@ ConfigureTest(JOIN_POINT_IN_POLYGON_SMALL_TEST_EXP
ConfigureTest(JOIN_POINT_IN_POLYGON_LARGE_TEST_EXP
join/quadtree_point_in_polygon_test_large.cu)

ConfigureTest(JOIN_POINT_IN_POLYGON_OOM_TEST_EXP
join/quadtree_point_in_polygon_test_oom.cu)

ConfigureTest(JOIN_POINT_TO_LINESTRING_SMALL_TEST_EXP
join/quadtree_point_to_nearest_linestring_test_small.cu)

Expand Down
Loading
Loading