diff --git a/dev/benchmarks/CMakeLists.txt b/dev/benchmarks/CMakeLists.txt index 587cdc50..0c6c0b91 100644 --- a/dev/benchmarks/CMakeLists.txt +++ b/dev/benchmarks/CMakeLists.txt @@ -27,7 +27,7 @@ if(NOT DEFINED CMAKE_C_STANDARD) endif() if(NOT DEFINED CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) endif() @@ -58,7 +58,6 @@ if(IS_DIRECTORY "${GEOARROW_BENCHMARK_SOURCE_URL}") elseif(NOT "${GEOARROW_BENCHMARK_SOURCE_URL}" STREQUAL "") fetchcontent_declare(geoarrow URL "${GEOARROW_BENCHMARK_SOURCE_URL}") fetchcontent_makeavailable(geoarrow) - fetchcontent_makeavailable(geoarrow_ipc) endif() # Check that either the parent scope or this CMakeLists.txt defines a geoarrow target @@ -74,8 +73,8 @@ endif() include(CTest) enable_testing() -foreach(ITEM array_view) - add_executable(${ITEM}_benchmark "c/${ITEM}_benchmark.cc" c/benchmark_lib.cc) +foreach(ITEM coord_view hpp_coord_sequence) + add_executable(${ITEM}_benchmark "c/${ITEM}_benchmark.cc") target_link_libraries(${ITEM}_benchmark PRIVATE geoarrow benchmark::benchmark_main) add_test(NAME ${ITEM}_benchmark COMMAND ${ITEM}_benchmark --benchmark_out=${ITEM}_benchmark.json) diff --git a/dev/benchmarks/c/array_view_benchmark.cc b/dev/benchmarks/c/array_view_benchmark.cc deleted file mode 100644 index dbd849d6..00000000 --- a/dev/benchmarks/c/array_view_benchmark.cc +++ /dev/null @@ -1,142 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include - -#include - -#include "geoarrow.h" - -#include "benchmark_lib.h" - -// The length of most arrays used in these benchmarks. Just big enough so -// that the benchmark takes a non-trivial amount of time to run. -static const int64_t kNumItemsPrettyBig = 100000000; - -enum CoordAccessStrategy { GENERIC, OPTIMIZED, LOOP_THEN_IF }; - -enum Operation { BOUNDS, CENTROID }; - -template -static void CoordViewLoop(benchmark::State& state) { - struct GeoArrowArrayView view; - GeoArrowArrayViewInitFromType(&view, type); - - // Generate a circle with n_coords points - int64_t n_coords = kNumItemsPrettyBig; - int n_dims = view.coords.n_values; - std::vector coords(n_coords * n_dims); - double angle_inc_radians = M_PI / 100; - double radius = 483; - double angle = 0; - - if (view.schema_view.coord_type == GEOARROW_COORD_TYPE_SEPARATE) { - for (int i = 0; i < n_dims; i++) { - view.coords.values[i] = coords.data() + (i * n_coords); - } - - for (int64_t i = 0; i < n_coords; i++) { - coords[i] = cos(angle) * radius; - coords[n_coords + i] = sin(angle) * radius; - angle += angle_inc_radians; - } - } else { - for (int i = 0; i < n_dims; i++) { - view.coords.values[i] = coords.data() + i; - } - - for (int64_t i = 0; i < n_coords; i++) { - coords[n_dims * i] = cos(angle) * radius; - coords[n_dims * i + 1] = sin(angle) * radius; - angle += angle_inc_radians; - } - } - - if (operation == BOUNDS) { - std::array bounds{}; - switch (strategy) { - case GENERIC: - for (auto _ : state) { - bounds = CalculateBoundsGeneric(&view.coords, n_coords); - benchmark::DoNotOptimize(bounds); - } - break; - case OPTIMIZED: - for (auto _ : state) { - bounds = CalculateBoundsOptimized(&view.coords, n_coords, - view.schema_view.coord_type); - benchmark::DoNotOptimize(bounds); - } - break; - case LOOP_THEN_IF: - for (auto _ : state) { - bounds = CalculateBoundsLoopThenIf(&view.coords, n_coords, - view.schema_view.coord_type); - benchmark::DoNotOptimize(bounds); - } - break; - } - - } else if (operation == CENTROID) { - std::array centroid{}; - switch (strategy) { - case GENERIC: - for (auto _ : state) { - centroid = CalculateCentroidGeneric(&view.coords, n_coords); - benchmark::DoNotOptimize(centroid); - } - break; - case OPTIMIZED: - for (auto _ : state) { - centroid = CalculateCentroidOptimized(&view.coords, n_coords, - view.schema_view.coord_type); - benchmark::DoNotOptimize(centroid); - } - break; - case LOOP_THEN_IF: - for (auto _ : state) { - centroid = CalculateCentroidLoopThenIf(&view.coords, n_coords, - view.schema_view.coord_type); - benchmark::DoNotOptimize(centroid); - } - break; - } - } - - state.SetItemsProcessed(n_coords * state.iterations()); - // Check the result - // std::cout << bounds[0] << ", " << bounds[1] << ", " << bounds[2] << ", " << - // bounds[3] - // << std::endl; -} - -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); - -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); diff --git a/dev/benchmarks/c/benchmark_lib.cc b/dev/benchmarks/c/benchmark_lib.cc deleted file mode 100644 index 9fa768ef..00000000 --- a/dev/benchmarks/c/benchmark_lib.cc +++ /dev/null @@ -1,152 +0,0 @@ - -#include -#include - -#include "geoarrow.h" - -// Slightly faster than std::min/std::max -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) - -std::array CalculateBoundsGeneric(struct GeoArrowCoordView* coords, - int64_t n_coords) { - double xmin = std::numeric_limits::infinity(); - double xmax = -std::numeric_limits::infinity(); - double ymin = std::numeric_limits::infinity(); - double ymax = -std::numeric_limits::infinity(); - - double x, y; - for (int64_t i = 0; i < n_coords; i++) { - x = GEOARROW_COORD_VIEW_VALUE(coords, i, 0); - y = GEOARROW_COORD_VIEW_VALUE(coords, i, 1); - xmin = MIN(xmin, x); - xmax = MAX(xmax, y); - ymin = MIN(ymin, x); - ymax = MAX(ymax, y); - } - - return {xmin, xmax, ymin, ymax}; -} - -std::array CalculateBoundsOptimized(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type) { - double xmin = std::numeric_limits::infinity(); - double xmax = -std::numeric_limits::infinity(); - double ymin = std::numeric_limits::infinity(); - double ymax = -std::numeric_limits::infinity(); - - if (coord_type == GEOARROW_COORD_TYPE_SEPARATE) { - const double* xs = coords->values[0]; - const double* ys = coords->values[1]; - for (int64_t i = 0; i < n_coords; i++) { - xmin = MIN(xmin, xs[i]); - xmax = MAX(xmax, xs[i]); - ymin = MIN(ymin, ys[i]); - ymax = MAX(ymax, ys[i]); - } - } else { - int n_dims = coords->n_values; - const double* xs = coords->values[0]; - const double* ys = xs + 1; - for (int64_t i = 0; i < n_coords; i++) { - int64_t offset = i * n_dims; - xmin = MIN(xmin, xs[offset]); - xmax = MAX(xmax, xs[offset]); - ymin = MIN(ymin, ys[offset]); - ymax = MAX(ymax, ys[offset]); - } - } - - return {xmin, xmax, ymin, ymax}; -} - -std::array CalculateBoundsLoopThenIf(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type) { - double xmin = std::numeric_limits::infinity(); - double xmax = -std::numeric_limits::infinity(); - double ymin = std::numeric_limits::infinity(); - double ymax = -std::numeric_limits::infinity(); - - double x, y; - int n_dims = coords->n_values; - for (int64_t i = 0; i < n_coords; i++) { - if (coord_type == GEOARROW_COORD_TYPE_SEPARATE) { - x = coords->values[0][i]; - y = coords->values[1][i]; - } else { - x = coords->values[0][i * n_dims]; - y = coords->values[0][i * n_dims + 1]; - } - - xmin = MIN(xmin, x); - xmax = MAX(xmax, y); - ymin = MIN(ymin, x); - ymax = MAX(ymax, y); - } - - return {xmin, xmax, ymin, ymax}; -} - -std::array CalculateCentroidGeneric(struct GeoArrowCoordView* coords, - int64_t n_coords) { - double xsum = 0; - double ysum = 0; - - double x, y; - for (int64_t i = 0; i < n_coords; i++) { - xsum += GEOARROW_COORD_VIEW_VALUE(coords, i, 0); - ysum += GEOARROW_COORD_VIEW_VALUE(coords, i, 1); - } - - return {xsum / n_coords, ysum / n_coords}; -} - -std::array CalculateCentroidOptimized(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type) { - double xsum = 0; - double ysum = 0; - - if (coord_type == GEOARROW_COORD_TYPE_SEPARATE) { - // This version exploits that we can do this one element at a time - const double* xs = coords->values[0]; - const double* ys = coords->values[1]; - for (int64_t i = 0; i < n_coords; i++) { - xsum += xs[i]; - ysum += ys[i]; - } - } else { - int n_dims = coords->n_values; - const double* xs = coords->values[0]; - const double* ys = xs + 1; - for (int64_t i = 0; i < n_coords; i++) { - int64_t offset = i * n_dims; - xsum += xs[offset]; - ysum += ys[offset]; - } - } - - return {xsum / n_coords, ysum / n_coords}; -} - -std::array CalculateCentroidLoopThenIf(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type) { - double xsum = 0; - double ysum = 0; - - int n_dims = coords->n_values; - for (int64_t i = 0; i < n_coords; i++) { - if (coord_type == GEOARROW_COORD_TYPE_SEPARATE) { - xsum += coords->values[0][i]; - ysum += coords->values[1][i]; - } else { - xsum += coords->values[0][i * n_dims]; - xsum += coords->values[0][i * n_dims + 1]; - } - } - - return {xsum / n_coords, ysum / n_coords}; -} diff --git a/dev/benchmarks/c/benchmark_lib.h b/dev/benchmarks/c/benchmark_lib.h deleted file mode 100644 index 29c61894..00000000 --- a/dev/benchmarks/c/benchmark_lib.h +++ /dev/null @@ -1,26 +0,0 @@ - -#include - -#include "geoarrow.h" - -std::array CalculateBoundsGeneric(struct GeoArrowCoordView* coords, - int64_t n_coords); - -std::array CalculateBoundsOptimized(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type); - -std::array CalculateBoundsLoopThenIf(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type); - -std::array CalculateCentroidGeneric(struct GeoArrowCoordView* coords, - int64_t n_coords); - -std::array CalculateCentroidOptimized(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type); - -std::array CalculateCentroidLoopThenIf(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type); diff --git a/dev/benchmarks/c/benchmark_util.hpp b/dev/benchmarks/c/benchmark_util.hpp new file mode 100644 index 00000000..b3359374 --- /dev/null +++ b/dev/benchmarks/c/benchmark_util.hpp @@ -0,0 +1,38 @@ + +#include +#include + +namespace geoarrow { + +namespace benchmark_util { + +static const int64_t kNumCoordsPrettyBig = 10000000; + +static inline void PointsOnCircle(uint32_t n, uint32_t stride, double* out_x, + double* out_y, double dangle_radians = M_PI / 100.0, + double radius = 483.0) { + double angle = 0; + + for (uint32_t i = 0; i < n; i++) { + *out_x = std::cos(angle) * radius; + *out_y = std::sin(angle) * radius; + angle += dangle_radians; + out_x += stride; + out_y += stride; + } +} + +static inline void FillRandom(uint32_t n, uint32_t stride, double* out, + uint32_t seed = 1234, double range_min = -1.0, + double range_max = 1.0) { + std::srand(seed); + double range = range_max - range_max; + for (uint32_t i = 0; i < n; i++) { + double value01 = static_cast(std::rand()) / static_cast(RAND_MAX); + *out = range_min + value01 * range; + out += stride; + } +} + +} // namespace benchmark_util +} // namespace geoarrow diff --git a/dev/benchmarks/c/coord_view_benchmark.cc b/dev/benchmarks/c/coord_view_benchmark.cc new file mode 100644 index 00000000..ecf2faa4 --- /dev/null +++ b/dev/benchmarks/c/coord_view_benchmark.cc @@ -0,0 +1,163 @@ + +#include + +#include + +#include "geoarrow.h" + +#include "benchmark_util.hpp" + +enum Operation { BOUNDS, CENTROID }; +enum Strategy { COORD_VIEW_VALUE, POINTERS }; + +// Slightly faster than std::min/std::max +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +// Calculates bounds using GEOARROW_COORD_VIEW_VALUE +std::array BoundsUsingCoordViewValue(struct GeoArrowCoordView* coords) { + double xmin = std::numeric_limits::infinity(); + double xmax = -std::numeric_limits::infinity(); + double ymin = std::numeric_limits::infinity(); + double ymax = -std::numeric_limits::infinity(); + + double x, y; + for (int64_t i = 0; i < coords->n_coords; i++) { + x = GEOARROW_COORD_VIEW_VALUE(coords, i, 0); + y = GEOARROW_COORD_VIEW_VALUE(coords, i, 1); + xmin = MIN(xmin, x); + xmax = MAX(xmax, y); + ymin = MIN(ymin, x); + ymax = MAX(ymax, y); + } + + return {xmin, ymin, xmax, ymax}; +} + +// Calculates total bounds using raw pointer iteration +std::array BoundsUsingPointers(struct GeoArrowCoordView* coords) { + double xmin = std::numeric_limits::infinity(); + double xmax = -std::numeric_limits::infinity(); + double ymin = std::numeric_limits::infinity(); + double ymax = -std::numeric_limits::infinity(); + + const double* x = coords->values[0]; + const double* y = coords->values[1]; + for (int64_t i = 0; i < coords->n_coords; i++) { + xmin = MIN(xmin, *x); + xmax = MAX(xmax, *x); + x += coords->coords_stride; + + ymin = MIN(xmin, *y); + ymax = MAX(xmax, *y); + y += coords->coords_stride; + } + + return {xmin, ymin, xmax, ymax}; +} + +// Calculates centroid using GEOARROW_COORD_VIEW_VALUE +std::array CentroidUsingCoordViewValue(struct GeoArrowCoordView* coords) { + double xsum = 0; + double ysum = 0; + + for (int64_t i = 0; i < coords->n_coords; i++) { + xsum += GEOARROW_COORD_VIEW_VALUE(coords, i, 0); + ysum += GEOARROW_COORD_VIEW_VALUE(coords, i, 1); + } + + return {xsum / coords->n_coords, ysum / coords->n_coords}; +} + +// Calculates centroid using raw pointer iteration +std::array CentroidUsingPointers(struct GeoArrowCoordView* coords) { + double xsum = 0; + double ysum = 0; + + const double* x = coords->values[0]; + const double* y = coords->values[1]; + for (int64_t i = 0; i < coords->n_coords; i++) { + xsum += *x++; + ysum += *y++; + } + + return {xsum / coords->n_coords, ysum / coords->n_coords}; +} + +/// \brief Benchmark iteration over all coordinates in a GeoArrowCoordView +/// +/// The Operation here is a way to ensure that all coordinates are actually iterated over +/// and nothing is optimized out. The type is to check interleaved and separated +/// coordinates, and the strategy is to check GEOARROW_COORD_VIEW_VALUE() against raw +/// pointer iteration. Interestingly, this does not affect centroid calculations but does +/// affect bounds calculation on some architectures (possibly because raw pointer +/// iteration is autovectorized but the `* coord_stride` prevents that from occurring). +template +static void CoordViewLoop(benchmark::State& state) { + struct GeoArrowArrayView view; + GeoArrowArrayViewInitFromType(&view, type); + + // Memory for circle with n points + view.coords.n_coords = geoarrow::benchmark_util::kNumCoordsPrettyBig; + int n_dims = view.coords.n_values; + std::vector coords(view.coords.n_coords * n_dims); + + if (view.schema_view.coord_type == GEOARROW_COORD_TYPE_SEPARATE) { + for (int i = 0; i < n_dims; i++) { + view.coords.values[i] = coords.data() + (i * view.coords.n_coords); + } + } else { + for (int i = 0; i < n_dims; i++) { + view.coords.values[i] = coords.data() + i; + } + } + + std::array bounds{}; + std::array centroid{}; + + geoarrow::benchmark_util::PointsOnCircle(view.coords.n_coords, + view.coords.coords_stride, + const_cast(view.coords.values[0]), + const_cast(view.coords.values[1])); + + if (operation == BOUNDS) { + if (strategy == COORD_VIEW_VALUE) { + for (auto _ : state) { + bounds = BoundsUsingCoordViewValue(&view.coords); + benchmark::DoNotOptimize(bounds); + } + } else if (strategy == POINTERS) { + for (auto _ : state) { + bounds = BoundsUsingPointers(&view.coords); + benchmark::DoNotOptimize(bounds); + } + } + } else if (operation == CENTROID) { + if (strategy == COORD_VIEW_VALUE) { + for (auto _ : state) { + centroid = CentroidUsingCoordViewValue(&view.coords); + benchmark::DoNotOptimize(centroid); + } + } else if (strategy == POINTERS) { + for (auto _ : state) { + centroid = CentroidUsingPointers(&view.coords); + benchmark::DoNotOptimize(centroid); + } + } + } + + state.SetItemsProcessed(view.coords.n_coords * state.iterations()); + // Check the result (centroid should more or less be 0, 0; bounds should be more or + // less -484..483 in both dimensions) std::cout << bounds[0] << ", " << bounds[1] << + // ", " << bounds[2] << ", " << bounds[3] + // << std::endl; +} + +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); diff --git a/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc b/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc new file mode 100644 index 00000000..5579881a --- /dev/null +++ b/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc @@ -0,0 +1,161 @@ + +#include + +#include + +#include "hpp/array_util.hpp" + +#include "benchmark_util.hpp" + +using geoarrow::array_util::CoordSequence; +using geoarrow::array_util::XY; + +enum Operation { BOUNDS, CENTROID }; +enum Strategy { STL_ITERATOR, POINTERS }; + +// Slightly faster than std::min/std::max +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +// Calculates total bounds using the STL iterator +std::array BoundsUsingCoordIterator(const CoordSequence& seq) { + double xmin = std::numeric_limits::infinity(); + double xmax = -std::numeric_limits::infinity(); + double ymin = std::numeric_limits::infinity(); + double ymax = -std::numeric_limits::infinity(); + + for (const XY coord : seq) { + xmin = MIN(xmin, coord.x()); + xmax = MAX(xmax, coord.x()); + ymin = MIN(ymin, coord.y()); + ymax = MAX(ymax, coord.y()); + } + + return {xmin, ymin, xmax, ymax}; +} + +// Calculates total bounds using raw pointer iteration +std::array BoundsUsingPointers(const CoordSequence& seq) { + double xmin = std::numeric_limits::infinity(); + double xmax = -std::numeric_limits::infinity(); + double ymin = std::numeric_limits::infinity(); + double ymax = -std::numeric_limits::infinity(); + + auto x = seq.dbegin(0); + auto y = seq.dbegin(1); + for (uint32_t i = 0; i < seq.size(); i++) { + xmin = MIN(xmin, *x); + xmax = MAX(xmax, *x); + ++x; + + ymin = MIN(xmin, *y); + ymax = MAX(xmax, *y); + ++y; + } + + return {xmin, ymin, xmax, ymax}; +} + +// Calculates centroid using the STL iterator +std::array CentroidUsingCoordIterator(const CoordSequence& seq) { + double xsum = 0; + double ysum = 0; + + for (const XY coord : seq) { + xsum += coord.x(); + ysum += coord.y(); + } + + return {xsum / seq.size(), ysum / seq.size()}; +} + +// Calculates centroid using raw pointer iteration +std::array CentroidUsingPointers(const CoordSequence& seq) { + double xsum = 0; + double ysum = 0; + + auto x = seq.dbegin(0); + auto y = seq.dbegin(1); + for (uint32_t i = 0; i < seq.size(); i++) { + xsum += x++; + ysum += y++; + } + + return {xsum / seq.size(), ysum / seq.size()}; +} + +/// \brief Benchmark iteration over all coordinates in a CoordSequence +/// +/// The Operation here is a way to ensure that all coordinates are actually iterated over +/// and nothing is optimized out. The type is to check interleaved and separated +/// coordinates, and the strategy is to check STL iterators against raw +/// pointer iteration. +template +static void CoordSequenceLoop(benchmark::State& state) { + CoordSequence seq; + + // Memory for circle with n points + seq.offset = 0; + seq.length = geoarrow::benchmark_util::kNumCoordsPrettyBig; + std::vector coords(seq.size() * seq.coord_size); + + if (type == GEOARROW_TYPE_POINT) { + seq.stride = 1; + for (uint32_t i = 0; i < seq.coord_size; i++) { + seq.values[i] = coords.data() + (i * seq.size()); + } + } else { + seq.stride = seq.coord_size; + for (uint32_t i = 0; i < seq.coord_size; i++) { + seq.values[i] = coords.data() + i; + } + } + + geoarrow::benchmark_util::PointsOnCircle(seq.size(), seq.stride, + const_cast(seq.values[0]), + const_cast(seq.values[1])); + + std::array bounds{}; + std::array centroid{}; + + if (operation == BOUNDS) { + if (strategy == STL_ITERATOR) { + for (auto _ : state) { + bounds = BoundsUsingCoordIterator(seq); + benchmark::DoNotOptimize(bounds); + } + } else if (strategy == POINTERS) { + for (auto _ : state) { + bounds = BoundsUsingPointers(seq); + benchmark::DoNotOptimize(bounds); + } + } + } else if (operation == CENTROID) { + if (strategy == STL_ITERATOR) { + for (auto _ : state) { + centroid = CentroidUsingCoordIterator(seq); + benchmark::DoNotOptimize(centroid); + } + } else if (strategy == POINTERS) { + for (auto _ : state) { + centroid = CentroidUsingPointers(seq); + benchmark::DoNotOptimize(centroid); + } + } + } + + state.SetItemsProcessed(seq.size() * state.iterations()); + // Check the result (centroid should more or less be 0, 0; bounds should be more or + // less -484..483 in both dimensions) std::cout << bounds[0] << ", " << bounds[1] << + // ", " << bounds[2] << ", " << bounds[3] + // << std::endl; +} + +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); diff --git a/src/geoarrow/hpp/array_util.hpp b/src/geoarrow/hpp/array_util.hpp index 221870b1..7b14daa9 100644 --- a/src/geoarrow/hpp/array_util.hpp +++ b/src/geoarrow/hpp/array_util.hpp @@ -68,6 +68,8 @@ class CoordSequenceIterator : public BaseRandomAccessIterator { using iterator_category = std::random_access_iterator_tag; using difference_type = int64_t; using value_type = typename CoordSequence::value_type; + using reference = value_type&; + using pointer = value_type*; explicit CoordSequenceIterator(const CoordSequence& outer, uint32_t i) : BaseRandomAccessIterator(outer, i) {} @@ -97,6 +99,56 @@ class ListSequenceIterator : public BaseRandomAccessIterator { typename ListSequence::child_type stashed_; }; +// Iterator for dimension begin/end +template +class StridedIterator { + public: + explicit StridedIterator(const T* ptr, ptrdiff_t stride) : ptr_(ptr), stride_(stride) {} + StridedIterator& operator++() { + ptr_ += stride_; + return *this; + } + T operator++(int) { + T retval = *ptr_; + ptr_ += stride_; + return retval; + } + StridedIterator& operator--() { + ptr_ -= stride_; + return *this; + } + StridedIterator& operator+=(ptrdiff_t n) { + ptr_ += (n * stride_); + return *this; + } + StridedIterator& operator-=(ptrdiff_t n) { + ptr_ -= (n * stride_); + return *this; + } + int64_t operator-(const StridedIterator& other) const { + return (ptr_ - other.ptr_) / stride_; + } + bool operator<(const StridedIterator& other) const { return ptr_ < other.ptr_; } + bool operator>(const StridedIterator& other) const { return ptr_ > other.ptr_; } + bool operator<=(const StridedIterator& other) const { return ptr_ <= other.ptr_; } + bool operator>=(const StridedIterator& other) const { return ptr_ >= other.ptr_; } + bool operator==(const StridedIterator& other) const { return ptr_ == other.ptr_; } + bool operator!=(const StridedIterator& other) const { return ptr_ != other.ptr_; } + + T operator*() const { return *ptr_; } + T operator[](ptrdiff_t i) const { return ptr_[i]; } + + using iterator_category = std::random_access_iterator_tag; + using difference_type = int64_t; + using value_type = T; + using reference = T&; + using pointer = T*; + + protected: + const T* ptr_; + ptrdiff_t stride_; +}; + } // namespace internal struct BoxXY; @@ -263,8 +315,13 @@ struct CoordSequence { const_iterator begin() const { return const_iterator(*this, 0); } const_iterator end() const { return const_iterator(*this, length); } - double* dim_begin(uint32_t j) const { return values[0]; } - double* dim_end(uint32_t j) const { return values[0] + (length * stride); } + using dimension_iterator = internal::StridedIterator; + dimension_iterator dbegin(uint32_t j) const { + return dimension_iterator(values[j] + (offset * stride), stride); + } + dimension_iterator dend(uint32_t j) const { + return dimension_iterator(values[j] + ((offset + length) * stride), stride); + } }; /// \brief View of a sequence of lists diff --git a/src/geoarrow/hpp/array_util_test.cc b/src/geoarrow/hpp/array_util_test.cc index a48163da..95aac9b0 100644 --- a/src/geoarrow/hpp/array_util_test.cc +++ b/src/geoarrow/hpp/array_util_test.cc @@ -197,6 +197,29 @@ TEST(GeoArrowHppTest, IterateCoords) { EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{0, 5}, XY{1, 6}, XY{2, 7})); EXPECT_THAT(sequence.Slice(1, 1), ::testing::ElementsAre(XY{1, 6})); + + // Check dbegin() iteration + coords_vec.clear(); + auto x = sequence.dbegin(0); + auto y = sequence.dbegin(1); + for (uint32_t i = 0; i < sequence.size(); i++) { + coords_vec.push_back(XY{*x, *y}); + ++x; + ++y; + } + EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{0, 5}, XY{1, 6}, XY{2, 7})); + + // Check dbegin() iteration with offset + coords_vec.clear(); + sequence = sequence.Slice(1, 2); + x = sequence.dbegin(0); + y = sequence.dbegin(1); + for (uint32_t i = 0; i < sequence.size(); i++) { + coords_vec.push_back(XY{*x, *y}); + ++x; + ++y; + } + EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{1, 6}, XY{2, 7})); } TEST(GeoArrowHppTest, IterateCoordsInterleaved) { @@ -213,6 +236,29 @@ TEST(GeoArrowHppTest, IterateCoordsInterleaved) { } EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{0, 5}, XY{1, 6}, XY{2, 7})); + + // Check dbegin() iteration + coords_vec.clear(); + auto x = sequence.dbegin(0); + auto y = sequence.dbegin(1); + for (uint32_t i = 0; i < sequence.size(); i++) { + coords_vec.push_back(XY{*x, *y}); + ++x; + ++y; + } + EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{0, 5}, XY{1, 6}, XY{2, 7})); + + // Check dbegin() iteration with offset + coords_vec.clear(); + sequence = sequence.Slice(1, 2); + x = sequence.dbegin(0); + y = sequence.dbegin(1); + for (uint32_t i = 0; i < sequence.size(); i++) { + coords_vec.push_back(XY{*x, *y}); + ++x; + ++y; + } + EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{1, 6}, XY{2, 7})); } TEST(GeoArrowHppTest, IterateNestedCoords) { @@ -323,6 +369,15 @@ TEST(GeoArrowHppTest, SetArrayBox) { EXPECT_THAT(boxes, ::testing::ElementsAre(BoxXY{0, 1, 2, 3}, BoxXY{4, 5, 6, 7}, BoxXY{8, 9, 12, 13})); + std::vector xmins(native_array.value.dbegin(0), native_array.value.dend(0)); + EXPECT_THAT(xmins, ::testing::ElementsAre(0, 4, 8)); + std::vector ymins(native_array.value.dbegin(1), native_array.value.dend(1)); + EXPECT_THAT(ymins, ::testing::ElementsAre(1, 5, 9)); + std::vector xmaxs(native_array.value.dbegin(2), native_array.value.dend(2)); + EXPECT_THAT(xmaxs, ::testing::ElementsAre(2, 6, 12)); + std::vector ymaxs(native_array.value.dbegin(3), native_array.value.dend(3)); + EXPECT_THAT(ymaxs, ::testing::ElementsAre(3, 7, 13)); + EXPECT_THAT(native_array.LowerBound().value, ::testing::ElementsAre(XY{0, 1}, XY{4, 5}, XY{8, 9})); EXPECT_THAT(native_array.UpperBound().value, @@ -358,7 +413,14 @@ TEST(GeoArrowHppTest, SetArrayPoint) { EXPECT_THAT(points, ::testing::ElementsAre(XY{0, 1}, XY{2, 3}, XY{4, 5})); EXPECT_THAT(native_array.Coords(), ::testing::ElementsAre(XY{0, 1}, XY{2, 3}, XY{4, 5})); - EXPECT_THAT(native_array.Slice(1, 1).Coords(), ::testing::ElementsAre(XY{2, 3})); + + auto sliced_coords = native_array.Slice(1, 1).Coords(); + EXPECT_THAT(sliced_coords, ::testing::ElementsAre(XY{2, 3})); + + std::vector sliced_x(sliced_coords.dbegin(0), sliced_coords.dend(0)); + EXPECT_THAT(sliced_x, ::testing::ElementsAre(2)); + std::vector sliced_y(sliced_coords.dbegin(1), sliced_coords.dend(1)); + EXPECT_THAT(sliced_y, ::testing::ElementsAre(3)); } } @@ -401,8 +463,14 @@ TEST(GeoArrowHppTest, SetArrayLinestring) { EXPECT_THAT(native_array.Coords(), ::testing::ElementsAre(XY{0, 1}, XY{2, 3}, XY{4, 5}, XY{6, 7}, XY{8, 9}, XY{10, 11}, XY{12, 13})); - EXPECT_THAT(native_array.Slice(1, 1).Coords(), - ::testing::ElementsAre(XY{4, 5}, XY{6, 7})); + + auto sliced_coords = native_array.Slice(1, 1).Coords(); + EXPECT_THAT(sliced_coords, ::testing::ElementsAre(XY{4, 5}, XY{6, 7})); + + std::vector sliced_x(sliced_coords.dbegin(0), sliced_coords.dend(0)); + EXPECT_THAT(sliced_x, ::testing::ElementsAre(4, 6)); + std::vector sliced_y(sliced_coords.dbegin(1), sliced_coords.dend(1)); + EXPECT_THAT(sliced_y, ::testing::ElementsAre(5, 7)); } } @@ -448,11 +516,18 @@ TEST(GeoArrowHppTest, SetArrayMultiLinestring) { std::vector>{{XY{4, 5}, XY{6, 7}}}, std::vector>{{XY{8, 9}, XY{10, 11}, XY{12, 13}}, {XY{15, 16}, XY{17, 18}}})); + EXPECT_THAT(native_array.Coords(), ::testing::ElementsAre(XY{0, 1}, XY{2, 3}, XY{4, 5}, XY{6, 7}, XY{8, 9}, XY{10, 11}, XY{12, 13}, XY{15, 16}, XY{17, 18})); + + auto sliced_coords = native_array.Slice(1, 1).Coords(); EXPECT_THAT(native_array.Slice(1, 1).Coords(), ::testing::ElementsAre(XY{4, 5}, XY{6, 7})); + std::vector sliced_x(sliced_coords.dbegin(0), sliced_coords.dend(0)); + EXPECT_THAT(sliced_x, ::testing::ElementsAre(4, 6)); + std::vector sliced_y(sliced_coords.dbegin(1), sliced_coords.dend(1)); + EXPECT_THAT(sliced_y, ::testing::ElementsAre(5, 7)); } } @@ -505,7 +580,12 @@ TEST(GeoArrowHppTest, SetArrayMultiPolygon) { EXPECT_THAT(native_array.Coords(), ::testing::ElementsAre(XY{0, 1}, XY{2, 3}, XY{4, 5}, XY{6, 7}, XY{8, 9}, XY{10, 11}, XY{12, 13}, XY{15, 16}, XY{17, 18})); - EXPECT_THAT(native_array.Slice(1, 1).Coords(), - ::testing::ElementsAre(XY{4, 5}, XY{6, 7})); + + auto sliced_coords = native_array.Slice(1, 1).Coords(); + EXPECT_THAT(sliced_coords, ::testing::ElementsAre(XY{4, 5}, XY{6, 7})); + std::vector sliced_x(sliced_coords.dbegin(0), sliced_coords.dend(0)); + EXPECT_THAT(sliced_x, ::testing::ElementsAre(4, 6)); + std::vector sliced_y(sliced_coords.dbegin(1), sliced_coords.dend(1)); + EXPECT_THAT(sliced_y, ::testing::ElementsAre(5, 7)); } }