Skip to content

Commit 3beb17b

Browse files
authored
feat: Add support for geoarrow.box (#106)
This models "box" as another geometry type, since it fits in nicely with a struct point. This PR adds support for the box in:
- `GeoArrowSchemaInit()` (i.e., create a schema)
- `GeoArrowSchemaViewInit()` (i.e., consume/inspect a schema)
- `GeoArrowArrayViewInit()` (i.e., consume an array/stream)
- Creating arrays via the box kernel (which already did basically this, but without emitting the box extension name/metadata)
- Creating arrays via the `GeoArrowArrayBuilder()` by buffer
1 parent 3eb7d87 commit 3beb17b

16 files changed

+656
-92
lines changed

python/geoarrow-c/src/geoarrow/c/_lib.pyx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ cdef extern from "geoarrow_type.h":
4545
GEOARROW_GEOMETRY_TYPE_MULTILINESTRING = 5
4646
GEOARROW_GEOMETRY_TYPE_MULTIPOLYGON = 6
4747
GEOARROW_GEOMETRY_TYPE_GEOMETRYCOLLECTION = 7
48+
GEOARROW_GEOMETRY_TYPE_BOX = 990
4849

4950
cpdef enum GeoArrowDimensions:
5051
GEOARROW_DIMENSIONS_UNKNOWN = 0
@@ -64,6 +65,9 @@ cdef extern from "geoarrow_type.h":
6465
GEOARROW_TYPE_LARGE_WKB = 100002
6566
GEOARROW_TYPE_WKT = 100003
6667
GEOARROW_TYPE_LARGE_WKT = 100004
68+
GEOARROW_TYPE_BOX_Z = 1990
69+
GEOARROW_TYPE_BOX_M = 2990
70+
GEOARROW_TYPE_BOX_ZM = 3990
6771
GEOARROW_TYPE_POINT = 1
6872
GEOARROW_TYPE_LINESTRING = 2
6973
GEOARROW_TYPE_POLYGON = 3

python/geoarrow-c/src/geoarrow/c/lib.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ class GeometryType:
3838
MULTIPOLYGON = _lib.GEOARROW_GEOMETRY_TYPE_MULTIPOLYGON
3939
#: Geometrycollection geometry type
4040
GEOMETRYCOLLECTION = _lib.GEOARROW_GEOMETRY_TYPE_GEOMETRYCOLLECTION
41+
#: Box geometry type
42+
BOX = _lib.GEOARROW_GEOMETRY_TYPE_BOX
4143

4244

4345
class Dimensions:

src/geoarrow/array_view.c

Lines changed: 118 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ static int32_t kZeroInt32 = 0;
99

1010
static int GeoArrowArrayViewInitInternal(struct GeoArrowArrayView* array_view) {
1111
switch (array_view->schema_view.geometry_type) {
12+
case GEOARROW_GEOMETRY_TYPE_BOX:
1213
case GEOARROW_GEOMETRY_TYPE_POINT:
1314
array_view->n_offsets = 0;
1415
break;
@@ -58,6 +59,10 @@ static int GeoArrowArrayViewInitInternal(struct GeoArrowArrayView* array_view) {
5859
break;
5960
}
6061

62+
if (array_view->schema_view.geometry_type == GEOARROW_GEOMETRY_TYPE_BOX) {
63+
array_view->coords.n_values *= 2;
64+
}
65+
6166
switch (array_view->schema_view.coord_type) {
6267
case GEOARROW_COORD_TYPE_SEPARATE:
6368
array_view->coords.coords_stride = 1;
@@ -71,22 +76,20 @@ static int GeoArrowArrayViewInitInternal(struct GeoArrowArrayView* array_view) {
7176
break;
7277
}
7378

74-
for (int i = 0; i < 4; i++) {
75-
array_view->coords.values[i] = NULL;
76-
}
77-
7879
return GEOARROW_OK;
7980
}
8081

8182
// Initialize array_view for a geometry type known at call time.
//
// The view is zeroed, its schema_view is populated from `type`, and the
// remaining fields are then derived by GeoArrowArrayViewInitInternal().
// Returns whatever GeoArrowArrayViewInitInternal() returns; a failure from
// GeoArrowSchemaViewInitFromType() is presumably propagated early by
// NANOARROW_RETURN_NOT_OK (macro definition not visible here).
GeoArrowErrorCode GeoArrowArrayViewInitFromType(struct GeoArrowArrayView* array_view,
8283
enum GeoArrowType type) {
84+
// Zero the entire struct up front so that every field not explicitly
// assigned afterwards (e.g. the coords.values pointers) starts out as 0.
memset(array_view, 0, sizeof(struct GeoArrowArrayView));
8385
NANOARROW_RETURN_NOT_OK(GeoArrowSchemaViewInitFromType(&array_view->schema_view, type));
8486
return GeoArrowArrayViewInitInternal(array_view);
8587
}
8688

8789
GeoArrowErrorCode GeoArrowArrayViewInitFromSchema(struct GeoArrowArrayView* array_view,
8890
const struct ArrowSchema* schema,
8991
struct GeoArrowError* error) {
92+
memset(array_view, 0, sizeof(struct GeoArrowArrayView));
9093
NANOARROW_RETURN_NOT_OK(
9194
GeoArrowSchemaViewInit(&array_view->schema_view, schema, error));
9295
return GeoArrowArrayViewInitInternal(array_view);
@@ -210,6 +213,37 @@ static GeoArrowErrorCode GeoArrowArrayViewSetArraySerialized(
210213
return GEOARROW_OK;
211214
}
212215

216+
// Point a box array view at the buffers of `array`.
//
// Records the array's length and offset in length[0]/offset[0], sets
// coords.n_coords to the array length, and makes coords.values[i] point at
// buffers[1] of child i (advanced by that child's own offset).
//
// Returns EINVAL (with a message in `error`) if the number of children does
// not equal coords.n_values or if any child does not have exactly 2 buffers;
// GEOARROW_OK otherwise. The validity bitmap is not assigned in this function.
static GeoArrowErrorCode GeoArrowArrayViewSetArrayBox(
217+
struct GeoArrowArrayView* array_view, const struct ArrowArray* array,
218+
struct GeoArrowError* error) {
219+
array_view->length[0] = array->length;
220+
// The parent offset is only recorded here; it is not folded into the child
// pointers assigned below.
array_view->offset[0] = array->offset;
221+
array_view->coords.n_coords = array->length;
222+
223+
// One child is expected per value in coords.n_values.
if (array->n_children != array_view->coords.n_values) {
224+
GeoArrowErrorSet(error,
225+
"Unexpected number of children for box array struct "
226+
"in GeoArrowArrayViewSetArray()");
227+
return EINVAL;
228+
}
229+
230+
// Set the coord pointers to the data buffer of each child (applying
231+
// offset before assigning the pointer)
232+
for (int32_t i = 0; i < array_view->coords.n_values; i++) {
233+
if (array->children[i]->n_buffers != 2) {
234+
// NOTE(review): this path reports through ArrowErrorSet() with a cast,
// while the check above uses GeoArrowErrorSet(); consider using the same
// setter for both -- confirm the two are interchangeable.
ArrowErrorSet((struct ArrowError*)error,
235+
"Unexpected number of buffers for box array child "
236+
"in GeoArrowArrayViewSetArray()");
237+
return EINVAL;
238+
}
239+
240+
// buffers[1] is read as an array of doubles; the child's offset is counted
// in elements (pointer arithmetic on const double*).
array_view->coords.values[i] =
241+
((const double*)array->children[i]->buffers[1]) + array->children[i]->offset;
242+
}
243+
244+
return GEOARROW_OK;
245+
}
246+
213247
GeoArrowErrorCode GeoArrowArrayViewSetArray(struct GeoArrowArrayView* array_view,
214248
const struct ArrowArray* array,
215249
struct GeoArrowError* error) {
@@ -218,6 +252,12 @@ GeoArrowErrorCode GeoArrowArrayViewSetArray(struct GeoArrowArrayView* array_view
218252
case GEOARROW_TYPE_WKB:
219253
NANOARROW_RETURN_NOT_OK(GeoArrowArrayViewSetArraySerialized(array_view, array));
220254
break;
255+
case GEOARROW_TYPE_BOX:
256+
case GEOARROW_TYPE_BOX_Z:
257+
case GEOARROW_TYPE_BOX_M:
258+
case GEOARROW_TYPE_BOX_ZM:
259+
NANOARROW_RETURN_NOT_OK(GeoArrowArrayViewSetArrayBox(array_view, array, error));
260+
break;
221261
default:
222262
NANOARROW_RETURN_NOT_OK(
223263
GeoArrowArrayViewSetArrayInternal(array_view, array, error, 0));
@@ -469,10 +509,84 @@ static GeoArrowErrorCode GeoArrowArrayViewVisitMultipolygon(
469509
return GEOARROW_OK;
470510
}
471511

512+
// Visit `length` boxes starting at `offset`, presenting each one to the
// visitor `v` as a POLYGON feature.
//
// For every element: feat_start, then either null_feat (validity bit unset)
// or geom_start / [ring_start, coords, ring_end] / geom_end, then feat_end.
// The ring is omitted when any dimension has min > max, so such a box is
// emitted as a polygon with no rings.
//
// Returns ENOTSUP (message set on v->error) before any callback is invoked
// when the schema's dimensions are not XY; otherwise GEOARROW_OK, or
// presumably the first non-OK code from a visitor callback (via
// NANOARROW_RETURN_NOT_OK, whose definition is not visible here).
static GeoArrowErrorCode GeoArrowArrayViewVisitBox(
513+
const struct GeoArrowArrayView* array_view, int64_t offset, int64_t length,
514+
struct GeoArrowVisitor* v) {
515+
// We aren't going to attempt Z, M, or ZM boxes since there is no canonical
516+
// way to do this (maybe only if the non-XY dimensions are constant?).
517+
if (array_view->schema_view.dimensions != GEOARROW_DIMENSIONS_XY) {
518+
GeoArrowErrorSet(v->error, "Can't visit box with non-XY dimensions");
519+
return ENOTSUP;
520+
}
521+
522+
// These are the polygon coords and the arrays to back them
523+
struct GeoArrowCoordView poly_coords;
524+
memset(&poly_coords, 0, sizeof(struct GeoArrowCoordView));
525+
526+
// The box view stores the mins in values[0, n_dim) and the maxes in
// values[n_dim, 2 * n_dim), so half of n_values is the dimension count.
int n_dim = array_view->coords.n_values / 2;
527+
// Scratch storage for the 5 ring vertices; reused for every feature.
double x[5];
528+
double y[5];
529+
poly_coords.n_values = n_dim;
530+
poly_coords.n_coords = 5;
531+
poly_coords.coords_stride = 1;
532+
poly_coords.values[0] = x;
533+
poly_coords.values[1] = y;
534+
535+
// index into each box coord's values[] for each polygon coordinate
// Resulting ring order: (xmin ymin), (xmax ymin), (xmax ymax), (xmin ymax),
// and back to (xmin ymin) to close the ring.
536+
int box_coord_poly_map_x[] = {0, n_dim, n_dim, 0, 0};
537+
int box_coord_poly_map_y[] = {1, 1, n_dim + 1, n_dim + 1, 1};
538+
539+
for (int64_t i = 0; i < length; i++) {
540+
// Absolute element index: the offset recorded on the view plus the
// caller-supplied offset plus the loop position.
int64_t raw_offset = array_view->offset[0] + offset + i;
541+
NANOARROW_RETURN_NOT_OK(v->feat_start(v));
542+
// A missing validity bitmap means every element is treated as valid.
if (!array_view->validity_bitmap ||
543+
ArrowBitGet(array_view->validity_bitmap, raw_offset)) {
544+
// Check for empty dimensions
545+
int n_empty_dims = 0;
546+
// NOTE(review): this `int i` (and the one in the vertex loop below)
// shadows the outer `int64_t i`. It is harmless here because raw_offset
// was computed before the inner loops, but -Wshadow would flag it.
for (int i = 0; i < n_dim; i++) {
547+
double dim_min = GEOARROW_COORD_VIEW_VALUE(&array_view->coords, raw_offset, i);
548+
double dim_max =
549+
GEOARROW_COORD_VIEW_VALUE(&array_view->coords, raw_offset, n_dim + i);
550+
// Counts only strict min > max; a NaN bound compares false and is
// therefore not counted as empty.
n_empty_dims += dim_min > dim_max;
551+
}
552+
553+
NANOARROW_RETURN_NOT_OK(v->geom_start(v, GEOARROW_GEOMETRY_TYPE_POLYGON,
554+
array_view->schema_view.dimensions));
555+
556+
// If any dimension has a negative range, we consider the polygon empty
557+
// (i.e., there are no points for which...)
558+
if (n_empty_dims == 0) {
559+
// Populate the polygon coordinates
560+
for (int i = 0; i < 5; i++) {
561+
x[i] = GEOARROW_COORD_VIEW_VALUE(&array_view->coords, raw_offset,
562+
box_coord_poly_map_x[i]);
563+
y[i] = GEOARROW_COORD_VIEW_VALUE(&array_view->coords, raw_offset,
564+
box_coord_poly_map_y[i]);
565+
}
566+
567+
// Call the visitor
568+
NANOARROW_RETURN_NOT_OK(v->ring_start(v));
569+
NANOARROW_RETURN_NOT_OK(v->coords(v, &poly_coords));
570+
NANOARROW_RETURN_NOT_OK(v->ring_end(v));
571+
}
572+
573+
NANOARROW_RETURN_NOT_OK(v->geom_end(v));
574+
} else {
575+
NANOARROW_RETURN_NOT_OK(v->null_feat(v));
576+
}
577+
578+
NANOARROW_RETURN_NOT_OK(v->feat_end(v));
579+
}
580+
581+
return GEOARROW_OK;
582+
}
583+
472584
GeoArrowErrorCode GeoArrowArrayViewVisit(const struct GeoArrowArrayView* array_view,
473585
int64_t offset, int64_t length,
474586
struct GeoArrowVisitor* v) {
475587
switch (array_view->schema_view.geometry_type) {
588+
case GEOARROW_GEOMETRY_TYPE_BOX:
589+
return GeoArrowArrayViewVisitBox(array_view, offset, length, v);
476590
case GEOARROW_GEOMETRY_TYPE_POINT:
477591
return GeoArrowArrayViewVisitPoint(array_view, offset, length, v);
478592
case GEOARROW_GEOMETRY_TYPE_LINESTRING:

src/geoarrow/array_view_test.cc

Lines changed: 113 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11

2+
#include <cmath>
3+
24
#include <gtest/gtest.h>
35

46
#include <geoarrow.h>
@@ -27,8 +29,15 @@ TEST_P(TypeParameterizedTestFixture, ArrayViewTestInitType) {
2729
EXPECT_EQ(array_view.validity_bitmap, nullptr);
2830
EXPECT_EQ(array_view.n_offsets, kNumOffsets[array_view.schema_view.geometry_type]);
2931
EXPECT_EQ(array_view.coords.n_coords, 0);
30-
EXPECT_EQ(array_view.coords.n_values,
31-
kNumDimensions[array_view.schema_view.dimensions]);
32+
33+
if (array_view.schema_view.geometry_type == GEOARROW_GEOMETRY_TYPE_BOX) {
34+
EXPECT_EQ(array_view.coords.n_values,
35+
kNumDimensions[array_view.schema_view.dimensions] * 2);
36+
37+
} else {
38+
EXPECT_EQ(array_view.coords.n_values,
39+
kNumDimensions[array_view.schema_view.dimensions]);
40+
}
3241

3342
if (array_view.schema_view.coord_type == GEOARROW_COORD_TYPE_SEPARATE) {
3443
EXPECT_EQ(array_view.coords.coords_stride, 1);
@@ -71,6 +80,8 @@ TEST_P(TypeParameterizedTestFixture, ArrayViewTestInitEmptyArray) {
7180
INSTANTIATE_TEST_SUITE_P(
7281
ArrayViewTest, TypeParameterizedTestFixture,
7382
::testing::Values(
83+
GEOARROW_TYPE_BOX, GEOARROW_TYPE_BOX_Z, GEOARROW_TYPE_BOX_M, GEOARROW_TYPE_BOX_ZM,
84+
7485
GEOARROW_TYPE_POINT, GEOARROW_TYPE_LINESTRING, GEOARROW_TYPE_POLYGON,
7586
GEOARROW_TYPE_MULTIPOINT, GEOARROW_TYPE_MULTILINESTRING,
7687
GEOARROW_TYPE_MULTIPOLYGON,
@@ -106,19 +117,36 @@ TEST(ArrayViewTest, ArrayViewTestSetArrayErrors) {
106117
struct GeoArrowArrayView array_view;
107118
struct GeoArrowError error;
108119
struct ArrowArray array;
120+
struct ArrowArray dummy_childx;
121+
struct ArrowArray* children[] = {&dummy_childx, &dummy_childx, &dummy_childx,
122+
&dummy_childx};
109123

110-
ASSERT_EQ(GeoArrowArrayViewInitFromType(&array_view, GEOARROW_TYPE_POINT), GEOARROW_OK);
124+
ASSERT_EQ(GeoArrowArrayViewInitFromType(&array_view, GEOARROW_TYPE_BOX), GEOARROW_OK);
125+
array.offset = 0;
126+
array.n_children = 1;
127+
EXPECT_EQ(GeoArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
128+
EXPECT_STREQ(error.message,
129+
"Unexpected number of children for box array struct in "
130+
"GeoArrowArrayViewSetArray()");
131+
132+
array.n_children = 4;
133+
array.children = reinterpret_cast<struct ArrowArray**>(children);
134+
dummy_childx.n_buffers = 1;
135+
EXPECT_EQ(GeoArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
136+
EXPECT_STREQ(error.message,
137+
"Unexpected number of buffers for box array child in "
138+
"GeoArrowArrayViewSetArray()");
111139

140+
ASSERT_EQ(GeoArrowArrayViewInitFromType(&array_view, GEOARROW_TYPE_POINT), GEOARROW_OK);
141+
array.n_children = 0;
142+
array.children = nullptr;
112143
array.offset = 0;
113144
array.n_children = 1;
114145
EXPECT_EQ(GeoArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
115146
EXPECT_STREQ(error.message,
116147
"Unexpected number of children for struct coordinate array in "
117148
"GeoArrowArrayViewSetArray()");
118149

119-
struct ArrowArray dummy_childx;
120-
struct ArrowArray dummy_childy;
121-
struct ArrowArray* children[] = {&dummy_childx, &dummy_childy};
122150
array.n_children = 2;
123151
array.children = reinterpret_cast<struct ArrowArray**>(children);
124152
dummy_childx.n_buffers = 1;
@@ -169,6 +197,85 @@ TEST(ArrayViewTest, ArrayViewTestSetInterleavedArrayErrors) {
169197
"GeoArrowArrayViewSetArray()");
170198
}
171199

200+
// Builds a three-element XY box array (one regular box, one empty box, one
// null), attaches it to a GeoArrowArrayView, and checks both the raw view
// contents and the WKT produced by visiting the view.
TEST(ArrayViewTest, ArrayViewTestSetArrayValidBox) {
201+
struct ArrowSchema schema;
202+
struct ArrowArray array;
203+
enum GeoArrowType type = GEOARROW_TYPE_BOX;
204+
205+
// Build the array for [BOX (0 1 => 2 3), BOX EMPTY, null]
206+
ASSERT_EQ(GeoArrowSchemaInit(&schema, type), GEOARROW_OK);
207+
ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), GEOARROW_OK);
208+
ASSERT_EQ(ArrowArrayStartAppending(&array), GEOARROW_OK);
209+
210+
// Children are filled in the order xmin, ymin, xmax, ymax (this matches the
// expected WKT "POLYGON ((0 1, 2 1, 2 3, 0 3, 0 1))" asserted below).
ASSERT_EQ(ArrowArrayAppendDouble(array.children[0], 0), GEOARROW_OK);
211+
ASSERT_EQ(ArrowArrayAppendDouble(array.children[1], 1), GEOARROW_OK);
212+
ASSERT_EQ(ArrowArrayAppendDouble(array.children[2], 2), GEOARROW_OK);
213+
ASSERT_EQ(ArrowArrayAppendDouble(array.children[3], 3), GEOARROW_OK);
214+
ASSERT_EQ(ArrowArrayFinishElement(&array), GEOARROW_OK);
215+
216+
// Empty box: mins of +inf and maxes of -inf, so min > max in every dimension.
ASSERT_EQ(ArrowArrayAppendDouble(array.children[0], INFINITY), GEOARROW_OK);
217+
ASSERT_EQ(ArrowArrayAppendDouble(array.children[1], INFINITY), GEOARROW_OK);
218+
ASSERT_EQ(ArrowArrayAppendDouble(array.children[2], -INFINITY), GEOARROW_OK);
219+
ASSERT_EQ(ArrowArrayAppendDouble(array.children[3], -INFINITY), GEOARROW_OK);
220+
ASSERT_EQ(ArrowArrayFinishElement(&array), GEOARROW_OK);
221+
222+
ASSERT_EQ(ArrowArrayAppendNull(&array, 1), GEOARROW_OK);
223+
224+
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), GEOARROW_OK);
225+
226+
// Set the array view
227+
struct GeoArrowArrayView array_view;
228+
EXPECT_EQ(GeoArrowArrayViewInitFromType(&array_view, type), GEOARROW_OK);
229+
EXPECT_EQ(GeoArrowArrayViewSetArray(&array_view, &array, nullptr), GEOARROW_OK);
230+
231+
// Check its contents
232+
EXPECT_EQ(array_view.length[0], 3);
233+
EXPECT_TRUE(ArrowBitGet(array_view.validity_bitmap, 0));
234+
EXPECT_TRUE(ArrowBitGet(array_view.validity_bitmap, 1));
235+
EXPECT_FALSE(ArrowBitGet(array_view.validity_bitmap, 2));
236+
// An XY box exposes 4 values per element (a min and a max per dimension).
EXPECT_EQ(array_view.coords.n_values, 4);
237+
EXPECT_EQ(array_view.coords.n_coords, 3);
238+
EXPECT_EQ(array_view.coords.values[0][0], 0);
239+
EXPECT_EQ(array_view.coords.values[1][0], 1);
240+
EXPECT_EQ(array_view.coords.values[2][0], 2);
241+
EXPECT_EQ(array_view.coords.values[3][0], 3);
242+
243+
EXPECT_EQ(array_view.coords.values[0][1], INFINITY);
244+
EXPECT_EQ(array_view.coords.values[1][1], INFINITY);
245+
EXPECT_EQ(array_view.coords.values[2][1], -INFINITY);
246+
EXPECT_EQ(array_view.coords.values[3][1], -INFINITY);
247+
248+
// Visiting the view should yield polygons: a closed 5-vertex ring for the
// regular box, an empty polygon for the empty box, and a null feature.
WKXTester tester;
249+
EXPECT_EQ(GeoArrowArrayViewVisit(&array_view, 0, array.length, tester.WKTVisitor()),
250+
GEOARROW_OK);
251+
auto values = tester.WKTValues("<null value>");
252+
ASSERT_EQ(values.size(), 3);
253+
EXPECT_EQ(values[0], "POLYGON ((0 1, 2 1, 2 3, 0 3, 0 1))");
254+
EXPECT_EQ(values[1], "POLYGON EMPTY");
255+
EXPECT_EQ(values[2], "<null value>");
256+
257+
schema.release(&schema);
258+
array.release(&array);
259+
}
260+
261+
// Checks that visiting a box view whose dimensions are not XY (here BOX_Z)
// fails with ENOTSUP and the expected error message, even for a zero-length
// visit.
TEST(ArrayViewTest, ArrayViewTestSetArrayValidBoxNonXY) {
262+
struct ArrowSchema schema;
263+
// NOTE(review): `array` is declared but never used in this test.
struct ArrowArray array;
264+
struct GeoArrowError error;
265+
266+
// NOTE(review): the schema is only initialized and released; the array view
// below is initialized from the type, not from this schema.
ASSERT_EQ(GeoArrowSchemaInit(&schema, GEOARROW_TYPE_BOX_Z), GEOARROW_OK);
267+
struct GeoArrowArrayView array_view;
268+
ASSERT_EQ(GeoArrowArrayViewInitFromType(&array_view, GEOARROW_TYPE_BOX_Z), GEOARROW_OK);
269+
270+
// Attach an error struct to the visitor so the failure message can be
// inspected after the visit.
struct GeoArrowVisitor v;
271+
GeoArrowVisitorInitVoid(&v);
272+
v.error = &error;
273+
ASSERT_EQ(GeoArrowArrayViewVisit(&array_view, 0, 0, &v), ENOTSUP);
274+
ASSERT_STREQ(error.message, "Can't visit box with non-XY dimensions");
275+
276+
ArrowSchemaRelease(&schema);
277+
}
278+
172279
TEST(ArrayViewTest, ArrayViewTestSetArrayValidPoint) {
173280
struct ArrowSchema schema;
174281
struct ArrowArray array;

src/geoarrow/builder.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ struct BuilderPrivate {
2222
// Depending on what exactly is being built, these pointers
2323
// might be NULL.
2424
struct ArrowBitmap* validity;
25-
struct ArrowBuffer* buffers[8];
25+
struct ArrowBuffer* buffers[9];
2626

2727
// Fields to keep track of state when using the visitor pattern
2828
int visitor_initialized;
@@ -389,6 +389,7 @@ static void GeoArrowSetArrayLengthFromBufferLength(struct GeoArrowSchemaView* sc
389389

390390
int coord_level;
391391
switch (schema_view->geometry_type) {
392+
case GEOARROW_GEOMETRY_TYPE_BOX:
392393
case GEOARROW_GEOMETRY_TYPE_POINT:
393394
coord_level = 0;
394395
break;
@@ -448,6 +449,7 @@ static void GeoArrowSetCoordContainerLength(struct GeoArrowBuilder* builder) {
448449
}
449450

450451
switch (builder->view.schema_view.geometry_type) {
452+
case GEOARROW_GEOMETRY_TYPE_BOX:
451453
case GEOARROW_GEOMETRY_TYPE_POINT:
452454
private
453455
->array.length = private->array.children[0]->length / scale;

0 commit comments

Comments
 (0)