Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add KDTreeMetadata to Python API #101

Merged
merged 1 commit into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions python/DEVELOP.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
To run the docs locally:

```
uv sync --no-install-package geoindex-rs
uv run --no-project maturin develop
uv run --no-project mkdocs serve
```
67 changes: 57 additions & 10 deletions python/python/geoindex_rs/kdtree.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,16 @@ def range(
Results are the insertion indexes of items that match the query.

Args:
index: the KDTree to search
min_x: The `min_x` coordinate of the query bounding box
min_y: The `min_y` coordinate of the query bounding box
max_x: The `max_x` coordinate of the query bounding box
max_y: The `max_y` coordinate of the query bounding box
index: the KDTree to search.
min_x: The `min_x` coordinate of the query bounding box.
min_y: The `min_y` coordinate of the query bounding box.
max_x: The `max_x` coordinate of the query bounding box.
max_y: The `max_y` coordinate of the query bounding box.

Returns:
An Arrow array with the insertion indexes of query results.
A uint32-typed Arrow array with the insertion indexes of query results.
"""

def within(
index: IndexLike,
qx: int | float,
Expand All @@ -55,13 +56,13 @@ def within(
Results are the insertion indexes of items that match the query.

Args:
index: the KDTree to search
qx: The `x` coordinate of the query point
qy: The `y` coordinate of the query point
index: the KDTree to search.
qx: The `x` coordinate of the query point.
qy: The `y` coordinate of the query point.
r: The radius from the query point to use for searching.

Returns:
An Arrow array with the insertion indexes of query results.
A uint32-typed Arrow array with the insertion indexes of query results.
"""

class KDTreeBuilder:
Expand Down Expand Up @@ -149,3 +150,49 @@ class KDTree(Buffer):
object.
"""
def __repr__(self) -> str: ...

class KDTreeMetadata:
"""Common metadata to describe a KDTree.

This can be used to know the number of items, node information, or total byte size
of a KDTree.

Additionally, this can be used to know how much memory a KDTree **would use** with
the given number of items and node size. A KDTree with 1 million items and a node
size of 64 (the default) would take up about 20 MB.

```py
from geoindex_rs import kdtree as kd

metadata = kd.KDTreeMetadata(num_items=1_000_000, node_size=64)
assert metadata.num_bytes == 20_000_008
```
"""

def __init__(
self,
num_items: int,
node_size: int = 64,
coord_type: Literal["float32", "float64"] = "float64",
) -> None:
"""Create a new KDTreeMetadata given a number of items and node size.

Args:
num_items: The number of items in the tree
node_size: The node size of the tree. Defaults to 64.
coord_type: The coordinate type to use in the tree. Currently only float32
and float64 are permitted. Defaults to `"float64"`.
"""
@classmethod
def from_index(cls, index: IndexLike) -> KDTreeMetadata:
"""Create from an existing KDTree buffer."""
def __repr__(self) -> str: ...
@property
def num_items(self) -> int:
"""The number of items indexed in the tree."""
@property
def node_size(self) -> int:
"""The maximum number of items per node."""
@property
def num_bytes(self) -> int:
"""The number of bytes that a KDTree with this metadata would have."""
14 changes: 11 additions & 3 deletions python/python/geoindex_rs/rtree.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def boxes_at_level(index: IndexLike, level: int) -> Array:
The returned array is a zero-copy view from Rust. Note that it will keep
the entire index memory alive until the returned array is garbage collected.
"""

def tree_join(
left: IndexLike,
right: IndexLike,
Expand Down Expand Up @@ -150,6 +151,7 @@ def partitions(index: IndexLike) -> RecordBatch:
index. Therefore, the `indices` array will have type `uint16` if the tree
has fewer than 16,384 items; otherwise it will have type `uint32`.
"""

def partition_boxes(index: IndexLike) -> RecordBatch:
"""Extract the geometries of the spatial partitions from an RTree.

Expand All @@ -174,6 +176,7 @@ def partition_boxes(index: IndexLike) -> RecordBatch:
data. The `partition_id` column will be `uint16` type if there are less than
65,536 partitions; otherwise it will be `uint32` type.
"""

def search(
index: IndexLike,
min_x: int | float,
Expand All @@ -199,7 +202,7 @@ def search(
class RTreeMetadata:
"""Common metadata to describe an RTree.

This can be used to know the number of items, node informatino, or total byte size
This can be used to know the number of items, node information, or total byte size
of an RTree.

Additionally, this can be used to know how much memory an RTree **would use** with
Expand Down Expand Up @@ -335,7 +338,9 @@ class RTreeBuilder:
It's important to add _arrays_ at a time. This should usually not be called in a loop.

Args:
min_x: array-like input
min_x: array-like input. If this is the only provided input, it should
represent the entire bounding box, as described above. Otherwise, pass
four separate parameters.
min_y: array-like input. Defaults to None.
max_x: array-like input. Defaults to None.
max_y: array-like input. Defaults to None.
Expand All @@ -347,7 +352,10 @@ class RTreeBuilder:
"""Sort the internal index and convert this class to an RTree instance.

Args:
method: The method used for sorting the RTree. `"hilbert"` will use a [Hilbert Curve](https://en.wikipedia.org/wiki/Hilbert_R-tree#Packed_Hilbert_R-trees) for sorting; `"str"` will use the [Sort-Tile-Recursive](https://ia600900.us.archive.org/27/items/nasa_techdoc_19970016975/19970016975.pdf) algorithm. Defaults to `"hilbert"`.
method: The method used for sorting the RTree. Defaults to `"hilbert"`.

- `"hilbert"` will use a [Hilbert Curve](https://en.wikipedia.org/wiki/Hilbert_R-tree#Packed_Hilbert_R-trees) for sorting.
- `"str"` will use the [Sort-Tile-Recursive](https://ia600900.us.archive.org/27/items/nasa_techdoc_19970016975/19970016975.pdf) algorithm.

Returns:
An immutable RTree instance, which can be used for spatial queries.
Expand Down
101 changes: 101 additions & 0 deletions python/src/kdtree/metadata.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
use geo_index::kdtree::{KDTreeIndex, KDTreeMetadata, DEFAULT_KDTREE_NODE_SIZE};
use pyo3::prelude::*;
use pyo3::types::PyType;

use crate::coord_type::CoordType;
use crate::kdtree::input::PyKDTreeRef;

/// Wrapper over `KDTreeMetadata` that erases the coordinate type parameter,
/// holding one variant per supported coordinate type.
pub(crate) enum PyKDTreeMetadataInner {
/// Metadata parameterized over `f32` coordinates.
Float32(KDTreeMetadata<f32>),
/// Metadata parameterized over `f64` coordinates.
Float64(KDTreeMetadata<f64>),
}

impl PyKDTreeMetadataInner {
fn node_size(&self) -> u16 {
match self {
Self::Float32(meta) => meta.node_size(),
Self::Float64(meta) => meta.node_size(),
}
}

fn num_items(&self) -> u32 {
match self {
Self::Float32(meta) => meta.num_items(),
Self::Float64(meta) => meta.num_items(),
}
}

fn num_bytes(&self) -> usize {
match self {
Self::Float32(meta) => meta.data_buffer_length(),
Self::Float64(meta) => meta.data_buffer_length(),
}
}
}

// Python-facing `KDTreeMetadata` class: a newtype around the type-erased
// `PyKDTreeMetadataInner`. A plain `//` comment is used here on purpose so the
// Python-visible class docstring is left unchanged.
#[pyclass(name = "KDTreeMetadata")]
pub struct PyKDTreeMetadata(PyKDTreeMetadataInner);

// NOTE: plain `//` comments are used throughout this block (rather than `///`)
// so that the docstrings visible from Python are left untouched.
#[pymethods]
impl PyKDTreeMetadata {
    // Python constructor: builds metadata from an item count and a node size.
    // A `coord_type` of `None` is treated as `CoordType::Float64`.
    #[new]
    #[pyo3(signature = (num_items, node_size = DEFAULT_KDTREE_NODE_SIZE, coord_type = None))]
    fn new(num_items: u32, node_size: u16, coord_type: Option<CoordType>) -> Self {
        let inner = match coord_type.unwrap_or(CoordType::Float64) {
            CoordType::Float32 => {
                PyKDTreeMetadataInner::Float32(KDTreeMetadata::<f32>::new(num_items, node_size))
            }
            CoordType::Float64 => {
                PyKDTreeMetadataInner::Float64(KDTreeMetadata::<f64>::new(num_items, node_size))
            }
        };
        Self(inner)
    }

    // Alternate constructor: copies the metadata out of an existing KDTree
    // reference, keeping its coordinate type.
    #[classmethod]
    fn from_index(_cls: &Bound<PyType>, index: PyKDTreeRef) -> PyResult<Self> {
        let inner = match index {
            PyKDTreeRef::Float32(tree) => PyKDTreeMetadataInner::Float32(*tree.metadata()),
            PyKDTreeRef::Float64(tree) => PyKDTreeMetadataInner::Float64(*tree.metadata()),
        };
        Ok(Self(inner))
    }

    // Renders as `KDTreeMetadata(num_items=..., node_size=...)`.
    fn __repr__(&self) -> String {
        let num_items = self.0.num_items();
        let node_size = self.0.node_size();
        format!(
            "KDTreeMetadata(num_items={}, node_size={})",
            num_items, node_size
        )
    }

    // Read-only property forwarding to the inner metadata's node size.
    #[getter]
    fn node_size(&self) -> u16 {
        let inner = &self.0;
        inner.node_size()
    }

    // Read-only property forwarding to the inner metadata's item count.
    #[getter]
    fn num_items(&self) -> u32 {
        let inner = &self.0;
        inner.num_items()
    }

    // Read-only property forwarding to the inner metadata's byte length.
    #[getter]
    fn num_bytes(&self) -> usize {
        let inner = &self.0;
        inner.num_bytes()
    }
}

impl From<KDTreeMetadata<f32>> for PyKDTreeMetadata {
    /// Wraps `f32`-parameterized metadata in the Python-facing class.
    fn from(metadata: KDTreeMetadata<f32>) -> Self {
        let inner = PyKDTreeMetadataInner::Float32(metadata);
        Self(inner)
    }
}

impl From<KDTreeMetadata<f64>> for PyKDTreeMetadata {
    /// Wraps `f64`-parameterized metadata in the Python-facing class.
    fn from(metadata: KDTreeMetadata<f64>) -> Self {
        let inner = PyKDTreeMetadataInner::Float64(metadata);
        Self(inner)
    }
}
2 changes: 2 additions & 0 deletions python/src/kdtree/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
mod builder;
mod input;
mod metadata;
mod range;
mod within;

Expand All @@ -19,6 +20,7 @@ pub fn register_kdtree_module(

child_module.add_class::<builder::PyKDTree>()?;
child_module.add_class::<builder::PyKDTreeBuilder>()?;
child_module.add_class::<metadata::PyKDTreeMetadata>()?;
child_module.add_wrapped(wrap_pyfunction!(range::range))?;
child_module.add_wrapped(wrap_pyfunction!(within::within))?;

Expand Down
16 changes: 8 additions & 8 deletions python/src/rtree/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ impl PyRTreeMetadataInner {
}
}

fn data_buffer_length(&self) -> usize {
fn num_bytes(&self) -> usize {
match self {
Self::Float32(meta) => meta.data_buffer_length(),
Self::Float64(meta) => meta.data_buffer_length(),
Expand All @@ -57,12 +57,12 @@ impl PyRTreeMetadata {
fn new(num_items: u32, node_size: u16, coord_type: Option<CoordType>) -> Self {
let coord_type = coord_type.unwrap_or(CoordType::Float64);
match coord_type {
CoordType::Float32 => Self(PyRTreeMetadataInner::Float32(
geo_index::rtree::RTreeMetadata::<f32>::new(num_items, node_size),
)),
CoordType::Float64 => Self(PyRTreeMetadataInner::Float64(
geo_index::rtree::RTreeMetadata::<f64>::new(num_items, node_size),
)),
CoordType::Float32 => Self(PyRTreeMetadataInner::Float32(RTreeMetadata::<f32>::new(
num_items, node_size,
))),
CoordType::Float64 => Self(PyRTreeMetadataInner::Float64(RTreeMetadata::<f64>::new(
num_items, node_size,
))),
}
}

Expand Down Expand Up @@ -113,7 +113,7 @@ impl PyRTreeMetadata {

#[getter]
fn num_bytes(&self) -> usize {
self.0.data_buffer_length()
self.0.num_bytes()
}
}

Expand Down
Loading