diff --git a/Cargo.lock b/Cargo.lock index 72f59b77c..d2e29f60c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -457,9 +457,9 @@ checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc" [[package]] name = "arrow" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7104b9e9761613ae92fe770c741d6bbf1dbc791a0fe204400aebdd429875741" +checksum = "04a8801ebb147ad240b2d978d3ab9f73c9ccd4557ba6a03e7800496770ed10e0" dependencies = [ "ahash 0.8.3", "arrow-arith", @@ -479,9 +479,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38e597a8e8efb8ff52c50eaf8f4d85124ce3c1bf20fab82f476d73739d9ab1c2" +checksum = "895263144bd4a69751cbe6a34a53f26626e19770b313a9fa792c415cd0e78f11" dependencies = [ "arrow-array", "arrow-buffer", @@ -494,9 +494,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a86d9c1473db72896bd2345ebb6b8ad75b8553ba390875c76708e8dc5c5492d" +checksum = "226fdc6c3a4ae154a74c24091d36a90b514f0ed7112f5b8322c1d8f354d8e20d" dependencies = [ "ahash 0.8.3", "arrow-buffer", @@ -510,19 +510,20 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234b3b1c8ed00c874bf95972030ac4def6f58e02ea5a7884314388307fb3669b" +checksum = "fc4843af4dd679c2f35b69c572874da8fde33be53eb549a5fb128e7a4b763510" dependencies = [ + "bytes", "half 2.3.1", "num", ] [[package]] name = "arrow-cast" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22f61168b853c7faea8cea23a2169fdff9c82fb10ae5e2c07ad1cab8f6884931" +checksum = "35e8b9990733a9b635f656efda3c9b8308c7a19695c9ec2c7046dd154f9b144b" dependencies = [ "arrow-array", "arrow-buffer", @@ -537,9 +538,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10b545c114d9bf8569c84d2fbe2020ac4eea8db462c0a37d0b65f41a90d066fe" +checksum = "646fbb4e11dd0afb8083e883f53117713b8caadb4413b3c9e63e3f535da3683c" dependencies = [ "arrow-array", "arrow-buffer", @@ -556,9 +557,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6b6852635e7c43e5b242841c7470606ff0ee70eef323004cacc3ecedd33dd8f" +checksum = "da900f31ff01a0a84da0572209be72b2b6f980f3ea58803635de47913191c188" dependencies = [ "arrow-buffer", "arrow-schema", @@ -568,9 +569,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a66da9e16aecd9250af0ae9717ae8dd7ea0d8ca5a3e788fe3de9f4ee508da751" +checksum = "2707a8d7ee2d345d045283ece3ae43416175873483e5d96319c929da542a0b1f" dependencies = [ "arrow-array", "arrow-buffer", @@ -584,9 +585,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60ee0f9d8997f4be44a60ee5807443e396e025c23cf14d2b74ce56135cb04474" +checksum = "5d1b91a63c356d14eedc778b76d66a88f35ac8498426bb0799a769a49a74a8b4" dependencies = [ "arrow-array", "arrow-buffer", @@ -604,9 +605,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcab05410e6b241442abdab6e1035177dc082bdb6f17049a4db49faed986d63" +checksum = "584325c91293abbca7aaaabf8da9fe303245d641f5f4a18a6058dc68009c7ebf" dependencies = [ "arrow-array", "arrow-buffer", @@ -619,9 +620,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91a847dd9eb0bacd7836ac63b3475c68b2210c2c96d0ec1b808237b973bd5d73" +checksum = "0e32afc1329f7b372463b21c6ca502b07cf237e1ed420d87706c1770bb0ebd38" dependencies = [ "ahash 0.8.3", "arrow-array", @@ -634,15 +635,15 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54df8c47918eb634c20e29286e69494fdc20cafa5173eb6dad49c7f6acece733" +checksum = "b104f5daa730f00fde22adc03a12aa5a2ae9ccbbf99cbd53d284119ddc90e03d" [[package]] name = "arrow-select" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "941dbe481da043c4bd40c805a19ec2fc008846080c4953171b62bcad5ee5f7fb" +checksum = "73b3ca55356d1eae07cf48808d8c462cea674393ae6ad1e0b120f40b422eb2b4" dependencies = [ "arrow-array", "arrow-buffer", @@ -653,9 +654,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "359b2cd9e071d5a3bcf44679f9d85830afebc5b9c98a08019a570a65ae933e0f" +checksum = "af1433ce02590cae68da0a18ed3a3ed868ffac2c6f24c533ddd2067f7ee04b4a" dependencies = [ "arrow-array", "arrow-buffer", @@ -942,9 +943,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.3.3" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" [[package]] name = "block-buffer" @@ -2130,11 +2131,11 @@ dependencies = [ [[package]] name = "gdal" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7691fc763e24396e3e0a7853573273cf2fc786798bf624db3162f3174536521" +checksum = "6639365794fa1f35f36f8cd3e2ea21c94c9b04aa8d9ee0d0f3324621efeb0800" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.0", "chrono", "gdal-sys", "geo-types", @@ -2241,6 +2242,8 @@ name = "geoengine-datatypes" version = "0.7.0" dependencies = [ "arrow", + "arrow-array", + "arrow-ord", "async-trait", "bytes", "chrono", @@ -4727,7 +4730,7 @@ version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.0", "errno", "libc", "linux-raw-sys 0.4.3", diff --git a/datatypes/Cargo.toml b/datatypes/Cargo.toml index 3330781a1..aee8eb8e7 100644 --- a/datatypes/Cargo.toml +++ b/datatypes/Cargo.toml @@ -9,15 +9,17 @@ edition = "2021" pro = [] [dependencies] -arrow = { version = "45.0", features = [ +arrow = { version = "46.0", features = [ "ipc_compression", # TODO: activate SIMD when stable ] } +arrow-array = "46.0" +arrow-ord = "46.0" async-trait = "0.1" bytes = "1.0" # for postgres-types impls chrono = "0.4" float-cmp = "0.9" -gdal = "0.15" +gdal = "0.16" geo-types = "0.7" geo = "0.26" geojson = "0.24" diff --git a/datatypes/src/collections/feature_collection.rs b/datatypes/src/collections/feature_collection.rs index 0e7256a06..c633c6622 100644 --- a/datatypes/src/collections/feature_collection.rs +++ b/datatypes/src/collections/feature_collection.rs @@ -7,13 +7,14 @@ use arrow::{ }, buffer::Buffer, }; +use arrow_array::{Date64Array, Float64Array, Int64Array, Scalar, StringArray}; use serde::{Deserialize, Serialize}; use serde_json::Map; use snafu::ensure; use std::collections::hash_map; use std::collections::{HashMap, HashSet}; -use std::convert::{TryFrom, TryInto}; +use std::convert::TryFrom; use std::marker::PhantomData; use std::ops::{Bound, RangeBounds}; use std::rc::Rc; @@ -433,58 +434,38 @@ where match column_type { FeatureDataType::Float => { - apply_filters( + apply_filters::, _, _>( as_primitive_array::(column), &mut filter_array, ranges, - arrow::compute::gt_eq_scalar, - arrow::compute::gt_scalar, - arrow::compute::lt_eq_scalar, - arrow::compute::lt_scalar, )?; } FeatureDataType::Int => { - apply_filters( + apply_filters::, _, _>( as_primitive_array::(column), &mut filter_array, ranges, - arrow::compute::gt_eq_scalar, - arrow::compute::gt_scalar, - arrow::compute::lt_eq_scalar, - arrow::compute::lt_scalar, )?; } FeatureDataType::Text => { - apply_filters( + apply_filters::, _, _>( as_string_array(column), &mut filter_array, ranges, - arrow::compute::gt_eq_utf8_scalar, - arrow::compute::gt_utf8_scalar, - arrow::compute::lt_eq_utf8_scalar, - arrow::compute::lt_utf8_scalar, )?; } FeatureDataType::Bool => { - apply_filters( + apply_filters::, _, _>( as_boolean_array(column), &mut filter_array, ranges, - arrow::compute::gt_eq_bool_scalar, - arrow::compute::gt_bool_scalar, - arrow::compute::lt_eq_bool_scalar, - arrow::compute::lt_bool_scalar, )?; } FeatureDataType::DateTime => { - apply_filters( + apply_filters::, _, _>( as_primitive_array::(column), &mut filter_array, ranges, - arrow::compute::gt_eq_scalar, - arrow::compute::gt_scalar, - arrow::compute::lt_eq_scalar, - arrow::compute::lt_scalar, )?; } FeatureDataType::Category => { @@ -1379,15 +1360,22 @@ fn update_filter_array( fn apply_filter_on_bound<'b, T, A>( bound: Bound<&'b FeatureDataValue>, array: &'b A, - included_fn: fn(&'b A, T) -> Result, - excluded_fn: fn(&'b A, T) -> Result, + included_fn: fn( + &dyn arrow_array::Datum, + &dyn arrow_array::Datum, + ) -> Result, + excluded_fn: fn( + &dyn arrow_array::Datum, + &dyn arrow_array::Datum, + ) -> Result, ) -> Result> where - T: TryFrom<&'b FeatureDataValue, Error = error::FeatureCollectionError>, + T: TryFrom<&'b FeatureDataValue, Error = error::FeatureCollectionError> + arrow_array::Datum, + A: arrow_array::Array, { Ok(match bound { - Bound::Included(v) => Some(included_fn(array, v.try_into()?)?), - Bound::Excluded(v) => Some(excluded_fn(array, v.try_into()?)?), + Bound::Included(v) => Some(included_fn(array, &T::try_from(v)?)?), + Bound::Excluded(v) => Some(excluded_fn(array, &T::try_from(v)?)?), Bound::Unbounded => None, }) } @@ -1396,29 +1384,26 @@ fn apply_filters<'b, T, A, R>( column: &'b A, filter_array: &mut Option, ranges: &'b [R], - included_lower_fn: fn(&'b A, T) -> Result, - excluded_lower_fn: fn(&'b A, T) -> Result, - included_upper_fn: fn(&'b A, T) -> Result, - excluded_upper_fn: fn(&'b A, T) -> Result, ) -> Result<()> where - T: TryFrom<&'b FeatureDataValue, Error = error::FeatureCollectionError>, + T: TryFrom<&'b FeatureDataValue, Error = error::FeatureCollectionError> + arrow_array::Datum, R: RangeBounds, + A: arrow_array::Array, { for range in ranges { update_filter_array( filter_array, - apply_filter_on_bound( + apply_filter_on_bound::( range.start_bound(), column, - included_lower_fn, - excluded_lower_fn, + arrow_ord::cmp::gt_eq, + arrow_ord::cmp::gt, )?, - apply_filter_on_bound( + apply_filter_on_bound::( range.end_bound(), column, - included_upper_fn, - excluded_upper_fn, + arrow_ord::cmp::lt_eq, + arrow_ord::cmp::lt, )?, )?; } diff --git a/datatypes/src/collections/multi_point_collection.rs b/datatypes/src/collections/multi_point_collection.rs index 7956c560a..125f67cb5 100755 --- a/datatypes/src/collections/multi_point_collection.rs +++ b/datatypes/src/collections/multi_point_collection.rs @@ -1233,7 +1233,10 @@ mod tests { let sorted_collection = collection.sort_by_time_asc().unwrap(); - assert!(sorted_collection.chunks_equal_ignoring_cache_hint(&expected_collection)); + assert!( + sorted_collection.chunks_equal_ignoring_cache_hint(&expected_collection), + "expected: {expected_collection:#?}\nactual: {sorted_collection:#?}" + ); } #[test] diff --git a/datatypes/src/primitives/feature_data.rs b/datatypes/src/primitives/feature_data.rs index 699bb5f56..b3b356016 100644 --- a/datatypes/src/primitives/feature_data.rs +++ b/datatypes/src/primitives/feature_data.rs @@ -3,6 +3,7 @@ use crate::primitives::TimeInstance; use crate::raster::RasterDataType; use crate::util::Result; use arrow::buffer::NullBuffer; +use arrow_array::{BooleanArray, Date64Array, Float64Array, Int64Array, StringArray}; use gdal::vector::OGRFieldType; use num_traits::AsPrimitive; use serde::{Deserialize, Serialize}; @@ -1303,6 +1304,78 @@ impl TryFrom<&FeatureDataValue> for TimeInstance { } } +impl TryFrom<&FeatureDataValue> for arrow_array::Scalar { + type Error = crate::collections::FeatureCollectionError; + + fn try_from( + value: &FeatureDataValue, + ) -> Result, Self::Error> { + Ok(match value { + FeatureDataValue::Float(v) | FeatureDataValue::NullableFloat(Some(v)) => { + Float64Array::new_scalar(*v) + } + _ => return Err(crate::collections::FeatureCollectionError::WrongDataType), + }) + } +} + +impl TryFrom<&FeatureDataValue> for arrow_array::Scalar { + type Error = crate::collections::FeatureCollectionError; + + fn try_from(value: &FeatureDataValue) -> Result, Self::Error> { + Ok(match value { + FeatureDataValue::Int(v) | FeatureDataValue::NullableInt(Some(v)) => { + Int64Array::new_scalar(*v) + } + FeatureDataValue::DateTime(v) | FeatureDataValue::NullableDateTime(Some(v)) => { + Int64Array::new_scalar(v.inner()) + } + _ => return Err(crate::collections::FeatureCollectionError::WrongDataType), + }) + } +} + +impl TryFrom<&FeatureDataValue> for arrow_array::Scalar { + type Error = crate::collections::FeatureCollectionError; + + fn try_from(value: &FeatureDataValue) -> Result, Self::Error> { + Ok(match value { + FeatureDataValue::DateTime(v) | FeatureDataValue::NullableDateTime(Some(v)) => { + Date64Array::new_scalar(v.inner()) + } + _ => return Err(crate::collections::FeatureCollectionError::WrongDataType), + }) + } +} + +impl TryFrom<&FeatureDataValue> for arrow_array::Scalar { + type Error = crate::collections::FeatureCollectionError; + + fn try_from(value: &FeatureDataValue) -> Result, Self::Error> { + Ok(match value { + FeatureDataValue::Text(v) | FeatureDataValue::NullableText(Some(v)) => { + StringArray::new_scalar(v.clone()) + } + _ => return Err(crate::collections::FeatureCollectionError::WrongDataType), + }) + } +} + +impl TryFrom<&FeatureDataValue> for arrow_array::Scalar { + type Error = crate::collections::FeatureCollectionError; + + fn try_from( + value: &FeatureDataValue, + ) -> Result, Self::Error> { + Ok(match value { + FeatureDataValue::Bool(v) | FeatureDataValue::NullableBool(Some(v)) => { + BooleanArray::new_scalar(*v) + } + _ => return Err(crate::collections::FeatureCollectionError::WrongDataType), + }) + } +} + #[cfg(test)] mod tests { use crate::{ diff --git a/datatypes/tests/example-arrow.rs b/datatypes/tests/example-arrow.rs index 24f87dd0d..1b995fb73 100755 --- a/datatypes/tests/example-arrow.rs +++ b/datatypes/tests/example-arrow.rs @@ -5,9 +5,11 @@ use arrow::array::{ UInt64Array, UInt64Builder, }; use arrow::buffer::Buffer; -use arrow::compute::gt_eq_scalar; use arrow::compute::kernels::filter::filter; use arrow::datatypes::{DataType, Field}; +use arrow_array::builder::Int64Builder; +use arrow_array::cast::AsArray; +use arrow_ord::cmp::gt_eq; use geoengine_datatypes::primitives::{Coordinate2D, TimeInterval}; use std::sync::Arc; use std::{mem, slice}; @@ -698,9 +700,51 @@ fn gt_eq_example() { // dbg!(&a); - let b = gt_eq_scalar(&a, 2).unwrap(); + let b = gt_eq(&a, &Int32Array::new_scalar(2)).unwrap(); // dbg!(&b); assert_eq!(&b, &BooleanArray::from(vec![Some(false), Some(true), None])); } + +#[test] +fn sort_example() { + let a = { + let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2); + + for value in [[1, 5], [0, 3], [1, 3]] { + builder.values().append_slice(&value); + builder.append(true); + } + + builder.finish() + }; + + // dbg!(&a); + + let sort_options = Some(arrow::compute::SortOptions { + descending: false, + nulls_first: false, + }); + + let sort_indices = arrow::compute::sort_to_indices(&a, sort_options, None).unwrap(); + + let array_ref = arrow::compute::take(&a, &sort_indices, None).unwrap(); + + let b: &FixedSizeListArray = array_ref.as_fixed_size_list(); + + // dbg!(&b); + + let c = { + let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2); + + for value in [[0, 3], [1, 3], [1, 5]] { + builder.values().append_slice(&value); + builder.append(true); + } + + builder.finish() + }; + + assert_eq!(b, &c); +} diff --git a/operators/Cargo.toml b/operators/Cargo.toml index 18a406904..460914019 100644 --- a/operators/Cargo.toml +++ b/operators/Cargo.toml @@ -10,14 +10,14 @@ pro = ["geoengine-datatypes/pro"] xgboost = ["dep:xgboost-rs", "pro"] [dependencies] -arrow = { version = "45.0" } # TODO: activate SIMD when stable +arrow = { version = "46.0" } # TODO: activate SIMD when stable async-trait = "0.1" chrono = "0.4" crossbeam = "0.8" csv = "1.1" float-cmp = "0.9" futures = "0.3" -gdal = "0.15" +gdal = "0.16" gdal-sys = "0.9" geo = "0.26" geoengine-datatypes = { path = "../datatypes" } diff --git a/services/Cargo.toml b/services/Cargo.toml index d7cfa22ff..41d244c8f 100644 --- a/services/Cargo.toml +++ b/services/Cargo.toml @@ -33,7 +33,7 @@ flexi_logger = { version = "0.25", features = ["trc"] } float-cmp = "0.9" futures = "0.3" futures-util = "0.3" -gdal = "0.15.0" +gdal = "0.16" gdal-sys = "0.9" geo = "0.26" geoengine-datatypes = { path = "../datatypes" }