diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 7967084aa7ab..2d1bc675aaaa 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -498,7 +498,7 @@ impl BooleanArray { /// /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). /// I.e. that `size_hint().1` correctly reports its length. Note that this is a stronger - /// guarantee that `ExactSizeIterator` provides which could still report a wrong length. + /// guarantee than `ExactSizeIterator` provides, which could still report a wrong length. /// /// # Panics /// @@ -511,8 +511,9 @@ impl BooleanArray { pub unsafe fn from_trusted_len_iter(iter: I) -> Self where P: Into, - I: ExactSizeIterator, + I: IntoIterator, { + let iter = iter.into_iter(); let data_len = iter.len(); let num_bytes = bit_util::ceil(data_len, 8); @@ -715,7 +716,7 @@ mod tests { let expected = v.clone().into_iter().collect::(); let actual = unsafe { // SAFETY: `v` has trusted length - BooleanArray::from_trusted_len_iter(v.into_iter()) + BooleanArray::from_trusted_len_iter(v) }; assert_eq!(expected, actual); } diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index a5bbd0e664d6..62970e2f1fe9 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -1458,18 +1458,24 @@ impl>> FromIterator for P impl PrimitiveArray { /// Creates a [`PrimitiveArray`] from an iterator of trusted length. + /// /// # Safety + /// /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). - /// I.e. that `size_hint().1` correctly reports its length. + /// I.e. that `size_hint().1` correctly reports its length. Note that this is a stronger + /// guarantee than `ExactSizeIterator` provides, which could still report a wrong length. + /// + /// # Panics + /// + /// Panics if the iterator does not report an upper bound on `size_hint()`. #[inline] pub unsafe fn from_trusted_len_iter(iter: I) -> Self where P: std::borrow::Borrow::Native>>, - I: IntoIterator, + I: IntoIterator, { let iterator = iter.into_iter(); - let (_, upper) = iterator.size_hint(); - let len = upper.expect("trusted_len_unzip requires an upper limit"); + let len = iterator.len(); let (null, buffer) = unsafe { trusted_len_unzip(iterator) }; diff --git a/arrow-array/src/builder/boolean_builder.rs b/arrow-array/src/builder/boolean_builder.rs index 275aa8c9e56a..165fd3b42fc0 100644 --- a/arrow-array/src/builder/boolean_builder.rs +++ b/arrow-array/src/builder/boolean_builder.rs @@ -237,7 +237,7 @@ impl Extend> for BooleanBuilder { let buffered = iter.into_iter().collect::>(); let array = unsafe { // SAFETY: std::vec::IntoIter implements TrustedLen - BooleanArray::from_trusted_len_iter(buffered.into_iter()) + BooleanArray::from_trusted_len_iter(buffered) }; self.append_array(&array) } diff --git a/arrow-cast/src/cast/string.rs b/arrow-cast/src/cast/string.rs index 77696ae0d8cc..8194a15d34dd 100644 --- a/arrow-cast/src/cast/string.rs +++ b/arrow-cast/src/cast/string.rs @@ -87,7 +87,7 @@ pub(crate) fn parse_string_view( fn parse_string_iter< 'a, P: Parser, - I: Iterator>, + I: ExactSizeIterator>, F: FnOnce() -> Option, >( iter: I, @@ -156,7 +156,7 @@ pub(crate) fn cast_view_to_timestamp( fn cast_string_to_timestamp_impl< 'a, - I: Iterator>, + I: ExactSizeIterator>, T: ArrowTimestampType, Tz: TimeZone, >( @@ -310,7 +310,7 @@ fn cast_string_to_interval_impl<'a, I, ArrowType, F>( parse_function: F, ) -> Result where - I: Iterator>, + I: ExactSizeIterator>, ArrowType: ArrowPrimitiveType, F: Fn(&str) -> Result + Copy, { diff --git a/arrow/benches/array_from.rs b/arrow/benches/array_from.rs index 575a8280f652..b50fa62489ea 100644 --- a/arrow/benches/array_from.rs +++ b/arrow/benches/array_from.rs @@ -206,13 +206,15 @@ fn array_from_vec_benchmark(c: &mut Criterion) { }); } -fn gen_option_vector(item: TItem, len: usize) -> Vec> { - hint::black_box( - repeat_n(item, len) - .enumerate() - .map(|(idx, item)| if idx % 3 == 0 { None } else { Some(item) }) - .collect(), - ) +fn gen_option_iter( + item: TItem, + len: usize, +) -> Box>> { + hint::black_box(Box::new(repeat_n(item, len).enumerate().map( + |(idx, item)| { + if idx % 3 == 0 { None } else { Some(item) } + }, + ))) } fn from_iter_benchmark(c: &mut Criterion) { @@ -220,26 +222,26 @@ fn from_iter_benchmark(c: &mut Criterion) { // All ArrowPrimitiveType use the same implementation c.bench_function("Int64Array::from_iter", |b| { - let values = gen_option_vector(1, ITER_LEN); - b.iter(|| hint::black_box(Int64Array::from_iter(values.iter()))); + b.iter(|| hint::black_box(Int64Array::from_iter(gen_option_iter(1, ITER_LEN)))); }); c.bench_function("Int64Array::from_trusted_len_iter", |b| { - let values = gen_option_vector(1, ITER_LEN); b.iter(|| unsafe { - // SAFETY: values.iter() is a TrustedLenIterator - hint::black_box(Int64Array::from_trusted_len_iter(values.iter())) + // SAFETY: gen_option_iter returns a TrustedLen iterator + hint::black_box(Int64Array::from_trusted_len_iter(gen_option_iter( + 1, ITER_LEN, + ))) }); }); c.bench_function("BooleanArray::from_iter", |b| { - let values = gen_option_vector(true, ITER_LEN); - b.iter(|| hint::black_box(BooleanArray::from_iter(values.iter()))); + b.iter(|| hint::black_box(BooleanArray::from_iter(gen_option_iter(true, ITER_LEN)))); }); c.bench_function("BooleanArray::from_trusted_len_iter", |b| { - let values = gen_option_vector(true, ITER_LEN); b.iter(|| unsafe { - // SAFETY: values.iter() is a TrustedLenIterator - hint::black_box(BooleanArray::from_trusted_len_iter(values.iter())) + // SAFETY: gen_option_iter returns a TrustedLen iterator + hint::black_box(BooleanArray::from_trusted_len_iter(gen_option_iter( + true, ITER_LEN, + ))) }); }); }