Skip to content

Commit 6e5c476

Browse files
committed
[Variant] Support variant's typed_value for DataType::LargeUtf8
1 parent 0502733 commit 6e5c476

File tree

3 files changed

+35
-4
lines changed

3 files changed

+35
-4
lines changed

parquet-variant-compute/src/variant_array.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -967,6 +967,11 @@ fn typed_value_to_variant<'a>(
967967
let value = array.value(index);
968968
Ok(Variant::from(value))
969969
}
970+
DataType::LargeUtf8 => {
971+
let array = typed_value.as_string::<i64>();
972+
let value = array.value(index);
973+
Ok(Variant::from(value))
974+
}
970975
DataType::Int8 => {
971976
primitive_conversion_single_value!(Int8Type, typed_value, index)
972977
}
@@ -1165,14 +1170,14 @@ fn canonicalize_and_verify_data_type(data_type: &DataType) -> Result<Cow<'_, Dat
11651170
// Binary and string are allowed. Force Binary to BinaryView because that's what the parquet
11661171
// reader returns and what the rest of the variant code expects.
11671172
Binary => Cow::Owned(DataType::BinaryView),
1168-
BinaryView | Utf8 => borrow!(),
1173+
BinaryView | Utf8 | LargeUtf8 => borrow!(),
11691174

11701175
// UUID maps to 16-byte fixed-size binary; no other width is allowed
11711176
FixedSizeBinary(16) => borrow!(),
11721177
FixedSizeBinary(_) | FixedSizeList(..) => fail!(),
11731178

11741179
// We can _possibly_ allow (some of) these some day?
1175-
LargeBinary | LargeUtf8 | Utf8View | ListView(_) | LargeList(_) | LargeListView(_) => {
1180+
LargeBinary | Utf8View | ListView(_) | LargeList(_) | LargeListView(_) => {
11761181
fail!()
11771182
}
11781183

parquet-variant-compute/src/variant_get.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,8 +311,8 @@ mod test {
311311
use arrow::array::{
312312
Array, ArrayRef, AsArray, BinaryViewArray, BooleanArray, Date32Array, Decimal32Array,
313313
Decimal64Array, Decimal128Array, Decimal256Array, Float32Array, Float64Array, Int8Array,
314-
Int16Array, Int32Array, Int64Array, NullBuilder, StringArray, StructArray,
315-
Time64MicrosecondArray,
314+
Int16Array, Int32Array, Int64Array, LargeStringArray, NullBuilder, StringArray,
315+
StructArray, Time64MicrosecondArray,
316316
};
317317
use arrow::buffer::NullBuffer;
318318
use arrow::compute::CastOptions;
@@ -1302,6 +1302,25 @@ mod test {
13021302
])
13031303
);
13041304

1305+
perfectly_shredded_variant_array_fn!(large_utf8_gen_array, || {
1306+
LargeStringArray::from(vec![
1307+
"Arrow-parquet-variant",
1308+
"short-string",
1309+
"apache-arrow-rs-parquet-variant-string-value-the-length-is-bigger-than-sixty-three",
1310+
])
1311+
});
1312+
1313+
perfectly_shredded_to_arrow_primitive_test!(
1314+
large_utf8_test_as_large_utf8,
1315+
DataType::LargeUtf8,
1316+
large_utf8_gen_array,
1317+
LargeStringArray::from(vec![
1318+
"Arrow-parquet-variant",
1319+
"short-string",
1320+
"apache-arrow-rs-parquet-variant-string-value-the-length-is-bigger-than-sixty-three"
1321+
])
1322+
);
1323+
13051324
/// Return a VariantArray that represents a normal "shredded" variant
13061325
/// for the following example
13071326
///

parquet-variant-compute/src/variant_to_arrow.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
6262
Time(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64MicrosecondType>),
6363
Date(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>),
6464
Utf8(VariantToUtf8ArrowRowBuilder<'a, i32>),
65+
LargeUtf8(VariantToUtf8ArrowRowBuilder<'a, i64>),
6566
}
6667

6768
/// Builder for converting variant values into strongly typed Arrow arrays.
@@ -104,6 +105,7 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
104105
Time(b) => b.append_null(),
105106
Date(b) => b.append_null(),
106107
Utf8(b) => b.append_null(),
108+
LargeUtf8(b) => b.append_null(),
107109
}
108110
}
109111

@@ -134,6 +136,7 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
134136
Time(b) => b.append_value(value),
135137
Date(b) => b.append_value(value),
136138
Utf8(b) => b.append_value(value),
139+
LargeUtf8(b) => b.append_value(value),
137140
}
138141
}
139142

@@ -164,6 +167,7 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
164167
Time(b) => b.finish(),
165168
Date(b) => b.finish(),
166169
Utf8(b) => b.finish(),
170+
LargeUtf8(b) => b.finish(),
167171
}
168172
}
169173
}
@@ -287,6 +291,9 @@ pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
287291
VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
288292
),
289293
DataType::Utf8 => Utf8(VariantToUtf8ArrowRowBuilder::new(cast_options, capacity)),
294+
DataType::LargeUtf8 => {
295+
LargeUtf8(VariantToUtf8ArrowRowBuilder::new(cast_options, capacity))
296+
}
290297
_ if data_type.is_primitive() => {
291298
return Err(ArrowError::NotYetImplemented(format!(
292299
"Primitive data_type {data_type:?} not yet implemented"

0 commit comments

Comments
 (0)