Skip to content

Commit 5ffe118

Browse files
committed
Fix bug in repartition of ranged files with order preseved and add tests
When the order needs to be preserved, the end ranges were not being set correctly. Leading to empty file ranges like (20, 20) instead of (20, 40). Found this bug while adding tests with less than complete ranges. Thank you Jeffrey Vo for this suggestion!
1 parent 53e0f95 commit 5ffe118

File tree

1 file changed

+49
-1
lines changed

1 file changed

+49
-1
lines changed

datafusion/datasource/src/file_groups.rs

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ impl FileGroupPartitioner {
337337

338338
let last_group = new_groups.len() - 1;
339339
let (mut range_start, file_end) = original_file.range();
340-
let mut range_end = range_size;
340+
let mut range_end = range_start + range_size;
341341
for (i, group_index) in new_groups.into_iter().enumerate() {
342342
let target_group = &mut file_groups[group_index];
343343
assert!(target_group.is_empty());
@@ -661,6 +661,26 @@ mod test {
661661
assert_partitioned_files(expected, actual);
662662
}
663663

664+
#[test]
665+
fn repartition_single_file_with_incomplete_range() {
666+
// Single file, single partition into multiple partitions
667+
let single_partition =
668+
vec![FileGroup::new(vec![pfile("a", 123).with_range(10, 100)])];
669+
670+
let actual = FileGroupPartitioner::new()
671+
.with_target_partitions(4)
672+
.with_repartition_file_min_size(10)
673+
.repartition_file_groups(&single_partition);
674+
675+
let expected = Some(vec![
676+
FileGroup::new(vec![pfile("a", 123).with_range(10, 33)]),
677+
FileGroup::new(vec![pfile("a", 123).with_range(33, 56)]),
678+
FileGroup::new(vec![pfile("a", 123).with_range(56, 79)]),
679+
FileGroup::new(vec![pfile("a", 123).with_range(79, 100)]),
680+
]);
681+
assert_partitioned_files(expected, actual);
682+
}
683+
664684
#[test]
665685
fn repartition_single_file_duplicated_with_range() {
666686
// Single file, two partitions into multiple partitions
@@ -936,6 +956,34 @@ mod test {
936956
assert_partitioned_files(expected, actual);
937957
}
938958

959+
#[test]
960+
fn repartition_ordered_one_large_one_small_file_with_non_full_range() {
961+
// "Rebalance" the single large file across empty partitions, but can't split
962+
// small file
963+
let source_partitions = vec![
964+
FileGroup::new(vec![pfile("a", 100).with_range(20, 80)]),
965+
FileGroup::new(vec![pfile("b", 30).with_range(5, 25)]),
966+
];
967+
968+
let actual = FileGroupPartitioner::new()
969+
.with_preserve_order_within_groups(true)
970+
.with_target_partitions(4)
971+
.with_repartition_file_min_size(10)
972+
.repartition_file_groups(&source_partitions);
973+
974+
let expected = Some(vec![
975+
// scan first third of "a"
976+
FileGroup::new(vec![pfile("a", 100).with_range(20, 40)]),
977+
// only b in this group (can't split this)
978+
FileGroup::new(vec![pfile("b", 30).with_range(5, 25)]),
979+
// second third of "a"
980+
FileGroup::new(vec![pfile("a", 100).with_range(40, 60)]),
981+
// final third of "a"
982+
FileGroup::new(vec![pfile("a", 100).with_range(60, 80)]),
983+
]);
984+
assert_partitioned_files(expected, actual);
985+
}
986+
939987
#[test]
940988
fn repartition_ordered_two_large_files() {
941989
// "Rebalance" two large files across empty partitions, but can't mix them

0 commit comments

Comments
 (0)