Skip to content

Commit

Permalink
fix: add a compute intensive reproduction
Browse files Browse the repository at this point in the history
The default batch size is too large to build an on-demand "large table"
so the test is ignored by default

Signed-off-by: R. Tyler Croy <rtyler@brokenco.de>
  • Loading branch information
rtyler committed Oct 25, 2024
1 parent 264a0ec commit e19e21f
Showing 1 changed file with 62 additions and 1 deletion.
63 changes: 62 additions & 1 deletion crates/core/src/protocol/checkpoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,10 +357,10 @@ fn parquet_bytes_from_state(
let buf = serde_json::to_string(&j?).unwrap();
let _ = decoder.decode(buf.as_bytes())?;

total_actions += 1;
while let Some(batch) = decoder.flush()? {
writer.write(&batch)?;
}
total_actions += 1;
}

let _ = writer.close()?;
Expand Down Expand Up @@ -1095,4 +1095,65 @@ mod tests {
}
});
}

#[ignore = "This test is only useful if the batch size has been made small"]
#[tokio::test]
async fn test_checkpoint_large_table() -> crate::DeltaResult<()> {
use crate::writer::test_utils::get_arrow_schema;

let table_schema = get_delta_schema();
let temp_dir = tempfile::tempdir()?;
let table_path = temp_dir.path().to_str().unwrap();
let mut table = DeltaOps::try_from_uri(&table_path)
.await?
.create()
.with_columns(table_schema.fields().cloned())
.await
.unwrap();
assert_eq!(table.version(), 0);
let count = 20;

for _ in 0..count {
table.load().await?;
let batch = RecordBatch::try_new(
Arc::clone(&get_arrow_schema(&None)),
vec![
Arc::new(arrow::array::StringArray::from(vec!["A", "B", "C", "C"])),
Arc::new(arrow::array::Int32Array::from(vec![0, 20, 10, 100])),
Arc::new(arrow::array::StringArray::from(vec![
"2021-02-02",
"2021-02-03",
"2021-02-02",
"2021-02-04",
])),
],
)
.unwrap();
let _ = DeltaOps(table.clone()).write(vec![batch]).await?;
}

table.load().await?;
assert_eq!(table.version(), count, "Expected {count} transactions");
let pre_checkpoint_actions = table.snapshot()?.file_actions()?;

let before = table.version();
let res = create_checkpoint(&table).await;
assert!(res.is_ok(), "Failed to create the checkpoint! {res:#?}");

let table = crate::open_table(&table_path).await?;
assert_eq!(
before,
table.version(),
"Why on earth did a checkpoint creata version?"
);

let post_checkpoint_actions = table.snapshot()?.file_actions()?;

assert_eq!(
pre_checkpoint_actions.len(),
post_checkpoint_actions.len(),
"The number of actions read from the table after checkpointing is wrong!"
);
Ok(())
}
}

0 comments on commit e19e21f

Please sign in to comment.