Skip to content

Commit

Permalink
feat: ignore infer_schema_length and read the whole file in eager mode
Browse files Browse the repository at this point in the history
  • Loading branch information
Banyc committed Dec 12, 2023
1 parent e56f126 commit f56078f
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "dfsql"
version = "0.3.1"
version = "0.3.2"
edition = "2021"
description = "SQL REPL for Data Frames"
license = "MIT"
Expand Down
21 changes: 16 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,20 @@ pub struct Cli {
}

impl Cli {
/// Schema-inference length to use when reading input files.
///
/// Returns `Some(self.infer_schema_length)` when a SQL file was given
/// (`self.sql` is set) or the `lazy` flag is on; returns `None` otherwise.
fn infer_schema_length(&self) -> Option<usize> {
    let use_configured_length = self.lazy || self.sql.is_some();
    use_configured_length.then_some(self.infer_schema_length)
}

pub fn run(self) -> anyhow::Result<()> {
let mut df = read_df_file(&self.input, self.infer_schema_length)?;
let mut df = read_df_file(&self.input, self.infer_schema_length())?;
let mut others = HashMap::new();
for other in &self.join {
let (name, path) = other.split_once(',').context("name,path")?;
let df = read_df_file(path, self.infer_schema_length)?;
let df = read_df_file(path, self.infer_schema_length())?;
others.insert(name.to_string(), df);
}
if let Some(sql_file) = &self.sql {
Expand Down Expand Up @@ -182,7 +190,10 @@ impl Cli {
}
}

fn read_df_file(path: impl AsRef<Path>, infer_schema_length: usize) -> anyhow::Result<LazyFrame> {
fn read_df_file(
path: impl AsRef<Path>,
infer_schema_length: Option<usize>,
) -> anyhow::Result<LazyFrame> {
let Some(extension) = path.as_ref().extension() else {
bail!(
"No extension at the name of the file `{}`",
Expand All @@ -192,14 +203,14 @@ fn read_df_file(path: impl AsRef<Path>, infer_schema_length: usize) -> anyhow::R
Ok(match extension.to_string_lossy().as_ref() {
"csv" => LazyCsvReader::new(&path)
.has_header(true)
.with_infer_schema_length(Some(infer_schema_length))
.with_infer_schema_length(infer_schema_length)
.finish()?,
"json" => {
let file = std::fs::File::options().read(true).open(&path)?;
JsonReader::new(file).finish()?.lazy()
}
"ndjson" | "jsonl" => LazyJsonLineReader::new(&path)
.with_infer_schema_length(Some(infer_schema_length))
.with_infer_schema_length(infer_schema_length)
.finish()?,
_ => bail!(
"Unknown extension `{}` at the name of the file `{}`",
Expand Down

0 comments on commit f56078f

Please sign in to comment.