diff --git a/b.csv b/b.csv new file mode 100644 index 0000000..17e2e13 --- /dev/null +++ b/b.csv @@ -0,0 +1,9 @@ +name, value, bool, date +"abc",0.0001,T,2021-01-01 +"abcde",0.001,F,2021-01-01 +"abcdefgh",0.01,T, +"abcdefghijkl",0.1,F,2021-01-01 +"",1,T,2021-01-01 +"abcdefghijklmnop",10,F,2021-01-01 +"",100,,2021-01-01 +"n/a","",F,2021-01-01 diff --git a/data/a.csv b/data/a.csv index 17e2e13..36fb9f3 100644 --- a/data/a.csv +++ b/data/a.csv @@ -1,9 +1,9 @@ -name, value, bool, date +name,value,bool,date "abc",0.0001,T,2021-01-01 "abcde",0.001,F,2021-01-01 -"abcdefgh",0.01,T, +"abcdefgh",0.01,True, "abcdefghijkl",0.1,F,2021-01-01 -"",1,T,2021-01-01 +"NA",1,T,2021-01-01 "abcdefghijklmnop",10,F,2021-01-01 "",100,,2021-01-01 "n/a","",F,2021-01-01 diff --git a/src/datatype.rs b/src/datatype.rs index ae155b1..f00e882 100644 --- a/src/datatype.rs +++ b/src/datatype.rs @@ -71,3 +71,11 @@ pub fn is_na_string(text: String) -> bool { return lgl; } +pub fn is_na_string_padded(text: String) -> bool { + lazy_static! { + static ref R: Regex = Regex::new(r"^$|^(?:N(?:(?:(?:one|AN|a[Nn]|/A)|[Aa])|ull)|n(?:ull|an?|/a?)|(?:missing))\s*$").unwrap(); + } + let lgl = R.is_match(&text); + return lgl; +} + diff --git a/src/main.rs b/src/main.rs index dbf55f8..10bbef5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,9 +4,6 @@ use itertools::Itertools; use owo_colors::OwoColorize; use std::io::{self}; use structopt::StructOpt; -//use std::io::Write; -//use tabwriter::TabWriter; -// // Nord // nord5 - white // .truecolor(216, 222, 233) @@ -20,7 +17,6 @@ use structopt::StructOpt; mod datatype; -/// Search for a pattern in a file and display the lines that contain it. #[derive(StructOpt)] #[structopt( name = "tv", @@ -28,7 +24,7 @@ mod datatype; " )] struct Cli { - // #[structopt(parse(from_os_str),short = "i", long = "input")] +// #[structopt(parse(from_os_str),short = "i", long = "input")] // input_csv_file_path: std::path::PathBuf, // #[structopt(short = "c",long = "col_types")] // column_types_override: String, @@ -36,19 +32,19 @@ struct Cli { fn infer_type_from_string(text: &str) -> &str { if datatype::is_time(text) { - return "ts-t"; + return ""; } else if datatype::is_integer(text) { - return "int"; + return ""; } else if datatype::is_date_time(text) { - return "ts-dt"; + return ""; } else if datatype::is_date(text) { - return "ts-d"; + return ""; } else if datatype::is_double(text) { - return "dbl"; + return ""; } else if datatype::is_logical(text) { - return "lgl"; + return ""; } else { - return "char"; + return ""; } } @@ -57,23 +53,23 @@ fn trunc_strings(vec_col: Vec<&str>, width: usize) -> Vec { let v = vec_col .into_iter() .map(String::from) + .map(|string| format_if_na(&string)) .map(|mut string| { if string.len() > width { - string.truncate(width); - [string, ellipsis.to_string()].join("") + string.truncate(width-1); + [string, ellipsis.to_string()].join(" ") } else { let l = string.len(); - let add_space = width-l; + let add_space = width-l+1; let owned_string: String = string.to_owned(); let borrowed_string: &str = &" ".repeat(add_space); - [string, owned_string].join(borrowed_string) + //[string, owned_string].join(borrowed_string) + [string, "".to_string()].join(borrowed_string) } }) - .map(|string| format_if_na(&string)) .collect::>(); return v; } - fn header_len(vec_col: Vec) -> Vec { let v = vec_col .into_iter() @@ -82,8 +78,17 @@ fn header_len(vec_col: Vec) -> Vec { .collect::>(); return v; } +fn header_len_str(vec_col: Vec<&str>) -> Vec { + let v = vec_col + .into_iter() + .map(String::from) + .map(|mut string| {string.len()}) + .collect::>(); + return v; +} fn format_if_na(text: &String) -> String { - let s = datatype::is_na(&text); + let s = datatype::is_na(text); + // todo add repeat strings for NA let missing_string_value: String = "NA".to_string(); let string: String = if s { missing_string_value @@ -161,13 +166,6 @@ fn main() { .map(|x| x.expect("a csv record")) .collect::>(); - // dataframe reader - // let rdr = csv::Reader::from_reader(io::stdin()) - // .records() - // .into_iter() - // .map(|x| x.expect("a csv record")) - // .collect::>(); - let cols: usize = rdr[0].len(); let rows: usize = rdr.len(); @@ -185,38 +183,38 @@ fn main() { vec_datatypes[i] = get_col_data_type(v[i].clone()); } - let mut vec_header: Vec<&str> = vec!["#"; cols as usize]; - for i in 0..cols { - vec_header[i] = v[i].get(0).unwrap(); - } - + // vector of formatted values + let mut vf: Vec> = vec![vec!["#".to_string(); rows as usize]; cols as usize]; - //println!("==================================================================="); - //println!("{:?}", vec_header); - //let header_tunc = trunc_strings(vec_header.clone(), 15); - //let vec_header_len = header_len(vec_header); - //println!("{:?}", vec_header_len); - //println!("==================================================================="); + // get max width in columns + let mut col_largest_width = Vec::new(); + for i in 0..cols{ + let size:usize = header_len_str(v[i].clone()).into_iter().max().unwrap(); + col_largest_width.push(size); + } - // --dtype debug - //println!("{:?}", vec_datatypes); + // format datatypes spaces + let mut vec_format_datatypes: Vec<_> = vec!["#"; cols as usize]; + //for i in 0..cols { + // let add_space = col_largest_width[i] - vec_datatypes[i].len(); + // let borrowed_string = " ".repeat(add_space); + // let string = vec_datatypes[i].to_string(); + //} - let mut vf: Vec> = vec![vec!["#".to_string(); rows as usize]; cols as usize]; - // make vector of formatted values for i in 0..cols{ - if vec_datatypes[i] == "char"{ - // println!("{:?}",trunc_strings(v[i].clone(),6)); - vf[i] = trunc_strings(v[i].clone(),12); - }else if vec_datatypes[i] == "dbl"{ - // println!("{:?}",prep_dbl(v[i].clone())); - vf[i] = prep_dbl(v[i].clone()); + if vec_datatypes[i] == ""{ + //vf[i] = (v[i].clone(),col_largest_width[i]); + vf[i] = trunc_strings(v[i].clone(),col_largest_width[i]); + }else if vec_datatypes[i] == ""{ + vf[i] = trunc_strings(v[i].clone(),col_largest_width[i]); + //vf[i] = prep_dbl(v[i].clone()); }else{ - // println!("{:?}",trunc_strings(v[i].clone(),6)); - vf[i] = trunc_strings(v[i].clone(),12); + vf[i] = trunc_strings(v[i].clone(),col_largest_width[i]); } } + println!(); let mut vp: Vec> = vec![vec!["#".to_string(); cols as usize]; rows as usize]; for col in 0..cols{ for row in 0..rows{ @@ -242,34 +240,40 @@ fn main() { return dbl } - // printing - let mut s = String::new(); - for i in 0..rows{ - let a = vp[i].join("\t").to_string() + "\n"; - s.push_str(&a); - } let meta_text = "tv dim:"; let div = "x"; + // meta + print!("{: <6}", ""); println!( - "\t{} {} {} {}", + "{} {} {} {}", meta_text.truecolor(143, 188, 187), (rows - 1).truecolor(143, 188, 187), div.truecolor(143, 188, 187), cols.truecolor(143, 188, 187), ); - // put col headers here - let vec_header_joined = vec_header.join(" "); - println!("\t\t{}",vec_header_joined.truecolor(216, 222, 233).bold()); + // header + print!("{: <6}", ""); + for col in 0..cols{ + let text = vp[0].get(col).unwrap().to_string(); + print!("{}",text.truecolor(216, 222, 233).bold()); + } + println!(); // datatypes - let vec_datatypes_joined = vec_datatypes.join("> <"); - println!("\t\t{}{}{}","<".truecolor(143, 188, 187).dimmed(),vec_datatypes_joined.truecolor(143, 188, 187).dimmed(),">".truecolor(143, 188, 187).dimmed()); - // dataframe - for row in 1..rows{ - print!("\t{} ",(row).truecolor(143, 188, 187).dimmed()); - for col in 0..cols{ + //print!("{: <6}", ""); + //for col in 0..cols{ + // let add_space = vec_datatypes[col].len() - col_largest_width[col]; + // let mut owned_string: String = vec_datatypes[col].to_string(); + // let borrowed_string: &str = &" ".repeat(add_space); + // owned_string.push_str(borrowed_string); + // print!("{}",owned_string.truecolor(143, 188, 187).bold()); + //} + println!(); + for row in 1..rows{ + print!("{: <6}",(row).truecolor(143, 188, 187).dimmed()); + for col in 0..cols{ let text = vp[row].get(col).unwrap().to_string(); - print!("\t{} ", - if datatype::is_na_string(vp[row].get(col).unwrap().to_string()){ + print!("{}", + if datatype::is_na_string_padded(vp[row].get(col).unwrap().to_string()){ text.truecolor(94, 129, 172) }else{ text.truecolor(216, 222, 233)