
Commit

feat: improved resilience
zleyyij committed Apr 25, 2024
1 parent 2a76d14 commit 4b09a27
Showing 2 changed files with 23 additions and 28 deletions.
30 changes: 14 additions & 16 deletions src/scripts/parser/src/lib.rs
@@ -2,12 +2,10 @@
mod lexer;
mod parser;

-use encoding::all::{UTF_16BE, UTF_16LE, UTF_8};
-use encoding::codec::utf_8::from_utf8;
-use encoding::{all::ISO_8859_1, Encoding};
+use encoding::all::UTF_8;
+use encoding::Encoding;
use lexer::lexer::lex_csv;
use parser::parser::deserialize_csv;
-use std::borrow::Cow;
use std::collections::HashMap;
use wasm_bindgen::prelude::*;

@@ -20,33 +18,34 @@ pub fn parse_csv_wasm(raw_csv: &[u8]) -> JsValue {

fn parse_csv(raw_csv: &[u8]) -> HashMap<String, Vec<f64>> {
// translate the csv from ISO-8859-1 to a UTF 8 strings
-let transcoded_csv =
-transcode_csv(raw_csv);
+let transcoded_csv = transcode_csv(raw_csv);
let lexed_csv: Vec<Vec<&str>> = lex_csv(&transcoded_csv).unwrap();
let parsed_csv: HashMap<String, Vec<f64>> = deserialize_csv(lexed_csv);
return parsed_csv;
}

/// Ever since HWINFO 8.0 (<https://www.hwinfo.com/version-history/>), logs are encoded with
/// unicode, switching from ISO-8559-1 encoding.
/// Take a buffer of presumably UTF-8 encoded bytes, and transcode them to a standard rust String.
#[inline]
fn transcode_csv(unencoded_csv: &[u8]) -> String {
// see if it's valid utf 8, for some reason the encoding crate handles this better than the standard library's implementation
-match UTF_16BE.decode(unencoded_csv, encoding::DecoderTrap::Strict) {
+match UTF_8.decode(unencoded_csv, encoding::DecoderTrap::Strict) {
Ok(s) => return s,
Err(e) => {
console_log!("warning: the provided file is not valid UTF 8: interpreting with UTF-8 failed with error {e:?}, falling back to UTF-8 with replacement");
console_log!("warning: The provided file is not valid UTF 8: interpreting with UTF-8 failed with error {e:?}, falling back to UTF-8 with replacement.");
// match ISO_8859_1.decode(iso_8859_1_csv, encoding::DecoderTrap::Strict) {
// Ok(s) => return s,
// Err(e) => {
// console_log!("Unable to interpret as ISO-8559-1, falling back to UTF-8 with replacement, may god help us all (failed with {e:?}");
// return UTF_8.decode(iso_8859_1_csv, encoding::DecoderTrap::Replace).unwrap();
// }
// this is fine because Replace should be infallible(within reason)
-return UTF_8.decode(unencoded_csv, encoding::DecoderTrap::Replace).unwrap()
-}
+return UTF_8
+.decode(unencoded_csv, encoding::DecoderTrap::Replace)
+.unwrap();
+}
}
// }
// if let Err(e) = UTF_8.decode(iso_8859_1_csv, encoding::DecoderTrap::Strict) {
// console_log!("Warning: input file contains invalid UTF-8: {e:?}");
@@ -66,7 +65,6 @@ mod tests {
use std::fs::File;
use std::io::Read;

-// ISO-8859-1 encoding is really close to UTF-8, so more general characters are fine
fn gen_mock_csv(num_rows: usize, num_columns: usize) -> (String, HashMap<String, Vec<f64>>) {
// todo!()
let mock_spreadsheet: Vec<Vec<RefCell<String>>> =
@@ -135,9 +133,9 @@
#[test]
fn parse_csv_from_file() {
// TODO
-let mut file_handle = File::open("/Users/arc/Downloads/help.csv").unwrap();
-let mut file_vec = Vec::new();
-file_handle.read_to_end(&mut file_vec).unwrap();
-parse_csv(&file_vec);
+// let mut file_handle = File::open("/Users/arc/Downloads/HWINFO.CSV").unwrap();
+// let mut file_vec = Vec::new();
+// file_handle.read_to_end(&mut file_vec).unwrap();
+// parse_csv(&file_vec);
}
}
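For reference, the strict-then-replace decode that the new transcode_csv relies on can be exercised on its own. The sketch below is not part of the commit; it assumes the same `encoding` crate already imported above, and the name decode_lossy is purely illustrative.

use encoding::all::UTF_8;
use encoding::{DecoderTrap, Encoding};

/// Try a strict UTF-8 decode first; if the bytes are not valid UTF-8, decode
/// again with replacement characters instead of failing.
fn decode_lossy(bytes: &[u8]) -> String {
    match UTF_8.decode(bytes, DecoderTrap::Strict) {
        Ok(s) => s,
        Err(e) => {
            eprintln!("not valid UTF-8 ({e:?}), falling back to replacement");
            // Per the comment in the diff, the Replace trap should not fail
            // for UTF-8, so unwrap is used here.
            UTF_8.decode(bytes, DecoderTrap::Replace).unwrap()
        }
    }
}

fn main() {
    // 0xFF never appears in well-formed UTF-8, so the strict pass fails and
    // the fallback yields "ab\u{FFFD}".
    println!("{}", decode_lossy(&[b'a', b'b', 0xFF]));
}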
21 changes: 9 additions & 12 deletions src/scripts/parser/src/parser.rs
@@ -79,14 +79,7 @@ pub mod parser {
processed_column.push(parsed_val);
}
Err(_) => {
-#[cfg(wasm)]
-{
-console_log!("Failed to parse entry {entry} into a float, skipping");
-}
-#[cfg(not(wasm))]
-{
-println!("Failed to parse entry {entry} into a float, skipping column {}", column[0]);
-}
+console_log!("Failed to parse entry {entry} into a float, skipping");
}
}
}
@@ -102,8 +95,8 @@
// .chars().count() is used here instead of .len() because .len()
// breaks for multibyte chars
[insertion_key.chars().count() - 3..]
.iter()
.collect::<String>()
} else {
insertion_key.to_string()
};
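To illustrate the comment above about .chars().count() versus .len(): .len() counts bytes, so it over-counts for multibyte characters such as the duck emoji used in the tests. A small standalone example, not from the commit:

fn main() {
    let key = "🦆 (2)";
    assert_eq!(key.len(), 8); // byte length: the emoji alone is 4 bytes
    assert_eq!(key.chars().count(), 5); // character count
    // Taking the last three characters, analogous to the suffix handling above:
    let suffix: String = key.chars().skip(key.chars().count() - 3).collect();
    assert_eq!(suffix, "(2)");
}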
@@ -167,13 +160,18 @@
/// in a hwinfo csv, translate to another vec of vectors, where
/// the output is a list of columns
fn rows_to_columns(input: Vec<Vec<&str>>) -> Vec<Vec<&str>> {
+// I don't know how to make it warn that the last column is malformed only once
+let mut sent_warning = false;
let mut columnar_input: Vec<Vec<&str>> =
vec![Vec::with_capacity(input.len()); input[0].len()];
for row in input {
for (index, item) in row.iter().enumerate() {
// sometimes there are other columns that are longer than the first column
if columnar_input.len() - 1 < index {
console_log!("The last column in this CSV file is malformed, skipping. Please open in a spreadsheet to view");
if !sent_warning {
console_log!("warning: The last column in this CSV file is malformed, skipping. Please open in a spreadsheet to view.");
sent_warning = true;
}
} else {
columnar_input[index].push(*item);
}
@@ -293,7 +291,6 @@ pub mod parser {
expected_output.insert("🦆 (2)".to_owned(), vec![2.0, 2.0]);

assert_eq!(deserialize_csv(mock_csv), expected_output);

}
}
}
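Below is a simplified, self-contained sketch of the warn-once transpose that rows_to_columns now performs, with println! standing in for the crate's console_log! macro; the module wrapper and the rest of the parser are omitted here.

fn rows_to_columns(input: Vec<Vec<&str>>) -> Vec<Vec<&str>> {
    let mut sent_warning = false;
    // one output column per cell in the first row
    let mut columns: Vec<Vec<&str>> = vec![Vec::with_capacity(input.len()); input[0].len()];
    for row in input {
        for (index, item) in row.iter().enumerate() {
            // cells beyond the width of the first row are dropped,
            // and the warning is printed at most once
            if index >= columns.len() {
                if !sent_warning {
                    println!("warning: the last column in this CSV file is malformed, skipping");
                    sent_warning = true;
                }
            } else {
                columns[index].push(*item);
            }
        }
    }
    columns
}

fn main() {
    // the second row has one cell too many; it is dropped with a single warning
    let rows = vec![vec!["a", "b"], vec!["1", "2", "extra"]];
    assert_eq!(rows_to_columns(rows), vec![vec!["a", "1"], vec!["b", "2"]]);
}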
