Add PNG support, fuzz, and pin 0.2.2

bovee · Sep 24, 2020 · 0cb22c4 · 0cb22c4
1 parent 8d691d2
commit 0cb22c4
Show file tree

Hide file tree

Showing 17 changed files with 556 additions and 16 deletions.
diff --git a/README.md b/README.md
@@ -18,6 +18,7 @@ other formats.
  - FASTA and FASTQ sequence formats
  - FCS flow cytometry format
  - Inficon Hapsite mass spectrometry format
+ - PNG image format
  - SAM and BAM alignment formats
  - Thermo continuous flow isotope mass spectrometry formats
  - TSV

diff --git a/entab-cli/Cargo.toml b/entab-cli/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "entab-cli"
-version = "0.2.1"
+version = "0.2.2"
 authors = ["Roderick <rbovee@gmail.com>"]
 edition = "2018"
 description = "Record-format file reader CLI"
@@ -11,7 +11,7 @@ categories = ["command-line-utilities", "parsing", "science"]
 
 [dependencies]
 clap = "3.0.0-beta"
-entab = { path = "../entab", version = "0.2.1" }
+entab = { path = "../entab", version = "0.2.2" }
 memchr = "2.3"
 memmap = { version = "0.7", optional = true }
 

diff --git a/entab-cli/src/main.rs b/entab-cli/src/main.rs
@@ -21,25 +21,25 @@ pub fn main() -> Result<(), EtError> {
         .author(crate_authors!())
         .version(crate_version!())
         .arg(
-            Arg::with_name("input")
+            Arg::new("input")
                 .short('i')
                 .about("Path to read; if not provided stdin will be used")
                 .takes_value(true),
         )
         .arg(
-            Arg::with_name("output")
+            Arg::new("output")
                 .short('o')
                 .about("Path to write to; if not provided stdout will be used")
                 .takes_value(true),
         )
         .arg(
-            Arg::with_name("parser")
+            Arg::new("parser")
                 .short('p')
                 .about("Parser to use [if not specified, file type will be auto-detected]")
                 .takes_value(true),
         )
         .arg(
-            Arg::with_name("metadata")
+            Arg::new("metadata")
                 .short('m')
                 .long("metadata")
                 .about("Reports metadata about the file instead of the data itself"),

diff --git a/entab-js/Cargo.toml b/entab-js/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "entab-js"
-version = "0.2.1"
+version = "0.2.2"
 authors = ["Roderick <rbovee@gmail.com>"]
 license = "MIT"
 description = "Record-format file reader"

diff --git a/entab-py/Cargo.toml b/entab-py/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "entab-py"
-version = "0.2.1"
+version = "0.2.2"
 authors = ["Roderick <rbovee@gmail.com>"]
 license = "MIT"
 description = "Record-format file reader"

diff --git a/entab/Cargo.toml b/entab/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "entab"
-version = "0.2.1"
+version = "0.2.2"
 authors = ["Roderick <rbovee@gmail.com>"]
 edition = "2018"
 description = "Record-format file reader"

diff --git a/entab/fuzz/Cargo.lock b/entab/fuzz/Cargo.lock
diff --git a/entab/src/filetype.rs b/entab/src/filetype.rs
@@ -89,6 +89,8 @@ pub enum FileType {
     /// "Log ASCII Standard" format for well log information
     Las,
     // catch all
+    /// Portable Network Graphics image format
+    Png,
     /// Generic scientific data format
     Hdf5,
     /// Tab-separated value format
@@ -108,6 +110,7 @@ impl FileType {
                 b"FCS3.1  " => return FileType::Facs,
                 b"~VERSION" => return FileType::Las,
                 b"~Version" => return FileType::Las,
+                b"\x89PNG\r\n\x1A\n" => return FileType::Png,
                 b"\x89HDF\r\n\x1A\n" => return FileType::Hdf5,
                 b"\x04\x03\x02\x01SPAH" => return FileType::InficonHapsite,
                 b"\xAEZTR\x0D\x0A\x1A\x0A" => return FileType::Ztr,
@@ -177,6 +180,7 @@ impl FileType {
             FileType::MsRaw => &["raw"],
             FileType::MzXml => &["mzxml"],
             FileType::NetCdf => &["cdf"],
+            FileType::Png => &["png"],
             FileType::InficonHapsite => &["hps"],
             FileType::Sam => &["sam"],
             FileType::Scf => &["scf"],
@@ -200,6 +204,7 @@ impl FileType {
             FileType::Fasta => "fasta",
             FileType::Fastq => "fastq",
             FileType::InficonHapsite => "inficon",
+            FileType::Png => "png",
             FileType::Sam => "sam",
             FileType::ThermoCf => "thermo_cf",
             FileType::ThermoDxf => "thermo_dxf",

diff --git a/entab/src/readers/chemstation.rs b/entab/src/readers/chemstation.rs
@@ -125,9 +125,9 @@ fn get_metadata(header: &[u8]) -> Result<ChemstationMetadata, EtError> {
     // We need to detect the date format before we can convert into a
     // NaiveDateTime; not sure the format even maps to the file type
     // (it may be computer-dependent?)
-    let raw_run_date = str::from_utf8(&header[179..179 + run_date_len])?
-        .trim();
-    let run_date = if let Ok(d) = NaiveDateTime::parse_from_str(raw_run_date, "%d-%b-%y, %H:%M:%S") {
+    let raw_run_date = str::from_utf8(&header[179..179 + run_date_len])?.trim();
+    let run_date = if let Ok(d) = NaiveDateTime::parse_from_str(raw_run_date, "%d-%b-%y, %H:%M:%S")
+    {
         // format in MWD
         Some(d)
     } else if let Ok(d) = NaiveDateTime::parse_from_str(raw_run_date, "%d %b %y %l:%M %P") {

diff --git a/entab/src/readers/flow.rs b/entab/src/readers/flow.rs
@@ -1,4 +1,4 @@
-use alloc::borrow::Cow;
+use alloc::borrow::{Cow, ToOwned};
 use alloc::collections::BTreeMap;
 use alloc::string::{String, ToString};
 use alloc::vec::Vec;

diff --git a/entab/src/readers/inficon.rs b/entab/src/readers/inficon.rs
@@ -32,6 +32,9 @@ impl<'r> FromBuffer<'r> for InficonState {
         }
         let _ = rb.extract::<&[u8]>(148)?;
         let n_segments = rb.extract::<u32>(Endian::Little)? as usize;
+        if n_segments > 10000 {
+            return Err(EtError::new("Inficon file has too many segments"));
+        }
         // now read all of the collection segments
         let mut mz_segments = vec![Vec::new(); n_segments];
         for segment in mz_segments.iter_mut() {
@@ -42,6 +45,10 @@ impl<'r> FromBuffer<'r> for InficonState {
             for _ in 0..n_mzs {
                 let start_mz = rb.extract::<u32>(Endian::Little)?;
                 let end_mz = rb.extract::<u32>(Endian::Little)?;
+                if start_mz >= end_mz || end_mz >= 1e11 as u32 {
+                    // only malformed data should hit this
+                    return Err(EtError::new("m/z range is too big or invalid"));
+                }
                 // then dwell time (u32; microseconds) and three more u32s
                 let _ = rb.extract::<&[u8]>(16)?;
                 let i_type = rb.extract::<u32>(Endian::Little)?;
@@ -152,3 +159,74 @@ impl_reader!(
     InficonState,
     ()
 );
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn bad_inficon_fuzzes() -> Result<(), EtError> {
+        let data = [
+            4, 3, 2, 1, 83, 80, 65, 72, 66, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 246, 255, 255, 255, 0, 0,
+            0, 0, 14, 14, 14, 14, 14, 14, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+            248, 10, 10, 10, 10, 35, 4, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 62, 10, 10, 26, 0, 0,
+            0, 42, 42, 4, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 62, 10, 10, 10, 0, 0, 0, 0, 0, 0,
+            0, 16, 42, 42, 42, 10, 62, 10, 10, 26, 0, 0, 0, 42, 42, 4, 0, 0, 0, 0, 0, 0, 10, 10,
+            10, 10, 10, 62, 10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 16, 42, 42, 42,
+        ];
+        let buffer = ReadBuffer::from_slice(&data);
+        assert!(InficonReader::new(buffer, ()).is_err());
+
+        let data = [
+            4, 3, 2, 1, 83, 80, 65, 72, 4, 1, 10, 255, 255, 255, 0, 3, 197, 65, 77, 1, 62, 1, 0, 0,
+            255, 255, 255, 255, 255, 255, 62, 10, 10, 10, 10, 62, 10, 10, 10, 8, 10, 62, 10, 10,
+            62, 10, 10, 10, 9, 10, 62, 10, 10, 62, 10, 10, 62, 26, 10, 10, 10, 45, 10, 59, 9, 0,
+            255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 246, 255, 255, 255, 0, 0, 0, 0, 71, 71, 71, 71, 71, 38,
+            200, 62, 10, 255, 255, 255, 255, 169, 77, 86, 139, 139, 116, 116, 116, 116, 116, 246,
+            245, 245, 240, 255, 255, 241, 0, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+            10, 10, 62, 10, 227, 205, 10, 10, 62, 10, 0, 62, 10, 10, 1, 0, 62, 10, 10, 34, 0, 0, 0,
+            0, 0, 0, 0, 10, 10, 10, 10, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+            10, 10, 245, 10, 10, 10, 10, 240, 10, 62, 10, 10, 10, 42, 10, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 134, 134, 14,
+            62, 10, 10, 62, 59, 42, 10, 10, 10, 62, 0, 13, 10, 10, 227, 10, 10, 62, 0, 13, 10, 10,
+            227, 59, 10, 10, 0, 10, 10, 62, 41, 0, 13, 10, 10, 10, 227, 10, 10, 62, 0, 13, 10, 10,
+            10, 62, 10, 10, 8, 10, 62, 10, 10, 10, 10, 10, 62, 10, 10, 10, 62, 10, 10, 10, 10, 62,
+            10, 10, 10, 9, 10, 62, 10, 10, 255, 255, 255, 175, 255, 255, 255, 255, 255, 255, 255,
+            255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+            255, 255, 255, 10, 10, 10, 9, 10, 62, 45, 10, 59, 9, 0,
+        ];
+        let buffer = ReadBuffer::from_slice(&data);
+        assert!(InficonReader::new(buffer, ()).is_err());
+
+        let data = [
+            4, 3, 2, 1, 83, 80, 65, 72, 66, 65, 77, 1, 62, 1, 230, 255, 255, 251, 254, 254, 254,
+            254, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 0, 10, 62, 10, 59, 10, 10,
+            10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255,
+            255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 246, 255, 255, 255, 0, 0, 0, 0, 10, 10, 102, 13, 10, 35, 24, 10, 62, 13,
+            10, 13, 227, 5, 62, 10, 227, 134, 134, 10, 62, 10, 10, 62, 42, 10, 10, 10, 62, 0, 13,
+            10, 10, 227, 10, 10, 62, 0, 13, 10, 10, 227, 59, 10, 10, 250, 255, 10, 62, 41, 0, 13,
+            10, 10, 227, 43, 10, 10, 10, 10, 10, 10, 47, 59, 10, 10, 62, 0, 13, 10, 10, 227, 10,
+            10, 227, 59, 10, 10, 0, 10, 10, 10, 10, 26, 10, 10, 41, 0, 13, 10, 10, 227, 59, 10, 10,
+            10, 10, 10, 14, 10, 255, 255, 255, 255, 176, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 175, 255, 255, 255,
+            255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+            255, 255, 255, 255, 245, 240, 255, 255, 255, 255, 255, 169, 77, 86, 139, 139, 116, 35,
+            116, 116, 116, 246, 245, 245, 240, 250, 255, 10, 62, 41, 0, 13, 10, 10, 227, 43, 10,
+            10, 10, 10, 10, 10, 47, 59, 10, 10, 4, 3, 2, 1, 83, 80, 181, 181, 181, 181, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255,
+            255, 255, 255, 255, 58, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+            255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 122, 255, 255, 255,
+            255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 246, 255, 255, 255, 0, 0, 0, 0, 59, 10, 10, 10, 10, 10, 14, 10, 255, 10,
+            10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 116, 116, 246, 245, 245, 240,
+        ];
+        let buffer = ReadBuffer::from_slice(&data);
+        assert!(InficonReader::new(buffer, ()).is_err());
+
+        Ok(())
+    }
+}
diff --git a/entab/src/readers/mod.rs b/entab/src/readers/mod.rs
@@ -20,6 +20,9 @@ pub mod flow;
 pub mod inficon;
 /// Reader for FASTA/FASTQ formats that parse into "kmers"
 pub mod kmers;
+/// Reader for PNG image format
+#[cfg(feature = "std")]
+pub mod png;
 /// Reader for BAM/SAM bioinformatics formats
 pub mod sam;
 /// Readers for Thermo mass spectral isotopic formats
@@ -42,6 +45,8 @@ pub fn get_reader<'r>(
         "fastq" => Box::new(fastq::FastqReader::new(rb, ())?),
         "fcs" => Box::new(flow::FcsReader::new(rb, ())?),
         "inficon" => Box::new(inficon::InficonReader::new(rb, ())?),
+        #[cfg(feature = "std")]
+        "png" => Box::new(png::PngReader::new(rb, ())?),
         "sam" => Box::new(sam::SamReader::new(rb, ())?),
         "thermo_cf" => Box::new(thermo_iso::ThermoCfReader::new(rb, ())?),
         "thermo_dxf" => Box::new(thermo_iso::ThermoDxfReader::new(rb, ())?),