Skip to content

Commit

Permalink
Add PNG support, fuzz, and pin 0.2.2
Browse files Browse the repository at this point in the history
  • Loading branch information
bovee committed Sep 24, 2020
1 parent 8d691d2 commit 0cb22c4
Show file tree
Hide file tree
Showing 17 changed files with 556 additions and 16 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ other formats.
- FASTA and FASTQ sequence formats
- FCS flow cytometry format
- Inficon Hapsite mass spectrometry format
- PNG image format
- SAM and BAM alignment formats
- Thermo continuous flow isotope mass spectrometry formats
- TSV
Expand Down
4 changes: 2 additions & 2 deletions entab-cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "entab-cli"
version = "0.2.1"
version = "0.2.2"
authors = ["Roderick <rbovee@gmail.com>"]
edition = "2018"
description = "Record-format file reader CLI"
Expand All @@ -11,7 +11,7 @@ categories = ["command-line-utilities", "parsing", "science"]

[dependencies]
clap = "3.0.0-beta"
entab = { path = "../entab", version = "0.2.1" }
entab = { path = "../entab", version = "0.2.2" }
memchr = "2.3"
memmap = { version = "0.7", optional = true }

Expand Down
8 changes: 4 additions & 4 deletions entab-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,25 @@ pub fn main() -> Result<(), EtError> {
.author(crate_authors!())
.version(crate_version!())
.arg(
Arg::with_name("input")
Arg::new("input")
.short('i')
.about("Path to read; if not provided stdin will be used")
.takes_value(true),
)
.arg(
Arg::with_name("output")
Arg::new("output")
.short('o')
.about("Path to write to; if not provided stdout will be used")
.takes_value(true),
)
.arg(
Arg::with_name("parser")
Arg::new("parser")
.short('p')
.about("Parser to use [if not specified, file type will be auto-detected]")
.takes_value(true),
)
.arg(
Arg::with_name("metadata")
Arg::new("metadata")
.short('m')
.long("metadata")
.about("Reports metadata about the file instead of the data itself"),
Expand Down
2 changes: 1 addition & 1 deletion entab-js/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "entab-js"
version = "0.2.1"
version = "0.2.2"
authors = ["Roderick <rbovee@gmail.com>"]
license = "MIT"
description = "Record-format file reader"
Expand Down
2 changes: 1 addition & 1 deletion entab-py/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "entab-py"
version = "0.2.1"
version = "0.2.2"
authors = ["Roderick <rbovee@gmail.com>"]
license = "MIT"
description = "Record-format file reader"
Expand Down
2 changes: 1 addition & 1 deletion entab/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "entab"
version = "0.2.1"
version = "0.2.2"
authors = ["Roderick <rbovee@gmail.com>"]
edition = "2018"
description = "Record-format file reader"
Expand Down
39 changes: 38 additions & 1 deletion entab/fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions entab/src/filetype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ pub enum FileType {
/// "Log ASCII Standard" format for well log information
Las,
// catch all
/// Portable Network Graphics image format
Png,
/// Generic scientific data format
Hdf5,
/// Tab-separated value format
Expand All @@ -108,6 +110,7 @@ impl FileType {
b"FCS3.1 " => return FileType::Facs,
b"~VERSION" => return FileType::Las,
b"~Version" => return FileType::Las,
b"\x89PNG\r\n\x1A\n" => return FileType::Png,
b"\x89HDF\r\n\x1A\n" => return FileType::Hdf5,
b"\x04\x03\x02\x01SPAH" => return FileType::InficonHapsite,
b"\xAEZTR\x0D\x0A\x1A\x0A" => return FileType::Ztr,
Expand Down Expand Up @@ -177,6 +180,7 @@ impl FileType {
FileType::MsRaw => &["raw"],
FileType::MzXml => &["mzxml"],
FileType::NetCdf => &["cdf"],
FileType::Png => &["png"],
FileType::InficonHapsite => &["hps"],
FileType::Sam => &["sam"],
FileType::Scf => &["scf"],
Expand All @@ -200,6 +204,7 @@ impl FileType {
FileType::Fasta => "fasta",
FileType::Fastq => "fastq",
FileType::InficonHapsite => "inficon",
FileType::Png => "png",
FileType::Sam => "sam",
FileType::ThermoCf => "thermo_cf",
FileType::ThermoDxf => "thermo_dxf",
Expand Down
6 changes: 3 additions & 3 deletions entab/src/readers/chemstation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@ fn get_metadata(header: &[u8]) -> Result<ChemstationMetadata, EtError> {
// We need to detect the date format before we can convert into a
// NaiveDateTime; not sure the format even maps to the file type
// (it may be computer-dependent?)
let raw_run_date = str::from_utf8(&header[179..179 + run_date_len])?
.trim();
let run_date = if let Ok(d) = NaiveDateTime::parse_from_str(raw_run_date, "%d-%b-%y, %H:%M:%S") {
let raw_run_date = str::from_utf8(&header[179..179 + run_date_len])?.trim();
let run_date = if let Ok(d) = NaiveDateTime::parse_from_str(raw_run_date, "%d-%b-%y, %H:%M:%S")
{
// format in MWD
Some(d)
} else if let Ok(d) = NaiveDateTime::parse_from_str(raw_run_date, "%d %b %y %l:%M %P") {
Expand Down
2 changes: 1 addition & 1 deletion entab/src/readers/flow.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use alloc::borrow::Cow;
use alloc::borrow::{Cow, ToOwned};
use alloc::collections::BTreeMap;
use alloc::string::{String, ToString};
use alloc::vec::Vec;
Expand Down
78 changes: 78 additions & 0 deletions entab/src/readers/inficon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ impl<'r> FromBuffer<'r> for InficonState {
}
let _ = rb.extract::<&[u8]>(148)?;
let n_segments = rb.extract::<u32>(Endian::Little)? as usize;
if n_segments > 10000 {
return Err(EtError::new("Inficon file has too many segments"));
}
// now read all of the collection segments
let mut mz_segments = vec![Vec::new(); n_segments];
for segment in mz_segments.iter_mut() {
Expand All @@ -42,6 +45,10 @@ impl<'r> FromBuffer<'r> for InficonState {
for _ in 0..n_mzs {
let start_mz = rb.extract::<u32>(Endian::Little)?;
let end_mz = rb.extract::<u32>(Endian::Little)?;
if start_mz >= end_mz || end_mz >= 1e11 as u32 {
// only malformed data should hit this
return Err(EtError::new("m/z range is too big or invalid"));
}
// then dwell time (u32; microseconds) and three more u32s
let _ = rb.extract::<&[u8]>(16)?;
let i_type = rb.extract::<u32>(Endian::Little)?;
Expand Down Expand Up @@ -152,3 +159,74 @@ impl_reader!(
InficonState,
()
);

// Tests for the Inficon reader; only compiled for test builds.
#[cfg(test)]
mod test {
use super::*;

// Feeds three malformed byte sequences to `InficonReader::new` and asserts
// that each one is rejected with an `Err`.
//
// Every input starts with the bytes 4, 3, 2, 1, 83, 80, 65, 72
// (i.e. `\x04\x03\x02\x01SPAH`); the remaining bytes are presumably
// fuzzer-generated, going by the test name -- TODO confirm their origin.
// The byte values must be kept exactly as they are.
#[test]
fn bad_inficon_fuzzes() -> Result<(), EtError> {
// First malformed input.
let data = [
4, 3, 2, 1, 83, 80, 65, 72, 66, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 246, 255, 255, 255, 0, 0,
0, 0, 14, 14, 14, 14, 14, 14, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
248, 10, 10, 10, 10, 35, 4, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 62, 10, 10, 26, 0, 0,
0, 42, 42, 4, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 62, 10, 10, 10, 0, 0, 0, 0, 0, 0,
0, 16, 42, 42, 42, 10, 62, 10, 10, 26, 0, 0, 0, 42, 42, 4, 0, 0, 0, 0, 0, 0, 10, 10,
10, 10, 10, 62, 10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 16, 42, 42, 42,
];
let buffer = ReadBuffer::from_slice(&data);
// Constructing the reader must fail rather than succeed or panic.
assert!(InficonReader::new(buffer, ()).is_err());

// Second malformed input (shadows the previous `data`).
let data = [
4, 3, 2, 1, 83, 80, 65, 72, 4, 1, 10, 255, 255, 255, 0, 3, 197, 65, 77, 1, 62, 1, 0, 0,
255, 255, 255, 255, 255, 255, 62, 10, 10, 10, 10, 62, 10, 10, 10, 8, 10, 62, 10, 10,
62, 10, 10, 10, 9, 10, 62, 10, 10, 62, 10, 10, 62, 26, 10, 10, 10, 45, 10, 59, 9, 0,
255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 246, 255, 255, 255, 0, 0, 0, 0, 71, 71, 71, 71, 71, 38,
200, 62, 10, 255, 255, 255, 255, 169, 77, 86, 139, 139, 116, 116, 116, 116, 116, 246,
245, 245, 240, 255, 255, 241, 0, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 62, 10, 227, 205, 10, 10, 62, 10, 0, 62, 10, 10, 1, 0, 62, 10, 10, 34, 0, 0, 0,
0, 0, 0, 0, 10, 10, 10, 10, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 245, 10, 10, 10, 10, 240, 10, 62, 10, 10, 10, 42, 10, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 134, 134, 14,
62, 10, 10, 62, 59, 42, 10, 10, 10, 62, 0, 13, 10, 10, 227, 10, 10, 62, 0, 13, 10, 10,
227, 59, 10, 10, 0, 10, 10, 62, 41, 0, 13, 10, 10, 10, 227, 10, 10, 62, 0, 13, 10, 10,
10, 62, 10, 10, 8, 10, 62, 10, 10, 10, 10, 10, 62, 10, 10, 10, 62, 10, 10, 10, 10, 62,
10, 10, 10, 9, 10, 62, 10, 10, 255, 255, 255, 175, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 10, 10, 10, 9, 10, 62, 45, 10, 59, 9, 0,
];
let buffer = ReadBuffer::from_slice(&data);
assert!(InficonReader::new(buffer, ()).is_err());

// Third malformed input (shadows the previous `data`).
let data = [
4, 3, 2, 1, 83, 80, 65, 72, 66, 65, 77, 1, 62, 1, 230, 255, 255, 251, 254, 254, 254,
254, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 0, 10, 62, 10, 59, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255,
255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 246, 255, 255, 255, 0, 0, 0, 0, 10, 10, 102, 13, 10, 35, 24, 10, 62, 13,
10, 13, 227, 5, 62, 10, 227, 134, 134, 10, 62, 10, 10, 62, 42, 10, 10, 10, 62, 0, 13,
10, 10, 227, 10, 10, 62, 0, 13, 10, 10, 227, 59, 10, 10, 250, 255, 10, 62, 41, 0, 13,
10, 10, 227, 43, 10, 10, 10, 10, 10, 10, 47, 59, 10, 10, 62, 0, 13, 10, 10, 227, 10,
10, 227, 59, 10, 10, 0, 10, 10, 10, 10, 26, 10, 10, 41, 0, 13, 10, 10, 227, 59, 10, 10,
10, 10, 10, 14, 10, 255, 255, 255, 255, 176, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 175, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 245, 240, 255, 255, 255, 255, 255, 169, 77, 86, 139, 139, 116, 35,
116, 116, 116, 246, 245, 245, 240, 250, 255, 10, 62, 41, 0, 13, 10, 10, 227, 43, 10,
10, 10, 10, 10, 10, 47, 59, 10, 10, 4, 3, 2, 1, 83, 80, 181, 181, 181, 181, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255,
255, 255, 255, 255, 58, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 122, 255, 255, 255,
255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 246, 255, 255, 255, 0, 0, 0, 0, 59, 10, 10, 10, 10, 10, 14, 10, 255, 10,
10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 116, 116, 246, 245, 245, 240,
];
let buffer = ReadBuffer::from_slice(&data);
assert!(InficonReader::new(buffer, ()).is_err());

// No `?` is used above, so reaching this line means every assertion held.
Ok(())
}
}
5 changes: 5 additions & 0 deletions entab/src/readers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ pub mod flow;
pub mod inficon;
/// Reader for FASTA/FASTQ formats that parse into "kmers"
pub mod kmers;
/// Reader for PNG image format
#[cfg(feature = "std")]
pub mod png;
/// Reader for BAM/SAM bioinformatics formats
pub mod sam;
/// Readers for Thermo mass spectral isotopic formats
Expand All @@ -42,6 +45,8 @@ pub fn get_reader<'r>(
"fastq" => Box::new(fastq::FastqReader::new(rb, ())?),
"fcs" => Box::new(flow::FcsReader::new(rb, ())?),
"inficon" => Box::new(inficon::InficonReader::new(rb, ())?),
#[cfg(feature = "std")]
"png" => Box::new(png::PngReader::new(rb, ())?),
"sam" => Box::new(sam::SamReader::new(rb, ())?),
"thermo_cf" => Box::new(thermo_iso::ThermoCfReader::new(rb, ())?),
"thermo_dxf" => Box::new(thermo_iso::ThermoDxfReader::new(rb, ())?),
Expand Down
Loading

0 comments on commit 0cb22c4

Please sign in to comment.