Skip to content

Commit

Permalink
Use proper Date object
Browse files Browse the repository at this point in the history
  • Loading branch information
bovee committed Sep 18, 2020
1 parent 1ef4c2b commit 8d691d2
Show file tree
Hide file tree
Showing 9 changed files with 160 additions and 87 deletions.
1 change: 1 addition & 0 deletions entab-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ pub fn main() -> Result<(), EtError> {
.arg(
Arg::with_name("metadata")
.short('m')
.long("metadata")
.about("Reports metadata about the file instead of the data itself"),
)
.get_matches();
Expand Down
87 changes: 51 additions & 36 deletions entab-cli/src/tsv_params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,7 @@ impl TsvParams {
if let TsvEscapeStyle::Quote(quote_char) = self.escape_style {
writer.write_all(&[quote_char])?;
writer.write_all(string)?;
return writer
.write_all(&[quote_char])
.map_err(|e| e.into());
return writer.write_all(&[quote_char]).map_err(|e| e.into());
};
writer.write_all(&string[..first])?;
if let TsvEscapeStyle::Escape(escape_char) = self.escape_style {
Expand Down Expand Up @@ -87,7 +85,7 @@ impl TsvParams {
Value::Null => writer.write_all(&self.null_value)?,
Value::Boolean(true) => writer.write_all(&self.true_value)?,
Value::Boolean(false) => writer.write_all(&self.false_value)?,
Value::Datetime(s) => writer.write_all(s.as_bytes())?,
Value::Datetime(s) => writer.write_all(format!("{:+?}", s).as_bytes())?,
Value::Float(v) => writer.write_all(format!("{}", v).as_bytes())?,
Value::Integer(v) => writer.write_all(format!("{}", v).as_bytes())?,
Value::List(l) => {
Expand All @@ -108,47 +106,64 @@ impl TsvParams {
}
}

#[test]
fn test_replace_chars() {
#[cfg(test)]
mod tests {
use super::*;
use std::io::Cursor;

let mut params = TsvParams::default();
params.escape_style = TsvEscapeStyle::Replace(b'|');
#[test]
fn test_replace_chars() {
let mut params = TsvParams::default();
params.escape_style = TsvEscapeStyle::Replace(b'|');

let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"", &mut buffer);
assert_eq!(buffer.get_ref(), b"");
let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"", &mut buffer);
assert_eq!(buffer.get_ref(), b"");

let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\t", &mut buffer);
assert_eq!(buffer.get_ref(), b"|");
let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\t", &mut buffer);
assert_eq!(buffer.get_ref(), b"|");

let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"test", &mut buffer);
assert_eq!(buffer.get_ref(), b"test");
let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"test", &mut buffer);
assert_eq!(buffer.get_ref(), b"test");

let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\ttest", &mut buffer);
assert_eq!(buffer.get_ref(), b"|test");
let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\ttest", &mut buffer);
assert_eq!(buffer.get_ref(), b"|test");

let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\ttest\t", &mut buffer);
assert_eq!(buffer.get_ref(), b"|test|");
let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\ttest\t", &mut buffer);
assert_eq!(buffer.get_ref(), b"|test|");

let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\ttest\tt\t", &mut buffer);
assert_eq!(buffer.get_ref(), b"|test|t|");
let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\ttest\tt\t", &mut buffer);
assert_eq!(buffer.get_ref(), b"|test|t|");

let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\t\t\t", &mut buffer);
assert_eq!(buffer.get_ref(), b"|||");
let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\t\t\t", &mut buffer);
assert_eq!(buffer.get_ref(), b"|||");

params.escape_style = TsvEscapeStyle::Escape(b'|');
let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\t", &mut buffer);
assert_eq!(buffer.get_ref(), b"|\t");
params.escape_style = TsvEscapeStyle::Escape(b'|');
let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\t", &mut buffer);
assert_eq!(buffer.get_ref(), b"|\t");

let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\ttest\t", &mut buffer);
assert_eq!(buffer.get_ref(), b"|\ttest|\t");
let mut buffer = Cursor::new(Vec::new());
let _ = params.write_str(b"\ttest\t", &mut buffer);
assert_eq!(buffer.get_ref(), b"|\ttest|\t");
}

/// Checks that `write_value` renders a `Value` parsed from an ISO-8601 string
/// as `2001-02-03T04:05:06`, i.e. without the fractional seconds or the `Z`
/// suffix present in the input.
#[test]
fn test_write_value_date() -> Result<(), EtError> {
    const DATE: &str = "2001-02-03T04:05:06.000Z";
    const OUT_DATE: &[u8] = b"2001-02-03T04:05:06";

    let p = TsvParams::default();
    let mut buffer = Cursor::new(Vec::new());
    let datetime = Value::from_iso_date(DATE)?;
    // Propagate a failed write instead of discarding it, so the test reports
    // the real error rather than a confusing buffer mismatch.
    p.write_value(&datetime, &mut buffer)?;
    assert_eq!(buffer.get_ref(), &OUT_DATE);
    Ok(())
}
}
35 changes: 0 additions & 35 deletions entab-cli/src/utils.rs

This file was deleted.

1 change: 1 addition & 0 deletions entab-js/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ impl Reader {
.map_err(|_| JsValue::from_str("Error translating metadata"))
}

#[allow(clippy::should_implement_trait)]
#[wasm_bindgen]
pub fn next(&mut self) -> Result<JsValue, JsValue> {
if let Some(value) = self.reader.next_record().map_err(to_js)? {
Expand Down
13 changes: 7 additions & 6 deletions entab-py/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use entab_base::readers::{get_reader, RecordReader};
use entab_base::record::Value;
use pyo3::class::{PyIterProtocol, PyObjectProtocol};
use pyo3::prelude::*;
use pyo3::types::{PyDict, PyList, PyTuple};
use pyo3::types::{PyDateTime, PyDict, PyList, PyTuple};
use pyo3::{create_exception, exceptions};

use crate::raw_io_wrapper::RawIoWrapper;
Expand All @@ -27,7 +27,10 @@ fn py_from_value(value: Value, py: Python) -> PyResult<PyObject> {
Ok(match value {
Value::Null => py.None().as_ref(py).into(),
Value::Boolean(b) => b.to_object(py),
Value::Datetime(d) => d.to_object(py),
Value::Datetime(d) => {
let timestamp = d.timestamp_millis() as f64 / 1000.;
PyDateTime::from_timestamp(py, timestamp, None)?.to_object(py)
}
Value::Float(v) => v.to_object(py),
Value::Integer(v) => v.to_object(py),
Value::String(s) => s.to_object(py),
Expand All @@ -37,11 +40,9 @@ fn py_from_value(value: Value, py: Python) -> PyResult<PyObject> {
list.append(py_from_value(item, py)?)?;
}
list.to_object(py)
},
}
_ => {
return Err(EntabError::py_err(
"record subelements unimplemented",
));
return Err(EntabError::py_err("record subelements unimplemented"));
}
})
}
Expand Down
5 changes: 3 additions & 2 deletions entab/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ categories = ["parsing", "science"]

[dependencies]
# parsing
chrono = { version = "0.4", default-features=false, features = ["alloc", "serde"] }
memchr = "2.3"
serde = { version = "1.0", features = ["derive"] }
serde = { version = "1.0", default-features=false, features = ["derive"] }
# compression
flate2 = { version = "1.0" }
bzip2 = { version = "0.3", optional = true }
Expand All @@ -26,7 +27,7 @@ criterion = "0.3"
default = ["compression", "std"]
compression = ["bzip2", "xz2", "zstd"]
compression_manylinux = ["bzip2", "zstd"]
std = []
std = ["chrono/std", "serde/std"]

[[bench]]
name = "benchmarks"
Expand Down
28 changes: 24 additions & 4 deletions entab/src/readers/chemstation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ use alloc::vec;
use alloc::vec::Vec;
use core::marker::Copy;

use chrono::NaiveDateTime;

use crate::buffer::ReadBuffer;
use crate::parsers::{Endian, FromBuffer, FromSlice};
use crate::record::{RecordHeader, StateMetadata, Value};
Expand Down Expand Up @@ -55,7 +57,7 @@ pub struct ChemstationMetadata {
/// The name of the operator
pub operator: String,
/// The date the sample was run
pub run_date: String,
pub run_date: Option<NaiveDateTime>,
/// The instrument the sample was run on
pub instrument: String,
/// The method the instrument ran
Expand Down Expand Up @@ -120,9 +122,27 @@ fn get_metadata(header: &[u8]) -> Result<ChemstationMetadata, EtError> {
.trim()
.to_string();
let run_date_len = usize::from(header[178]);
let run_date = str::from_utf8(&header[179..179 + run_date_len])?
.trim()
.to_string();
// We need to detect the date format before we can convert into a
// NaiveDateTime; not sure the format even maps to the file type
// (it may be computer-dependent?)
let raw_run_date = str::from_utf8(&header[179..179 + run_date_len])?
.trim();
let run_date = if let Ok(d) = NaiveDateTime::parse_from_str(raw_run_date, "%d-%b-%y, %H:%M:%S") {
// format in MWD
Some(d)
} else if let Ok(d) = NaiveDateTime::parse_from_str(raw_run_date, "%d %b %y %l:%M %P") {
// format in MS
Some(d)
} else if let Ok(d) = NaiveDateTime::parse_from_str(raw_run_date, "%d %b %y %l:%M %P %z") {
// format in MS with timezone
Some(d)
} else if let Ok(d) = NaiveDateTime::parse_from_str(raw_run_date, "%m/%d/%y %I:%M:%S %p") {
// format in FID
Some(d)
} else {
None
};

let instrument_len = usize::from(header[208]);
let instrument = str::from_utf8(&header[209..209 + instrument_len])?
.trim()
Expand Down
51 changes: 48 additions & 3 deletions entab/src/readers/flow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ use alloc::vec::Vec;
use alloc::{format, str};
use core::default::Default;

use chrono::{NaiveDate, NaiveTime};

use crate::buffer::ReadBuffer;
use crate::parsers::{Endian, FromBuffer};
use crate::readers::RecordReader;
Expand All @@ -22,7 +24,7 @@ impl<'r> FromBuffer<'r> for Option<FcsHeaderKeyValue<'r>> {
let mut i = 0;
let mut temp = None;
let (key_end, value_end) = loop {
if rb.get_byte_pos() + i as u64 >= text_end {
if rb.get_byte_pos() + i as u64 > text_end {
return Ok(None);
}
if i + 2 >= rb.len() {
Expand Down Expand Up @@ -117,6 +119,8 @@ impl<'r> FromBuffer<'r> for FcsState {
}
let _ = rb.extract::<&[u8]>(text_start as usize - 58 - start_pos)?;
let delim: u8 = rb.extract(Endian::Little)?;
let mut date = NaiveDate::from_yo(2000, 1);
let mut time = NaiveTime::from_num_seconds_from_midnight(0, 0);
while let Some(FcsHeaderKeyValue(key, value)) = rb.extract((delim, text_end))? {
match (key.as_ref(), value.as_ref()) {
("$BEGINDATA", v) => {
Expand Down Expand Up @@ -153,13 +157,38 @@ impl<'r> FromBuffer<'r> for FcsState {
("$MODE", v) => return Err(EtError::new(format!("Unknown FCS $MODE {}", v))),
("$TOT", v) => n_events_left = v.trim().parse()?,
("$BTIM", v) => {
let _ = metadata.insert("start_time".into(), v.to_string().into());
// TODO: sometimes there's a fractional (/60) part after the last colon
// that we should include in the time too
let hms = v
.trim()
.split(':')
.take(3)
.map(|i| i.to_owned())
.collect::<Vec<String>>()
.join(":");
if let Ok(t) = NaiveTime::parse_from_str(&hms, "%H:%M:%S") {
time = t;
}
}
("$CELLS", v) => {
let _ = metadata.insert("specimen".into(), v.to_string().into());
}
("$DATE", v) => {
let _ = metadata.insert("date".into(), v.to_string().into());
// "DD-MM-YYYY"
// "YYYY-mmm-DD"
if let Ok(d) = NaiveDate::parse_from_str(v.trim(), "%d-%b-%y") {
// FCS2.0 only had a two-digit year (e.g. 01-JAN-20).
date = d;
} else if let Ok(d) = NaiveDate::parse_from_str(v.trim(), "%d-%b-%Y") {
// FCS3.0 and 3.1 are supposed to be e.g. 01-JAN-2020.
date = d;
} else if let Ok(d) = NaiveDate::parse_from_str(v.trim(), "%Y-%b-%d") {
// non-standard FCS3.0?
date = d;
} else if let Ok(d) = NaiveDate::parse_from_str(v.trim(), "%d-%m-%Y") {
// one weird Partec FCS2.0 file had this
date = d;
}
}
("$INST", v) => {
let _ = metadata.insert("instrument".into(), v.to_string().into());
Expand Down Expand Up @@ -211,6 +240,7 @@ impl<'r> FromBuffer<'r> for FcsState {
_ => {}
}
}
let _ = metadata.insert("date".into(), date.and_time(time).into());
// get anything between the end of the text segment and the start of the data segment
let _ = rb.extract::<&[u8]>((data_start - rb.get_byte_pos()) as usize)?;

Expand Down Expand Up @@ -394,6 +424,21 @@ mod tests {
Ok(())
}

/// Reads the bundled BD LSR II sample file and checks two metadata entries
/// exposed by `FcsReader::metadata`: the `specimen_source` string and the
/// `date` entry, which is compared against a full date-and-time value.
#[test]
fn test_fcs_reader_metadata() -> Result<(), EtError> {
    let rb = ReadBuffer::from_slice(include_bytes!(
        "../../tests/data/HTS_BD_LSR_II_Mixed_Specimen_001_D6_D06.fcs"
    ));
    let reader = FcsReader::new(rb, ())?;
    let metadata = reader.metadata();
    assert_eq!(metadata["specimen_source"], "Specimen_001".into());

    // Plain decimal literals: a zero-prefixed `08` looks like octal and is
    // flagged by clippy's `zero_prefixed_literal` lint.
    let expected_date = NaiveDate::from_ymd(2012, 10, 26).and_hms(18, 8, 10);
    assert_eq!(metadata["date"], expected_date.into());
    Ok(())
}

#[test]
fn test_fcs_bad_fuzzes() -> Result<(), EtError> {
let rb = ReadBuffer::from_slice(b"FCS3.1 \n\n\n0\n\n\n\n\n\n0\n\n\n\n\n\n\n \n\n\n0\n\n\n\n \n\n\n0\n\nCS3.1 \n\n\n0\n\n\n\n\n;");
Expand Down
Loading

0 comments on commit 8d691d2

Please sign in to comment.