From 2acca88724f7ba76ed2bd40c39798d5d3a905042 Mon Sep 17 00:00:00 2001 From: KeKsBoTer <14186588+KeKsBoTer@users.noreply.github.com> Date: Tue, 23 Apr 2024 16:35:32 +0200 Subject: [PATCH 1/8] make public --- src/read.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/read.rs b/src/read.rs index 9fe5698..74c96ff 100644 --- a/src/read.rs +++ b/src/read.rs @@ -346,7 +346,7 @@ impl NpyHeader { impl NpyReader { #[inline(always)] - fn reader(&self) -> &R { + pub fn reader(&self) -> &R { &self.reader_and_current_index.0 } From 30cce5ffb3b296c8040240914c8482bd2e5cd3ed Mon Sep 17 00:00:00 2001 From: KeKsBoTer <14186588+KeKsBoTer@users.noreply.github.com> Date: Tue, 23 Apr 2024 16:49:02 +0200 Subject: [PATCH 2/8] test --- src/read.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/read.rs b/src/read.rs index 74c96ff..0d267ad 100644 --- a/src/read.rs +++ b/src/read.rs @@ -182,7 +182,7 @@ pub struct NpyReader { header: NpyHeader, type_reader: ::TypeReader, // stateful parts, put together like this to remind you to always update them in sync - reader_and_current_index: (R, u64), + pub reader_and_current_index: (R, u64), } /// Legacy type for reading `npy` files. @@ -346,7 +346,7 @@ impl NpyHeader { impl NpyReader { #[inline(always)] - pub fn reader(&self) -> &R { + fn reader(&self) -> &R { &self.reader_and_current_index.0 } From 5085c73efe6e1273423dec929da511ff685d495d Mon Sep 17 00:00:00 2001 From: KeKsBoTer <14186588+KeKsBoTer@users.noreply.github.com> Date: Tue, 23 Apr 2024 17:28:07 +0200 Subject: [PATCH 3/8] added fast read --- Cargo.toml | 3 ++- src/read.rs | 42 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a972f3f..674c75f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ num-bigint = "0.4" num-complex = { version = "0.4", optional = true } arrayvec = { version = "0.7.2", optional = true } half = { version = "2.1.0", optional = true } - +bytemuck = {version = "1.15.0", optional = true} [dependencies.npyz-derive] path = "derive" @@ -70,6 +70,7 @@ arrayvec = ["dep:arrayvec"] complex = ["dep:num-complex"] half = ["dep:half"] npz = ["dep:zip"] +bytemuck= ["dep:bytemuck"] [[bench]] name = "bench" diff --git a/src/read.rs b/src/read.rs index 0d267ad..590bbc2 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1,8 +1,10 @@ use std::collections::HashMap; +use std::f32::consts::E; use std::io; use crate::header::{Value, DType, read_header, convert_value_to_shape}; use crate::serialize::{Deserialize, TypeRead, DTypeError}; +use crate::Endianness; /// Object for reading an `npy` file. /// @@ -182,7 +184,7 @@ pub struct NpyReader { header: NpyHeader, type_reader: ::TypeReader, // stateful parts, put together like this to remind you to always update them in sync - pub reader_and_current_index: (R, u64), + reader_and_current_index: (R, u64), } /// Legacy type for reading `npy` files. @@ -410,6 +412,44 @@ impl NpyReader where R: io::Seek { } } +/// # Bytemuck read +#[cfg(feature = "bytemuck")] +impl NpyReader { + + fn read_bytemuck(&mut self) -> io::Result> { + let mut buffer = Vec::with_capacity(self.len() as usize*self.header.item_size); + self.reader_and_current_index.0.read_to_end(&mut buffer)?; + Ok(bytemuck::cast_slice(&buffer).to_vec()) + } + + fn read_byteorder(&mut self) -> io::Result> { + todo!("implement me") + } + /// Read the remaining data as a slice of `T`. + pub fn read_complete(&mut self) -> io::Result> { + match &self.header.dtype{ + DType::Plain(d) =>{ + if cfg!(endian = "big") { + if d.endianness == Endianness::Big{ + self.read_bytemuck() + }else{ + self.read_byteorder() + } + }else if cfg!(endian = "big") { + if d.endianness == Endianness::Little{ + self.read_bytemuck() + }else{ + self.read_byteorder() + } + }else{ + panic!("unsuporrted endianess {:}" ,cfg!(endian)) + } + }, + _ => io::Result::Err(io::Error::new(io::ErrorKind::InvalidData, "only supported for plain data types")) + } + } +} + #[allow(deprecated)] impl<'a, T: Deserialize> NpyData<'a, T> { /// Deserialize a NPY file represented as bytes From c2a21c5922cc4356d7dc4d73be0aee2a58dd6d2d Mon Sep 17 00:00:00 2001 From: KeKsBoTer <14186588+KeKsBoTer@users.noreply.github.com> Date: Tue, 23 Apr 2024 17:31:24 +0200 Subject: [PATCH 4/8] fix --- src/read.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/read.rs b/src/read.rs index 590bbc2..46da2c5 100644 --- a/src/read.rs +++ b/src/read.rs @@ -429,20 +429,21 @@ impl NpyReader { pub fn read_complete(&mut self) -> io::Result> { match &self.header.dtype{ DType::Plain(d) =>{ - if cfg!(endian = "big") { + #[cfg(target_endian = "big")] + { if d.endianness == Endianness::Big{ self.read_bytemuck() }else{ self.read_byteorder() } - }else if cfg!(endian = "big") { + } + #[cfg(target_endian = "little")] + { if d.endianness == Endianness::Little{ self.read_bytemuck() }else{ self.read_byteorder() } - }else{ - panic!("unsuporrted endianess {:}" ,cfg!(endian)) } }, _ => io::Result::Err(io::Error::new(io::ErrorKind::InvalidData, "only supported for plain data types")) From 18fbfce9617019956cd8d505d827bbeb59f7ae5e Mon Sep 17 00:00:00 2001 From: Simon Niedermayr Date: Thu, 16 May 2024 16:35:42 +0200 Subject: [PATCH 5/8] added read_many --- src/read.rs | 46 +++++------------------------------ src/serialize/primitive.rs | 50 ++++++++++++++++++++++++++++++++++++++ src/serialize/traits.rs | 26 ++++++++++++++++++-- 3 files changed, 80 insertions(+), 42 deletions(-) diff --git a/src/read.rs b/src/read.rs index 46da2c5..799a83f 100644 --- a/src/read.rs +++ b/src/read.rs @@ -278,8 +278,12 @@ impl NpyFile { /// /// This is a convenience wrapper around [`Self::data`] and [`Iterator::collect`]. pub fn into_vec(self) -> io::Result> { - match self.data() { - Ok(r) => r.collect(), + match self.data::() { + Ok(r) => { + let n = r.header.n_records as usize; + let reader: T::TypeReader = r.type_reader; + return reader.read_many(r.reader_and_current_index.0,n); + }, Err(e) => Err(invalid_data(e)), } } @@ -412,44 +416,6 @@ impl NpyReader where R: io::Seek { } } -/// # Bytemuck read -#[cfg(feature = "bytemuck")] -impl NpyReader { - - fn read_bytemuck(&mut self) -> io::Result> { - let mut buffer = Vec::with_capacity(self.len() as usize*self.header.item_size); - self.reader_and_current_index.0.read_to_end(&mut buffer)?; - Ok(bytemuck::cast_slice(&buffer).to_vec()) - } - - fn read_byteorder(&mut self) -> io::Result> { - todo!("implement me") - } - /// Read the remaining data as a slice of `T`. - pub fn read_complete(&mut self) -> io::Result> { - match &self.header.dtype{ - DType::Plain(d) =>{ - #[cfg(target_endian = "big")] - { - if d.endianness == Endianness::Big{ - self.read_bytemuck() - }else{ - self.read_byteorder() - } - } - #[cfg(target_endian = "little")] - { - if d.endianness == Endianness::Little{ - self.read_bytemuck() - }else{ - self.read_byteorder() - } - } - }, - _ => io::Result::Err(io::Error::new(io::ErrorKind::InvalidData, "only supported for plain data types")) - } - } -} #[allow(deprecated)] impl<'a, T: Deserialize> NpyData<'a, T> { diff --git a/src/serialize/primitive.rs b/src/serialize/primitive.rs index 09e26db..7b00a17 100644 --- a/src/serialize/primitive.rs +++ b/src/serialize/primitive.rs @@ -18,6 +18,14 @@ pub trait PrimitiveReadWrite: Sized { #[doc(hidden)] fn primitive_read_one(reader: R, swap_bytes: bool) -> io::Result; #[doc(hidden)] + fn primitive_read_many(mut reader: R, swap_bytes: bool,n:usize) -> io::Result> { + let mut vec = Vec::with_capacity(n); + for _ in 0..n { + vec.push(Self::primitive_read_one(&mut reader, swap_bytes)?); + } + Ok(vec) + } + #[doc(hidden)] fn primitive_write_one(&self, writer: W, swap_bytes: bool) -> io::Result<()>; } @@ -38,6 +46,24 @@ macro_rules! derive_int_primitive_read_write { } } + #[inline] + fn primitive_read_many(mut reader: R, swap_bytes: bool,n:usize) -> io::Result> { + if !swap_bytes{ + use std::mem::size_of; + + let mut buf:Vec = vec![0u8; size_of::<$int>()*n]; + reader.read_exact(&mut buf)?; + + Ok(bytemuck::cast_slice(&buf).to_vec()) + }else{ + let mut vec = Vec::with_capacity(n); + for _ in 0..n { + vec.push(Self::primitive_read_one(&mut reader, swap_bytes)?); + } + Ok(vec) + } + } + #[inline] fn primitive_write_one(&self, mut writer: W, swap_bytes: bool) -> io::Result<()> { let swapped = match swap_bytes { @@ -62,6 +88,25 @@ macro_rules! derive_float_primitive_read_write { Ok(<$float>::from_bits(bits)) } + #[inline] + fn primitive_read_many(mut reader: R, swap_bytes: bool,n:usize) -> io::Result> { + if !swap_bytes{ + use std::mem::size_of; + + let mut buf:Vec = vec![0u8; size_of::<$int>()*n]; + reader.read_exact(&mut buf)?; + + Ok(bytemuck::cast_slice(&buf).to_vec()) + }else{ + let mut vec = Vec::with_capacity(n); + for _ in 0..n { + vec.push(Self::primitive_read_one(&mut reader, swap_bytes)?); + } + Ok(vec) + } + } + + #[inline] fn primitive_write_one(&self, writer: W, swap_bytes: bool) -> io::Result<()> { self.to_bits().primitive_write_one(writer, swap_bytes) @@ -131,6 +176,11 @@ impl TypeRead for PrimitiveReader { fn read_one(&self, reader: R) -> io::Result { T::primitive_read_one(reader, self.swap_bytes) } + + fn read_many(&self, mut bytes: R,n:usize) -> io::Result> { + T::primitive_read_many(&mut bytes, self.swap_bytes,n) + } + } impl TypeWrite for PrimitiveWriter { diff --git a/src/serialize/traits.rs b/src/serialize/traits.rs index 04ea1e9..c2ef032 100644 --- a/src/serialize/traits.rs +++ b/src/serialize/traits.rs @@ -2,7 +2,7 @@ use std::io; use std::fmt; use crate::header::DType; -use crate::type_str::{TypeStr}; +use crate::type_str::TypeStr; #[allow(unused)] // used by docstrings use crate::type_matchup_docs; @@ -111,7 +111,17 @@ pub trait TypeRead { /// The function. fn read_one(&self, bytes: R) -> io::Result - where Self: Sized; + where Self: Sized; + + /// read n values from the reader + fn read_many(&self, mut bytes: R,n:usize) -> io::Result> + where Self: Sized{ + let mut vec = Vec::with_capacity(n); + for _ in 0..n { + vec.push(self.read_one(&mut bytes)?); + } + Ok(vec) + } } /// Like some sort of `for Fn(W, &T) -> io::Result<()>`. @@ -141,6 +151,14 @@ pub trait TypeWrite { pub trait TypeReadDyn: TypeRead { #[doc(hidden)] fn read_one_dyn(&self, writer: &mut dyn io::Read) -> io::Result; + #[doc(hidden)] + fn read_many_dyn(&self, mut reader: &mut dyn io::Read,n:usize) -> io::Result>{ + let mut data = Vec::with_capacity(n); + for _ in 0..n { + data.push(self.read_one_dyn(&mut reader)?); + } + Ok(data) + } } impl TypeReadDyn for T { @@ -148,6 +166,10 @@ impl TypeReadDyn for T { fn read_one_dyn(&self, reader: &mut dyn io::Read) -> io::Result { self.read_one(reader) } + + fn read_many_dyn(&self, reader: &mut dyn io::Read,n:usize) -> io::Result> { + self.read_many(reader, n) + } } impl TypeRead for Box> { From 428b30656e9faad66a284bb3106e2f1c0dddf4f8 Mon Sep 17 00:00:00 2001 From: Simon Niedermayr Date: Thu, 16 May 2024 16:56:00 +0200 Subject: [PATCH 6/8] fix half dependencies --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 674c75f..c0cb685 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,7 +70,7 @@ arrayvec = ["dep:arrayvec"] complex = ["dep:num-complex"] half = ["dep:half"] npz = ["dep:zip"] -bytemuck= ["dep:bytemuck"] +bytemuck= ["dep:bytemuck","half/bytemuck"] [[bench]] name = "bench" From 876add72449d111881987869a9e6ef1ee2dc7f9f Mon Sep 17 00:00:00 2001 From: Simon Niedermayr Date: Thu, 16 May 2024 16:58:52 +0200 Subject: [PATCH 7/8] cleanup --- src/read.rs | 3 +-- src/serialize/mod.rs | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/read.rs b/src/read.rs index 799a83f..7c3845a 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1,10 +1,9 @@ use std::collections::HashMap; -use std::f32::consts::E; use std::io; use crate::header::{Value, DType, read_header, convert_value_to_shape}; use crate::serialize::{Deserialize, TypeRead, DTypeError}; -use crate::Endianness; + /// Object for reading an `npy` file. /// diff --git a/src/serialize/mod.rs b/src/serialize/mod.rs index 1eb3490..3d389f8 100644 --- a/src/serialize/mod.rs +++ b/src/serialize/mod.rs @@ -16,9 +16,11 @@ mod traits; pub use slice::*; mod slice; +#[allow(unused_imports)] pub use primitive::*; mod primitive; +#[allow(unused_imports)] pub use array_member::*; mod array_member; From 09bd710f347e6d43f0859bdc86f8cb960f9f5383 Mon Sep 17 00:00:00 2001 From: Simon Niedermayr Date: Thu, 16 May 2024 17:01:20 +0200 Subject: [PATCH 8/8] only use with feature --- src/serialize/primitive.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/serialize/primitive.rs b/src/serialize/primitive.rs index 7b00a17..8706ac7 100644 --- a/src/serialize/primitive.rs +++ b/src/serialize/primitive.rs @@ -46,6 +46,7 @@ macro_rules! derive_int_primitive_read_write { } } + #[cfg(feature = "bytemuck")] #[inline] fn primitive_read_many(mut reader: R, swap_bytes: bool,n:usize) -> io::Result> { if !swap_bytes{ @@ -88,6 +89,7 @@ macro_rules! derive_float_primitive_read_write { Ok(<$float>::from_bits(bits)) } + #[cfg(feature = "bytemuck")] #[inline] fn primitive_read_many(mut reader: R, swap_bytes: bool,n:usize) -> io::Result> { if !swap_bytes{ @@ -95,8 +97,7 @@ macro_rules! derive_float_primitive_read_write { let mut buf:Vec = vec![0u8; size_of::<$int>()*n]; reader.read_exact(&mut buf)?; - - Ok(bytemuck::cast_slice(&buf).to_vec()) + Ok(bytemuck::cast_slice(&buf).to_vec()) }else{ let mut vec = Vec::with_capacity(n); for _ in 0..n {