From 252b1211bb7635c1f5dd83f92754387b0d9429f1 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 7 Dec 2025 22:27:22 -0500 Subject: [PATCH 1/2] WIP: Expose npy module --- src/lib.rs | 2 +- src/npy/header.rs | 44 +++++++++++++++++++++++++++++++++++--------- src/npy/mod.rs | 4 ++++ 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index dd0c698..89899ab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -50,7 +50,7 @@ //! //! [header dictionary]: https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.format.html#format-version-1-0 -mod npy; +pub mod npy; #[cfg(feature = "npz")] mod npz; diff --git a/src/npy/header.rs b/src/npy/header.rs index 2a1e5f4..fb25859 100644 --- a/src/npy/header.rs +++ b/src/npy/header.rs @@ -1,3 +1,8 @@ +//! Types and methods for (de)serializing the header of an `.npy` file. +//! +//! In most cases, users do not need this module, since they can use the more convenient, +//! higher-level functionality instead. + use byteorder::{ByteOrder, LittleEndian, ReadBytesExt}; use num_traits::ToPrimitive; use py_literal::{ @@ -17,30 +22,40 @@ const MAGIC_STRING: &[u8] = b"\x93NUMPY"; // If this changes, update the docs of `ViewNpyExt` and `ViewMutNpyExt`. const HEADER_DIVISOR: usize = 64; +/// Error parsing an `.npy` header. #[derive(Debug)] pub enum ParseHeaderError { + /// The first several bytes are not the expected magic string. MagicString, - Version { - major: u8, - minor: u8, - }, - /// Indicates that the `HEADER_LEN` doesn't fit in `usize`. + /// The version number specified in the header is unsupported. + Version { major: u8, minor: u8 }, + /// The `HEADER_LEN` doesn't fit in `usize`. HeaderLengthOverflow(u32), - /// Indicates that the array format string contains non-ASCII characters. + /// The array format string contains non-ASCII characters. + /// /// This is an error for .npy format versions 1.0 and 2.0. NonAscii, - /// Error parsing the array format string as UTF-8. This does not apply to - /// .npy format versions 1.0 and 2.0, which require the array format string - /// to be ASCII. + /// Error parsing the array format string as UTF-8. + /// + /// This does not apply to .npy format versions 1.0 and 2.0, which require the array format + /// string to be ASCII. Utf8Parse(std::str::Utf8Error), + /// The Python dictionary in the header contains an unexpected key. UnknownKey(PyValue), + /// The Python dictionary in the header is missing an expected key. MissingKey(String), + /// The value corresponding to an expected key is illegal (e.g., the wrong type). IllegalValue { + /// The key in the header dictionary. key: String, + /// The corresponding (illegal) value. value: PyValue, }, + /// Error parsing the dictionary in the header. DictParse(PyValueParseError), + /// The metadata in the header is not a dictionary. MetaNotDict(PyValue), + /// There is no newline at the end of the header. MissingNewline, } @@ -248,8 +263,10 @@ struct HeaderLengthInfo { formatted_header_len: Vec, } +/// Error formatting an `.npy` header. #[derive(Debug)] pub enum FormatHeaderError { + /// Error formatting the header's metadata dictionary. PyValue(PyValueFormatError), /// The total header length overflows `usize`, or `HEADER_LEN` exceeds the /// maximum encodable value. @@ -316,6 +333,7 @@ impl From for WriteHeaderError { } } +/// Layout of an array stored in an `.npy` file. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub enum Layout { /// Standard layout (C order). @@ -332,10 +350,15 @@ impl Layout { } } +/// Header of an `.npy` file. #[derive(Clone, Debug)] pub struct Header { + /// A Python literal which can be passed as an argument to the `numpy.dtype` constructor to + /// create the array's dtype. pub type_descriptor: PyValue, + /// The layout of the array. pub layout: Layout, + /// The shape of the array. pub shape: Vec, } @@ -408,6 +431,7 @@ impl Header { } } + /// Deserializes a header from the provided reader. pub fn from_reader(reader: &mut R) -> Result { // Check for magic string. let mut buf = vec![0; MAGIC_STRING.len()]; @@ -470,6 +494,7 @@ impl Header { ]) } + /// Returns the serialized representation of the header. pub fn to_bytes(&self) -> Result, FormatHeaderError> { // Metadata describing array's format as ASCII string. let mut arr_format = Vec::new(); @@ -499,6 +524,7 @@ impl Header { Ok(out) } + /// Writes the serialized representation of the header to the provided writer. pub fn write(&self, mut writer: W) -> Result<(), WriteHeaderError> { let bytes = self.to_bytes()?; writer.write_all(&bytes)?; diff --git a/src/npy/mod.rs b/src/npy/mod.rs index e8b30f8..8a11ded 100644 --- a/src/npy/mod.rs +++ b/src/npy/mod.rs @@ -1,3 +1,7 @@ +//! Functionality for `.npy` files. +//! +//! Most of this functionality is reexported at the top level of the crate. + mod elements; pub mod header; From 96486979f8b3731d1f8b9081b085747a8dfe2c2c Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 14 Dec 2025 18:32:08 -0500 Subject: [PATCH 2/2] Add more docs --- src/npy/header.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/npy/header.rs b/src/npy/header.rs index fb25859..af65067 100644 --- a/src/npy/header.rs +++ b/src/npy/header.rs @@ -109,9 +109,12 @@ impl From for ParseHeaderError { } } +/// Error reading an `.npy` header. #[derive(Debug)] pub enum ReadHeaderError { + /// I/O error. Io(io::Error), + /// Error parsing the header. Parse(ParseHeaderError), } @@ -297,9 +300,12 @@ impl From for FormatHeaderError { } } +/// Error writing an `.npy` header. #[derive(Debug)] pub enum WriteHeaderError { + /// I/O error. Io(io::Error), + /// Error formatting the header. Format(FormatHeaderError), }