diff --git a/CHANGELOG.md b/CHANGELOG.md index 1457d95d..32f0bc81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Breaking**: change `RawBytesOffsets` into a validated newtype - **Breaking**: `ArrayBytes::new_vlen()` not returns a `Result` and validates bytes/offsets compatibility - Reenable broken compatibility tests since fixed in `zarr-python`/`numcodecs` +- **Breaking**: move the `zarrs::array::{data_type,fill_value}` modules into the `zarrs_data_type` crate ## [0.19.1] - 2025-01-19 diff --git a/Cargo.toml b/Cargo.toml index 41825a07..00653ae8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ resolver = "2" members = [ "zarrs", + "zarrs_data_type", "zarrs_metadata", "zarrs_storage", "zarrs_filesystem", @@ -26,6 +27,10 @@ module_name_repetitions = "allow" missing_panics_doc = "warn" missing_errors_doc = "warn" +[workspace.dependencies.zarrs_data_type] +version = "0.1.0" +path = "zarrs_data_type" + [workspace.dependencies.zarrs_metadata] version = "0.3.0" path = "zarrs_metadata" @@ -62,3 +67,10 @@ version = "0.51.0" [workspace.dependencies.zip] version = "2.1.3" + +[workspace.dependencies.half] +version = "2.0.0" +features = ["bytemuck"] + +[workspace.dependencies.num] +version = "0.4.1" diff --git a/README.md b/README.md index 694750fb..31b03404 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,7 @@ println!("{array_ndarray:4}"); ### Core - [`zarrs`]: The core library for manipulating Zarr hierarchies. +- [`zarrs_data_type`]: Zarr data types (re-exported as `zarrs::data_type`). - [`zarrs_metadata`]: Zarr metadata support (re-exported as `zarrs::metadata`). - [`zarrs_storage`]: The storage API for `zarrs` (re-exported as `zarrs::storage`). 
@@ -134,6 +135,7 @@ Unless you explicitly state otherwise, any contribution intentionally submitted [The `zarrs` Book]: https://book.zarrs.dev [`zarrs`]: https://github.com/LDeakin/zarrs/tree/main/zarrs +[`zarrs_data_type`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_data_type [`zarrs_metadata`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_metadata [`zarrs_storage`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_storage [`zarrs_filesystem`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_filesystem diff --git a/zarrs/Cargo.toml b/zarrs/Cargo.toml index e4cee82d..5afcb988 100644 --- a/zarrs/Cargo.toml +++ b/zarrs/Cargo.toml @@ -52,13 +52,13 @@ derive_more = { version = "1.0.0", features = ["deref", "display", "from"] } flate2 = { version = "1.0.30", optional = true } futures = { version = "0.3.29", optional = true } gdeflate-sys = { version = "0.4.1", optional = true } -half = { version = "2.0.0", features = ["bytemuck"] } +half = { workspace = true } inventory = "0.3.0" itertools = "0.14.0" lru = "0.12.4" moka = { version = "0.12.8", features = ["sync"] } ndarray = { version = ">=0.15.0,<17", optional = true } -num = { version = "0.4.1" } +num = { workspace = true } pco = { version = "0.4.0", optional = true } rayon = "1.10.0" rayon_iter_concurrent_limit = "0.2.0" @@ -68,6 +68,7 @@ thiserror = "2.0.0" thread_local = "1.1.8" unsafe_cell_slice = "0.2.0" zarrs_filesystem = { workspace = true, optional = true } +zarrs_data_type = { workspace = true } zarrs_metadata = { workspace = true } zarrs_storage = { workspace = true } zfp-sys = {version = "0.3.0", features = ["static"], optional = true } diff --git a/zarrs/doc/ecosystem.md b/zarrs/doc/ecosystem.md index b132e0f1..db528a37 100644 --- a/zarrs/doc/ecosystem.md +++ b/zarrs/doc/ecosystem.md @@ -1,5 +1,6 @@ #### Core - [`zarrs`]: The core library for manipulating Zarr hierarchies. +- [`zarrs_data_type`]: Zarr data types (re-exported as `zarrs::data_type`). 
- [`zarrs_metadata`]: Zarr metadata support (re-exported as `zarrs::metadata`). - [`zarrs_storage`]: The storage API for `zarrs` (re-exported as `zarrs::storage`). @@ -26,6 +27,7 @@ - Benchmarking tools and performance benchmarks of `zarrs`. [`zarrs`]: https://github.com/LDeakin/zarrs/tree/main/zarrs +[`zarrs_data_type`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_data_type [`zarrs_metadata`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_metadata [`zarrs_storage`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_storage [`zarrs_filesystem`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_filesystem diff --git a/zarrs/doc/status/data_types.md b/zarrs/doc/status/data_types.md index af053bf6..2b4c1202 100644 --- a/zarrs/doc/status/data_types.md +++ b/zarrs/doc/status/data_types.md @@ -8,24 +8,24 @@ † Experimental data types are recommended for evaluation only. -[bool]: crate::array::data_type::DataType::Bool -[int8]: crate::array::data_type::DataType::Int8 -[int16]: crate::array::data_type::DataType::Int16 -[int32]: crate::array::data_type::DataType::Int32 -[int64]: crate::array::data_type::DataType::Int64 -[uint8]: crate::array::data_type::DataType::UInt8 -[uint16]: crate::array::data_type::DataType::UInt16 -[uint32]: crate::array::data_type::DataType::UInt32 -[uint64]: crate::array::data_type::DataType::UInt64 -[float16]: crate::array::data_type::DataType::Float16 -[float32]: crate::array::data_type::DataType::Float32 -[float64]: crate::array::data_type::DataType::Float64 -[complex64]: crate::array::data_type::DataType::Complex64 -[complex128]: crate::array::data_type::DataType::Complex128 -[bfloat16]: crate::array::data_type::DataType::BFloat16 -[r* (raw bits)]: crate::array::data_type::DataType::RawBits -[string]: crate::array::data_type::DataType::String -[bytes]: crate::array::data_type::DataType::Bytes +[bool]: crate::data_type::DataType::Bool +[int8]: crate::data_type::DataType::Int8 +[int16]: crate::data_type::DataType::Int16 +[int32]: 
crate::data_type::DataType::Int32 +[int64]: crate::data_type::DataType::Int64 +[uint8]: crate::data_type::DataType::UInt8 +[uint16]: crate::data_type::DataType::UInt16 +[uint32]: crate::data_type::DataType::UInt32 +[uint64]: crate::data_type::DataType::UInt64 +[float16]: crate::data_type::DataType::Float16 +[float32]: crate::data_type::DataType::Float32 +[float64]: crate::data_type::DataType::Float64 +[complex64]: crate::data_type::DataType::Complex64 +[complex128]: crate::data_type::DataType::Complex128 +[bfloat16]: crate::data_type::DataType::BFloat16 +[r* (raw bits)]: crate::data_type::DataType::RawBits +[string]: crate::data_type::DataType::String +[bytes]: crate::data_type::DataType::Bytes [ZEP0001]: https://zarr.dev/zeps/accepted/ZEP0001.html [zarr-specs #130]: https://github.com/zarr-developers/zarr-specs/issues/130 diff --git a/zarrs/src/array.rs b/zarrs/src/array.rs index ff7eb378..f0ae3f96 100644 --- a/zarrs/src/array.rs +++ b/zarrs/src/array.rs @@ -33,10 +33,9 @@ pub mod chunk_grid; pub mod chunk_key_encoding; pub mod codec; pub mod concurrency; -pub mod data_type; mod element; -mod fill_value; pub mod storage_transformer; +pub use crate::data_type; // re-export for zarrs < 0.20 compat #[cfg(feature = "sharding")] mod array_sharded_ext; @@ -65,11 +64,11 @@ pub use self::{ codec::ArrayCodecTraits, codec::CodecChain, concurrency::RecommendedConcurrency, - data_type::DataType, element::{Element, ElementFixedLength, ElementOwned}, - fill_value::FillValue, storage_transformer::StorageTransformerChain, }; +pub use crate::data_type::{DataType, FillValue}; // re-export for zarrs < 0.20 compat + pub use crate::metadata::v2::ArrayMetadataV2; use crate::metadata::v2_to_v3::ArrayMetadataV2ToV3ConversionError; pub use crate::metadata::v3::{ diff --git a/zarrs/src/array/array_builder.rs b/zarrs/src/array/array_builder.rs index 48a58b82..7ea6d8ba 100644 --- a/zarrs/src/array/array_builder.rs +++ b/zarrs/src/array/array_builder.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use 
crate::{ + data_type::IncompatibleFillValueError, metadata::{v3::AdditionalFields, ChunkKeySeparator}, node::NodePath, }; @@ -11,7 +12,6 @@ use super::{ array_to_bytes::vlen::VlenCodec, ArrayToArrayCodecTraits, ArrayToBytesCodecTraits, BytesCodec, BytesToBytesCodecTraits, }, - data_type::IncompatibleFillValueError, Array, ArrayCreateError, ArrayMetadata, ArrayMetadataV3, ArrayShape, ChunkGrid, CodecChain, DataType, DimensionName, FillValue, StorageTransformerChain, }; diff --git a/zarrs/src/array/array_errors.rs b/zarrs/src/array/array_errors.rs index 527d0a58..272aff53 100644 --- a/zarrs/src/array/array_errors.rs +++ b/zarrs/src/array/array_errors.rs @@ -2,19 +2,16 @@ use thiserror::Error; use crate::{ array_subset::{ArraySubset, IncompatibleDimensionalityError}, + data_type::{ + IncompatibleFillValueError, IncompatibleFillValueMetadataError, UnsupportedDataTypeError, + }, metadata::v3::UnsupportedAdditionalFieldError, node::NodePathError, plugin::PluginCreateError, storage::StorageError, }; -use super::{ - codec::CodecError, - data_type::{ - IncompatibleFillValueError, IncompatibleFillValueMetadataError, UnsupportedDataTypeError, - }, - ArrayIndices, ArrayShape, -}; +use super::{codec::CodecError, ArrayIndices, ArrayShape}; /// An array creation error. #[derive(Debug, Error)] diff --git a/zarrs/src/array/array_representation.rs b/zarrs/src/array/array_representation.rs index 2c781fc5..5f186d50 100644 --- a/zarrs/src/array/array_representation.rs +++ b/zarrs/src/array/array_representation.rs @@ -1,6 +1,7 @@ use std::num::NonZeroU64; -use super::{data_type::IncompatibleFillValueError, ArrayShape, DataType, DataTypeSize, FillValue}; +use super::{ArrayShape, DataType, DataTypeSize, FillValue}; +use crate::data_type::IncompatibleFillValueError; use derive_more::Display; /// The shape, data type, and fill value of an `array`. 
diff --git a/zarrs/src/array/codec/array_to_bytes/bytes.rs b/zarrs/src/array/codec/array_to_bytes/bytes.rs index a2970240..4962dc1e 100644 --- a/zarrs/src/array/codec/array_to_bytes/bytes.rs +++ b/zarrs/src/array/codec/array_to_bytes/bytes.rs @@ -73,6 +73,10 @@ pub(crate) fn reverse_endianness(v: &mut [u8], data_type: &DataType) { } // Variable-sized data types are not supported and are rejected outside of this function DataType::String | DataType::Bytes => unreachable!(), + _ => { + // FIXME: Data type extensions, endianness reversal for custom data types + unimplemented!("Reverse endianness for data type {:?}", data_type) + } } } diff --git a/zarrs/src/lib.rs b/zarrs/src/lib.rs index 29586900..e0298e52 100644 --- a/zarrs/src/lib.rs +++ b/zarrs/src/lib.rs @@ -189,6 +189,7 @@ pub mod node; pub mod plugin; pub mod version; +pub use zarrs_data_type as data_type; pub use zarrs_metadata as metadata; pub use zarrs_storage as storage; diff --git a/zarrs/src/plugin.rs b/zarrs/src/plugin.rs index 47486045..ddcc00fe 100644 --- a/zarrs/src/plugin.rs +++ b/zarrs/src/plugin.rs @@ -3,7 +3,8 @@ //! A [`Plugin`] creates objects from [`MetadataV3`] (consisting of a name and optional configuration). //! It is used to implement [Zarr extension points](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#extension-points), such as [chunk grids][`crate::array::chunk_grid`], [chunk key encodings](`crate::array::chunk_key_encoding`), [codecs](`crate::array::codec`), and [storage transformers](`crate::array::storage_transformer`). //! -//! [Data types](`crate::array::data_type`) are not currently supported as an extension point. +//! [`DataType`](crate::data_type::DataType)s are not currently supported as an extension point. +// FIXME: Data type extensions //! //! Plugins are registered at compile time using the [inventory] crate. //! At runtime, a name matching function is applied to identify which registered plugin is associated with the metadata. 
diff --git a/zarrs_data_type/CHANGELOG.md b/zarrs_data_type/CHANGELOG.md new file mode 100644 index 00000000..91bd6bbf --- /dev/null +++ b/zarrs_data_type/CHANGELOG.md @@ -0,0 +1,17 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.1.0] - 2025-01-24 + +### Added +- Initial release +- Split from the `zarrs::array::{data_type,fill_value}` modules of `zarrs` 0.20.0-dev + +[unreleased]: https://github.com/LDeakin/zarrs/compare/zarrs_data_type-v0.1.0...HEAD +[0.1.0]: https://github.com/LDeakin/zarrs/releases/tag/zarrs_data_type-v0.1.0 diff --git a/zarrs_data_type/Cargo.toml b/zarrs_data_type/Cargo.toml new file mode 100644 index 00000000..921a13bf --- /dev/null +++ b/zarrs_data_type/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "zarrs_data_type" +version = "0.1.0" +authors = ["Lachlan Deakin "] +edition = "2021" +rust-version = "1.77" +description = "Zarr data types for the zarrs crate" +homepage = "https://zarrs.dev" +documentation = "https://docs.rs/zarrs_data_type" +repository = "https://github.com/LDeakin/zarrs" +license = "MIT OR Apache-2.0" +keywords = ["zarr", "zarrs"] +categories = ["encoding"] + +[lints] +workspace = true + +[package.metadata.docs.rs] +all-features = true + +[dependencies] +half = { workspace = true } +num = { workspace = true } +thiserror = "2.0.0" +derive_more = { version = "1.0.0", features = ["display", "from"] } +zarrs_metadata = { workspace = true } + +[dev-dependencies] +serde_json = { version = "1.0.71", features = ["float_roundtrip", "preserve_order"] } +bytemuck = { version = "1.14.0", features = ["extern_crate_alloc", "must_cast", "min_const_generics"] } diff --git a/zarrs_data_type/LICENCE-APACHE b/zarrs_data_type/LICENCE-APACHE new file mode 120000 index 00000000..536a3dbc --- /dev/null +++ 
b/zarrs_data_type/LICENCE-APACHE @@ -0,0 +1 @@ +../LICENCE-APACHE \ No newline at end of file diff --git a/zarrs_data_type/LICENCE-MIT b/zarrs_data_type/LICENCE-MIT new file mode 120000 index 00000000..e259b4c0 --- /dev/null +++ b/zarrs_data_type/LICENCE-MIT @@ -0,0 +1 @@ +../LICENCE-MIT \ No newline at end of file diff --git a/zarrs_data_type/README.md b/zarrs_data_type/README.md new file mode 100644 index 00000000..abe03ad1 --- /dev/null +++ b/zarrs_data_type/README.md @@ -0,0 +1,15 @@ +# zarrs_data_type + +[![Latest Version](https://img.shields.io/crates/v/zarrs_data_type.svg)](https://crates.io/crates/zarrs_data_type) +[![zarrs_data_type documentation](https://docs.rs/zarrs_data_type/badge.svg)](https://docs.rs/zarrs_data_type) +![msrv](https://img.shields.io/crates/msrv/zarrs_data_type) +[![build](https://github.com/LDeakin/zarrs/actions/workflows/ci.yml/badge.svg)](https://github.com/LDeakin/zarrs/actions/workflows/ci.yml) + +[Zarr](https://zarr-specs.readthedocs.io/) data types for the [`zarrs`](https://crates.io/crates/zarrs) Rust crate. + +## Licence +`zarrs_data_type` is licensed under either of + - the Apache License, Version 2.0 [LICENSE-APACHE](./LICENCE-APACHE) or <http://www.apache.org/licenses/LICENSE-2.0> or + - the MIT license [LICENSE-MIT](./LICENCE-MIT) or <http://opensource.org/licenses/MIT>, at your option. + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
diff --git a/zarrs/src/array/data_type.rs b/zarrs_data_type/src/data_type.rs similarity index 99% rename from zarrs/src/array/data_type.rs rename to zarrs_data_type/src/data_type.rs index 0ca4300e..1d0d64da 100644 --- a/zarrs/src/array/data_type.rs +++ b/zarrs_data_type/src/data_type.rs @@ -6,7 +6,7 @@ use derive_more::From; use half::{bf16, f16}; use thiserror::Error; -use crate::metadata::v3::array::{ +use zarrs_metadata::v3::array::{ data_type::{DataTypeMetadataV3, DataTypeSize}, fill_value::{ bfloat16_to_fill_value, float16_to_fill_value, float32_to_fill_value, @@ -356,7 +356,7 @@ impl core::fmt::Display for DataType { mod tests { use super::*; - use crate::metadata::v3::array::{ + use zarrs_metadata::v3::array::{ fill_value::{FillValueFloatStringNonFinite, HexString}, nan_representations::{ZARR_NAN_BF16, ZARR_NAN_F16, ZARR_NAN_F32, ZARR_NAN_F64}, }; diff --git a/zarrs/src/array/fill_value.rs b/zarrs_data_type/src/fill_value.rs similarity index 95% rename from zarrs/src/array/fill_value.rs rename to zarrs_data_type/src/fill_value.rs index 98560702..fa4609a9 100644 --- a/zarrs/src/array/fill_value.rs +++ b/zarrs_data_type/src/fill_value.rs @@ -233,10 +233,21 @@ impl FillValue { #[cfg(test)] mod tests { - use crate::array::transmute_to_bytes_vec; - use super::*; + /// Convert from `&[T]` to `Vec<u8>`. + #[must_use] + fn convert_to_bytes_vec<T: bytemuck::NoUninit>(from: &[T]) -> Vec<u8> { + bytemuck::allocation::pod_collect_to_vec(from) + } + + /// Transmute from `Vec<T>` to `Vec<u8>`. + #[must_use] + fn transmute_to_bytes_vec<T: bytemuck::NoUninit>(from: Vec<T>) -> Vec<u8> { + bytemuck::allocation::try_cast_vec(from) + .unwrap_or_else(|(_err, from)| convert_to_bytes_vec(&from)) + } + #[test] fn fill_value() { assert_eq!( diff --git a/zarrs_data_type/src/lib.rs b/zarrs_data_type/src/lib.rs new file mode 100644 index 00000000..b9a4eeaf --- /dev/null +++ b/zarrs_data_type/src/lib.rs @@ -0,0 +1,10 @@ +//! [Zarr](https://zarr-specs.readthedocs.io/) data types for the [`zarrs`](https://docs.rs/zarrs/latest/zarrs/index.html) crate.
+ +mod data_type; +mod fill_value; + +pub use data_type::{ + DataType, IncompatibleFillValueError, IncompatibleFillValueMetadataError, + UnsupportedDataTypeError, +}; +pub use fill_value::FillValue; diff --git a/zarrs_metadata/Cargo.toml b/zarrs_metadata/Cargo.toml index 940579cf..be95bac4 100644 --- a/zarrs_metadata/Cargo.toml +++ b/zarrs_metadata/Cargo.toml @@ -17,9 +17,9 @@ workspace = true [dependencies] derive_more = { version = "1.0.0", features = ["display", "from"] } -half = { version = "2.0.0", features = ["bytemuck"] } +half = { workspace = true } monostate = "0.1.0" -num = { version = "0.4.1" } +num = { workspace = true } serde = { version = "1.0.185", features = ["derive"] } serde_json = { version = "1.0.71", features = ["float_roundtrip", "preserve_order"] } serde_repr = "0.1.19"