Skip to content

Commit 177f701

Browse files
authored
refactor!: change RawBytesOffsets into a validated newtype (#137)
Closes #132
1 parent 091019b commit 177f701

File tree

9 files changed

+167
-26
lines changed

9 files changed

+167
-26
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2323
- `ArrayToBytesCodecTraits::decode_into`
2424
- `zarrs::array::copy_fill_value_into`
2525
- `zarrs::array::update_array_bytes`
26+
- **Breaking**: change `RawBytesOffsets` into a validated newtype
2627

2728
## [0.19.1] - 2025-01-19
2829

zarrs/src/array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ pub use self::{
4949
array_builder::ArrayBuilder,
5050
array_bytes::{
5151
copy_fill_value_into, update_array_bytes, ArrayBytes, ArrayBytesError, RawBytes,
52-
RawBytesOffsets,
52+
RawBytesOffsets, RawBytesOffsetsCreateError,
5353
},
5454
array_bytes_fixed_disjoint_view::{
5555
ArrayBytesFixedDisjointView, ArrayBytesFixedDisjointViewCreateError,

zarrs/src/array/array_bytes.rs

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ use super::{
1515
ravel_indices, ArrayBytesFixedDisjointView, ArraySize, DataType, FillValue,
1616
};
1717

18+
mod raw_bytes_offsets;
19+
pub use raw_bytes_offsets::{RawBytesOffsets, RawBytesOffsetsCreateError};
20+
1821
/// Array element bytes.
1922
///
2023
/// These can represent:
@@ -23,11 +26,6 @@ use super::{
2326
/// - Encoded array bytes after an array to bytes or bytes to bytes codecs.
2427
pub type RawBytes<'a> = Cow<'a, [u8]>;
2528

26-
/// Array element byte offsets.
27-
///
28-
/// These must be monotonically increasing. See [`ArrayBytes::Variable`].
29-
pub type RawBytesOffsets<'a> = Cow<'a, [usize]>; // FIXME: Switch to a validated newtype in zarrs 0.20
30-
3129
/// Fixed or variable length array bytes.
3230
#[derive(Clone, Debug, PartialEq, Eq)]
3331
pub enum ArrayBytes<'a> {
@@ -60,15 +58,10 @@ impl<'a> ArrayBytes<'a> {
6058
}
6159

6260
/// Create a new variable length array bytes from `bytes` and `offsets`.
63-
pub fn new_vlen(
64-
bytes: impl Into<RawBytes<'a>>,
65-
offsets: impl Into<RawBytesOffsets<'a>>, // FIXME: TryInto
66-
) -> Self {
67-
Self::Variable(bytes.into(), offsets.into())
61+
pub fn new_vlen(bytes: impl Into<RawBytes<'a>>, offsets: RawBytesOffsets<'a>) -> Self {
62+
Self::Variable(bytes.into(), offsets)
6863
}
6964

70-
// TODO: new_vlen_unchecked
71-
7265
/// Create a new [`ArrayBytes`] with `num_elements` composed entirely of the `fill_value`.
7366
///
7467
/// # Panics
@@ -85,12 +78,14 @@ impl<'a> ArrayBytes<'a> {
8578
}
8679
ArraySize::Variable { num_elements } => {
8780
let num_elements = usize::try_from(num_elements).unwrap();
88-
Self::new_vlen(
89-
fill_value.as_ne_bytes().repeat(num_elements),
90-
(0..=num_elements)
91-
.map(|i| i * fill_value.size())
92-
.collect::<Vec<_>>(),
93-
)
81+
Self::new_vlen(fill_value.as_ne_bytes().repeat(num_elements), unsafe {
82+
// SAFETY: The offsets are `i * fill_value.size()` for ascending `i`, so they are monotonically increasing.
83+
RawBytesOffsets::new_unchecked(
84+
(0..=num_elements)
85+
.map(|i| i * fill_value.size())
86+
.collect::<Vec<_>>(),
87+
)
88+
})
9489
}
9590
}
9691
}
@@ -207,6 +202,10 @@ impl<'a> ArrayBytes<'a> {
207202
ss_bytes.extend_from_slice(&bytes[curr..next]);
208203
}
209204
ss_offsets.push(ss_bytes.len());
205+
let ss_offsets = unsafe {
206+
// SAFETY: The offsets are monotonically increasing.
207+
RawBytesOffsets::new_unchecked(ss_offsets)
208+
};
210209
Ok(ArrayBytes::new_vlen(ss_bytes, ss_offsets))
211210
}
212211
ArrayBytes::Fixed(bytes) => {
@@ -334,6 +333,10 @@ pub(crate) fn update_bytes_vlen<'a>(
334333
}
335334
}
336335
offsets_new.push(bytes_new.len());
336+
let offsets_new = unsafe {
337+
// SAFETY: The offsets are monotonically increasing.
338+
RawBytesOffsets::new_unchecked(offsets_new)
339+
};
337340

338341
Ok(ArrayBytes::new_vlen(bytes_new, offsets_new))
339342
}
@@ -438,6 +441,10 @@ pub(crate) fn merge_chunks_vlen<'a>(
438441
*acc += sz;
439442
Some(*acc)
440443
}));
444+
let offsets = unsafe {
445+
// SAFETY: The offsets are monotonically increasing.
446+
RawBytesOffsets::new_unchecked(offsets)
447+
};
441448

442449
// Write bytes
443450
// TODO: Go parallel
@@ -485,6 +492,10 @@ pub(crate) fn extract_decoded_regions_vlen<'a>(
485492
region_bytes.extend_from_slice(&bytes[curr..next]);
486493
}
487494
region_offsets.push(region_bytes.len());
495+
let region_offsets = unsafe {
496+
// SAFETY: The offsets are monotonically increasing.
497+
RawBytesOffsets::new_unchecked(region_offsets)
498+
};
488499
out.push(ArrayBytes::new_vlen(region_bytes, region_offsets));
489500
}
490501
Ok(out)
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
use std::{borrow::Cow, ops::Deref};
2+
3+
use derive_more::derive::Display;
4+
use thiserror::Error;
5+
6+
/// Array element byte offsets.
7+
///
8+
/// These must be monotonically increasing, where equal consecutive offsets are permitted. See [`ArrayBytes::Variable`](crate::array::ArrayBytes::Variable).
9+
#[derive(Clone, Debug, PartialEq, Eq)]
10+
pub struct RawBytesOffsets<'a>(Cow<'a, [usize]>);
11+
12+
// Gives read-only access to the wrapped offsets as a plain `[usize]` slice.
impl Deref for RawBytesOffsets<'_> {
13+
type Target = [usize];
14+
15+
fn deref(&self) -> &Self::Target {
16+
&self.0
17+
}
18+
}
19+
20+
/// An error creating [`RawBytesOffsets`].
21+
///
22+
/// This error occurs when the offsets are not monotonically increasing, i.e. some offset is smaller than the offset preceding it.
23+
// NOTE(review): no `#[display(...)]` format is given here, so the rendered message is presumably just the type name — confirm, and consider a descriptive message.
#[derive(Debug, Error, Display)]
24+
pub struct RawBytesOffsetsCreateError;
25+
26+
impl<'a> RawBytesOffsets<'a> {
27+
/// Creates a new `RawBytesOffsets`, validating that every offset is greater than or equal to the one before it.
28+
///
29+
/// # Errors
30+
/// Returns an error if the offsets are not monotonically increasing (equal consecutive offsets are accepted).
31+
pub fn new(offsets: impl Into<Cow<'a, [usize]>>) -> Result<Self, RawBytesOffsetsCreateError> {
32+
let offsets = offsets.into();
33+
// Check every adjacent pair; inputs with fewer than two offsets have no pairs and therefore pass.
if offsets.windows(2).all(|w| w[1] >= w[0]) {
34+
Ok(Self(offsets))
35+
} else {
36+
Err(RawBytesOffsetsCreateError)
37+
}
38+
}
39+
40+
/// Creates a new `RawBytesOffsets` without checking the offsets.
41+
///
42+
/// # Safety
43+
/// The offsets must be monotonically increasing (equal consecutive offsets are permitted). This is only asserted in debug builds.
44+
#[must_use]
45+
pub unsafe fn new_unchecked(offsets: impl Into<Cow<'a, [usize]>>) -> Self {
46+
let offsets = offsets.into();
47+
debug_assert!(offsets.windows(2).all(|w| w[1] >= w[0]));
48+
Self(offsets)
49+
}
50+
51+
/// Converts into a `RawBytesOffsets<'static>`, cloning the offsets if not already owned.
52+
#[must_use]
53+
pub fn into_owned(self) -> RawBytesOffsets<'static> {
54+
RawBytesOffsets(self.0.into_owned().into())
55+
}
56+
}
57+
58+
// Fallible conversion from a `Cow` of offsets; validation is delegated to `RawBytesOffsets::new`.
impl<'a> TryFrom<Cow<'a, [usize]>> for RawBytesOffsets<'a> {
59+
type Error = RawBytesOffsetsCreateError;
60+
61+
fn try_from(value: Cow<'a, [usize]>) -> Result<Self, Self::Error> {
62+
Self::new(value)
63+
}
64+
}
65+
66+
// Fallible conversion from a borrowed slice of offsets; validation is delegated to `RawBytesOffsets::new`.
impl<'a> TryFrom<&'a [usize]> for RawBytesOffsets<'a> {
67+
type Error = RawBytesOffsetsCreateError;
68+
69+
fn try_from(value: &'a [usize]) -> Result<Self, Self::Error> {
70+
Self::new(value)
71+
}
72+
}
73+
74+
// Fallible conversion from a borrowed fixed-size array of offsets; validation is delegated to `RawBytesOffsets::new`.
impl<'a, const N: usize> TryFrom<&'a [usize; N]> for RawBytesOffsets<'a> {
75+
type Error = RawBytesOffsetsCreateError;
76+
77+
fn try_from(value: &'a [usize; N]) -> Result<Self, Self::Error> {
78+
Self::new(value)
79+
}
80+
}
81+
82+
// Fallible conversion from an owned vector of offsets; validation is delegated to `RawBytesOffsets::new`.
impl TryFrom<Vec<usize>> for RawBytesOffsets<'_> {
83+
type Error = RawBytesOffsetsCreateError;
84+
85+
fn try_from(value: Vec<usize>) -> Result<Self, Self::Error> {
86+
Self::new(value)
87+
}
88+
}
89+
90+
#[cfg(test)]
91+
mod tests {
92+
use super::*;
93+
94+
#[test]
95+
fn raw_bytes_offsets() {
96+
// Increasing offsets are accepted and readable through `Deref`.
let offsets = RawBytesOffsets::new(vec![0, 1, 2, 3]).unwrap();
97+
assert_eq!(&*offsets, &[0, 1, 2, 3]);
98+
// Equal consecutive offsets are accepted.
assert!(RawBytesOffsets::new(vec![0, 1, 1]).is_ok());
99+
// A decreasing offset is rejected.
assert!(RawBytesOffsets::new(vec![0, 1, 0]).is_err());
100+
// The `TryFrom` conversions (Vec, slice, array reference, Cow) apply the same validation.
assert!(RawBytesOffsets::try_from(vec![0, 1, 2]).is_ok());
101+
assert!(RawBytesOffsets::try_from(vec![0, 1, 0]).is_err());
102+
assert!(RawBytesOffsets::try_from([0, 1, 2].as_slice()).is_ok());
103+
assert!(RawBytesOffsets::try_from([0, 1, 0].as_slice()).is_err());
104+
assert!(RawBytesOffsets::try_from(&[0, 1, 2]).is_ok());
105+
assert!(RawBytesOffsets::try_from(&[0, 1, 0]).is_err());
106+
assert!(RawBytesOffsets::try_from(Cow::Owned(vec![0, 1, 0])).is_err());
107+
}
108+
}

zarrs/src/array/codec.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,10 @@ use std::borrow::Cow;
9898
use std::sync::Arc;
9999

100100
use super::{
101-
concurrency::RecommendedConcurrency, BytesRepresentation, ChunkRepresentation, ChunkShape,
102-
DataType,
101+
array_bytes::RawBytesOffsetsCreateError, concurrency::RecommendedConcurrency, ArrayBytes,
102+
ArrayBytesFixedDisjointView, BytesRepresentation, ChunkRepresentation, ChunkShape, DataType,
103+
RawBytes,
103104
};
104-
use super::{ArrayBytes, ArrayBytesFixedDisjointView, RawBytes};
105105

106106
/// A codec plugin.
107107
pub type CodecPlugin = Plugin<Codec>;
@@ -1060,6 +1060,9 @@ pub enum CodecError {
10601060
/// Subset out of bounds.
10611061
#[error(transparent)]
10621062
SubsetOutOfBounds(#[from] SubsetOutOfBoundsError),
1063+
/// Invalid byte offsets for variable length data.
1064+
#[error(transparent)]
1065+
RawBytesOffsetsCreate(#[from] RawBytesOffsetsCreateError),
10631066
}
10641067

10651068
impl From<&str> for CodecError {

zarrs/src/array/codec/array_to_array/transpose.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,10 @@ fn transpose_vlen<'a>(
120120
bytes_new.extend_from_slice(&bytes[curr..next]);
121121
}
122122
offsets_new.push(bytes_new.len());
123+
let offsets_new = unsafe {
124+
// SAFETY: The offsets are monotonically increasing.
125+
RawBytesOffsets::new_unchecked(offsets_new)
126+
};
123127

124128
ArrayBytes::new_vlen(bytes_new, offsets_new)
125129
}

zarrs/src/array/codec/array_to_bytes/vlen/vlen_codec.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crate::{
99
CodecOptions, CodecTraits, RecommendedConcurrency,
1010
},
1111
transmute_to_bytes_vec, ArrayBytes, BytesRepresentation, ChunkRepresentation, CodecChain,
12-
DataType, DataTypeSize, Endianness, FillValue, RawBytes,
12+
DataType, DataTypeSize, Endianness, FillValue, RawBytes, RawBytesOffsets,
1313
},
1414
config::global_config,
1515
metadata::v3::{array::codec::vlen::VlenIndexDataType, MetadataV3},
@@ -265,14 +265,16 @@ impl ArrayToBytesCodecTraits for VlenCodec {
265265
}
266266
}
267267
.unwrap();
268-
let (data, index) = super::get_vlen_bytes_and_offsets(
268+
let (data, offsets) = super::get_vlen_bytes_and_offsets(
269269
&index_chunk_rep,
270270
&bytes,
271271
&self.index_codecs,
272272
&self.data_codecs,
273273
options,
274274
)?;
275-
Ok(ArrayBytes::new_vlen(data, index))
275+
let offsets = RawBytesOffsets::new(offsets)?;
276+
277+
Ok(ArrayBytes::new_vlen(data, offsets))
276278
}
277279

278280
fn partial_decoder(

zarrs/src/array/codec/array_to_bytes/vlen_v2/vlen_v2_codec.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use crate::{
1111
RecommendedConcurrency,
1212
},
1313
ArrayBytes, BytesRepresentation, ChunkRepresentation, DataTypeSize, RawBytes,
14+
RawBytesOffsets,
1415
},
1516
config::global_config,
1617
metadata::v3::MetadataV3,
@@ -110,6 +111,7 @@ impl ArrayToBytesCodecTraits for VlenV2Codec {
110111
) -> Result<ArrayBytes<'a>, CodecError> {
111112
let num_elements = decoded_representation.num_elements_usize();
112113
let (bytes, offsets) = super::get_interleaved_bytes_and_offsets(num_elements, &bytes)?;
114+
let offsets = RawBytesOffsets::new(offsets)?;
113115
Ok(ArrayBytes::new_vlen(bytes, offsets))
114116
}
115117

zarrs/src/array/element.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ use std::mem::ManuallyDrop;
33
use itertools::Itertools;
44
use ArrayError::IncompatibleElementType as IET;
55

6-
use super::{convert_from_bytes_slice, transmute_to_bytes, ArrayBytes, ArrayError, DataType};
6+
use super::{
7+
convert_from_bytes_slice, transmute_to_bytes, ArrayBytes, ArrayError, DataType, RawBytesOffsets,
8+
};
79

810
/// A trait representing an array element type.
911
pub trait Element: Sized + Clone {
@@ -184,6 +186,10 @@ macro_rules! impl_element_string {
184186
len = len.checked_add(element.len()).unwrap();
185187
}
186188
offsets.push(len);
189+
let offsets = unsafe {
190+
// SAFETY: The offsets are monotonically increasing.
191+
RawBytesOffsets::new_unchecked(offsets)
192+
};
187193

188194
// Concatenate bytes
189195
let mut bytes = Vec::with_capacity(usize::try_from(len).unwrap());
@@ -238,6 +244,10 @@ macro_rules! impl_element_binary {
238244
len = len.checked_add(element.len()).unwrap();
239245
}
240246
offsets.push(len);
247+
let offsets = unsafe {
248+
// SAFETY: The offsets are monotonically increasing.
249+
RawBytesOffsets::new_unchecked(offsets)
250+
};
241251

242252
// Concatenate bytes
243253
let bytes = elements.concat();

0 commit comments

Comments
 (0)