From 0270b2a47dbd251aa0a9ac3c90dcca8d9eea4716 Mon Sep 17 00:00:00 2001
From: Gaute Hope
Date: Sat, 3 Jun 2023 20:58:29 +0200
Subject: [PATCH] ds: add valid check

---
 src/idx/dataset/any.rs     |  6 ++++++
 src/idx/dataset/dataset.rs | 30 +++++++++++++++++++++++++++---
 src/idx/dataset/slicer.rs  | 32 ++++++++++++++++++++++----------
 tests/read_norkyst.rs      |  4 ++++
 4 files changed, 59 insertions(+), 13 deletions(-)

diff --git a/src/idx/dataset/any.rs b/src/idx/dataset/any.rs
index 1a0561e..fa16c70 100644
--- a/src/idx/dataset/any.rs
+++ b/src/idx/dataset/any.rs
@@ -116,6 +116,8 @@ pub trait DatasetExt {
 
     fn chunk_shape(&self) -> &[u64];
 
+    fn valid(&self) -> anyhow::Result<bool>;
+
     fn as_par_reader(&self, p: &dyn AsRef) -> anyhow::Result>;
 }
@@ -140,6 +142,10 @@ impl<'a> DatasetExt for DatasetD<'a> {
         self.inner().chunk_shape()
     }
 
+    fn valid(&self) -> anyhow::Result<bool> {
+        self.inner().valid()
+    }
+
     fn as_par_reader(&self, p: &dyn AsRef) -> anyhow::Result> {
         self.inner().as_par_reader(p)
     }
diff --git a/src/idx/dataset/dataset.rs b/src/idx/dataset/dataset.rs
index 69144e8..ecf05a7 100644
--- a/src/idx/dataset/dataset.rs
+++ b/src/idx/dataset/dataset.rs
@@ -300,6 +300,26 @@ impl<const D: usize> Dataset<'_, D> {
         self.shape.is_empty()
     }
 
+    /// Test whether the dataset and chunk layout are valid.
+    pub fn valid(&self) -> anyhow::Result<bool> {
+        for chunk in self.chunks.iter() {
+            let offset = chunk.offset.iter().map(|u| u.get()).collect::<Vec<u64>>();
+            ensure!(chunk.contains(&offset, &self.chunk_shape) == std::cmp::Ordering::Equal, "chunk does not contain its offset");
+        }
+
+        let end: u64 = self.shape.iter().product();
+        let chunk_sh: u64 = self.chunk_shape.iter().product();
+
+        ensure!(end % chunk_sh == 0, "chunk shape does not evenly divide dataset shape: {0:?} vs {1:?}", self.shape, self.chunk_shape);
+        let chunks = end / chunk_sh;
+        ensure!(chunks == self.chunks.len() as u64, "number of chunks does not match dataset shape: {chunks} != {}", self.chunks.len());
+
+        Ok(true)
+    }
+
     /// Returns an iterator over chunk, offset and size which if joined will make up the specified slice through the
     /// variable.
     pub fn chunk_slices(
@@ -410,6 +430,10 @@ impl<const D: usize> DatasetExt for Dataset<'_, D> {
         self.chunk_shape.as_slice()
     }
 
+    fn valid(&self) -> anyhow::Result<bool> {
+        self.valid()
+    }
+
     fn as_par_reader(&self, p: &dyn AsRef) -> anyhow::Result> {
         use crate::reader::direct::Direct;
 
@@ -617,9 +641,9 @@ impl<'a, const D: usize> Iterator for ChunkSlicer<'a, D> {
         // position in chunk of new offset
         let chunk_end = chunk_start + advance;
 
-        debug_assert!(
-            chunk_end as usize <= self.dataset.chunk_shape.iter().product::<u64>() as usize
-        );
+        // debug_assert!(
+        //     chunk_end as usize <= self.dataset.chunk_shape.iter().product::<u64>() as usize
+        // );
 
         Some((chunk, chunk_start, chunk_end))
     }
diff --git a/src/idx/dataset/slicer.rs b/src/idx/dataset/slicer.rs
index d18e6d0..666506f 100644
--- a/src/idx/dataset/slicer.rs
+++ b/src/idx/dataset/slicer.rs
@@ -157,40 +157,49 @@ impl<'a, const D: usize> Iterator for ChunkSlice<'a, D> {
             //
             // chunk dimension will always be less or equal to the dataset
             // dimension, so we do not need to check it.
+
+            // When a chunk dimension is of size 1 it does not make the chunk any
+            // greater, so we ignore it. Is the case where all dimensions are 1 special?
             //
             // When all the higher chunk dimensions are size one we will reach the
             // next chunk and we can stop, if we advance to the end of the chunk.
             // We must however advance by at least one.
-            if self.dataset.chunk_shape[di] == 1 {
-                // if advance == 0 {
-                //     advance = 1;
-                // }
-                continue;
-            }
+            // if self.dataset.chunk_shape[di] == 1 {
+            //     // if advance == 0 {
+            //     //     advance = 1;
+            //     // }
+            //     continue;
+            // }
 
             // Assert that we have not advanced to the next chunk.
-            assert_eq!(chunk, self.dataset.chunk_at_coord(&I));
+            assert_eq!(chunk, self.dataset.chunk_at_coord(&I), "advanced into next chunk");
+
+            // Assert that the coordinates are in this chunk.
             debug_assert!(
-                chunk.contains(&I, &self.dataset.chunk_shape) == std::cmp::Ordering::Equal
+                chunk.contains(&I, &self.dataset.chunk_shape) == std::cmp::Ordering::Equal,
+                "coordinates are not in this chunk."
             );
 
             // End of chunk dimension.
             let chunk_d = chunk.offset[di].get() + self.dataset.chunk_shape[di];
+            debug_assert!(chunk_d <= self.dataset.shape[di]);
 
             // End of count dimension.
             let count_d = self.slice_start[di] + self.slice_counts[di];
+            debug_assert!(count_d <= self.dataset.shape[di]);
 
             let Id = I[di]; // Coordinate in current dimension of entire
                             // dataset.
 
             let nId = min(chunk_d, count_d); // New coordinate in current
                                              // dimension of entire
                                              // dataset.
 
-            debug_assert!(nId < self.dataset.shape[di]);
+            debug_assert!(nId <= self.dataset.shape[di], "coordinate above shape of dataset dimension.");
 
             dbg!(chunk_d);
             dbg!(count_d);
 
-            assert!(nId > Id);
+            assert!(nId > Id); // XXX: This one should probably go. There might be
+                               // cases where we don't advance in this dim, but in
+                               // the next.
 
             let dim_sz = self.dataset.dim_sz[di];
 
@@ -223,6 +232,7 @@ impl<'a, const D: usize> Iterator for ChunkSlice<'a, D> {
         let chunk_end = chunk_start + advance;
 
         self.slice_offset += advance;
+        assert!(self.slice_offset <= self.slice_end, "advanced further than slice end.");
 
         assert!(advance > 0, "Iterator not advancing");
 
@@ -382,6 +392,8 @@ mod tests {
         )
         .unwrap();
 
+        ds.valid().unwrap();
+
         ChunkSlice::new(&ds, [0, 0, 0], [2, 32, 580]).for_each(drop);
 
         // Should be all chunks.
diff --git a/tests/read_norkyst.rs b/tests/read_norkyst.rs
index 90a7d8b..1498203 100644
--- a/tests/read_norkyst.rs
+++ b/tests/read_norkyst.rs
@@ -57,6 +57,8 @@ fn wind() {
     let hUw = hi.reader("Uwind").unwrap().values::(None, None).unwrap();
     let hVw = hi.reader("Vwind").unwrap().values::(None, None).unwrap();
 
+    hi.dataset("Uwind").unwrap().valid().unwrap();
+
     assert_eq!(Uw, hUw);
     assert_eq!(Vw, hVw);
 }
@@ -75,6 +77,8 @@ fn current() {
 
     assert_eq!(u.len(), hi.dataset("u_eastward").unwrap().size());
 
+    // hi.dataset("u_eastward").unwrap().valid().unwrap();
+
     let hu = hi.reader("u_eastward").unwrap().values::(None, None).unwrap();
     let hv = hi.reader("v_northward").unwrap().values::(None, None).unwrap();
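
Note: below is a minimal sketch of how the new `valid()` check might be called from
downstream code; it is illustrative only. It assumes that `Index::index` and the
`dataset()` accessor used in the tests above, together with the `DatasetExt` trait that
now carries `valid()`, are reachable through `hidefix::prelude`; the file path and
variable name are placeholders.

    use hidefix::prelude::*;

    fn main() -> anyhow::Result<()> {
        // Open and index an existing HDF5/NetCDF-4 file (placeholder path).
        let idx = Index::index("tests/data/coads_climatology.nc4")?;

        // Look up a dataset by name (placeholder variable) and verify that its
        // chunk layout is internally consistent before reading from it.
        let ds = idx.dataset("SST").expect("no such dataset");
        ds.valid()?;

        Ok(())
    }

In the patch itself the check is exercised in the slicer unit test and in the
read_norkyst wind test, while the call for `u_eastward` is left commented out.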