diff --git a/benches/large.rs b/benches/large.rs index 83cd8b6..895f704 100644 --- a/benches/large.rs +++ b/benches/large.rs @@ -2,21 +2,52 @@ extern crate test; use test::Bencher; +use std::path::PathBuf; +use std::sync::Mutex; + use hidefix::prelude::*; use ndarray::s; -const FILE: Option<&'static str> = option_env!("HIDEFIX_LARGE_FILE"); -const VAR: Option<&'static str> = option_env!("HIDEFIX_LARGE_VAR"); +const URL: &'static str = "https://thredds.met.no/thredds/fileServer/fou-hi/norkyst800m-1h/NorKyst-800m_ZDEPTHS_his.an.2023081600.nc"; +const VAR: &'static str = "u_eastward"; + +fn get_file() -> PathBuf { + use std::time::Duration; + + static NK: Mutex<()> = Mutex::new(()); + let _guard = NK.lock().unwrap(); + + let mut p = std::env::temp_dir(); + p.push("hidefix"); + + let d = p.clone(); + + p.push("norkyst.nc"); + + if !p.exists() { + println!("downloading norkyst file to {p:#?}.."); + std::fs::create_dir_all(&d).unwrap(); + let c = reqwest::blocking::Client::builder() + .timeout(Duration::from_secs(10 * 60)) + .build() + .unwrap(); + let r = c.get(URL).send().unwrap(); + std::fs::write(&p, r.bytes().unwrap()).unwrap(); + } + + p +} #[ignore] #[bench] fn idx_small_slice(b: &mut Bencher) { - let i = Index::index(FILE.unwrap()).unwrap(); - let mut r = i.reader(VAR.unwrap()).unwrap(); + let p = get_file(); + let i = Index::index(&p).unwrap(); + let mut r = i.reader(VAR).unwrap(); // test against native - let h = hdf5::File::open(FILE.unwrap()).unwrap(); - let d = h.dataset(VAR.unwrap()).unwrap(); + let h = hdf5::File::open(&p).unwrap(); + let d = h.dataset(VAR).unwrap(); let hv = d .read_slice_1d::(s![0..2, 0..2, 0..1, 0..5]) .unwrap() @@ -41,8 +72,9 @@ fn idx_small_slice(b: &mut Bencher) { #[ignore] #[bench] fn native_small_slice(b: &mut Bencher) { - let h = hdf5::File::open(FILE.unwrap()).unwrap(); - let d = h.dataset(VAR.unwrap()).unwrap(); + let p = get_file(); + let h = hdf5::File::open(&p).unwrap(); + let d = h.dataset(VAR).unwrap(); b.iter(|| { test::black_box( @@ -55,12 +87,13 @@ fn native_small_slice(b: &mut Bencher) { #[ignore] #[bench] fn idx_med_slice(b: &mut Bencher) { - let i = Index::index(FILE.unwrap()).unwrap(); - let mut r = i.reader(VAR.unwrap()).unwrap(); + let p = get_file(); + let i = Index::index(&p).unwrap(); + let mut r = i.reader(VAR).unwrap(); // test against native - let h = hdf5::File::open(FILE.unwrap()).unwrap(); - let d = h.dataset(VAR.unwrap()).unwrap(); + let h = hdf5::File::open(&p).unwrap(); + let d = h.dataset(VAR).unwrap(); let hv = d .read_slice_1d::(s![0..10, 0..10, 0..1, 0..700]) .unwrap() @@ -85,8 +118,9 @@ fn idx_med_slice(b: &mut Bencher) { #[ignore] #[bench] fn native_med_slice(b: &mut Bencher) { - let h = hdf5::File::open(FILE.unwrap()).unwrap(); - let d = h.dataset(VAR.unwrap()).unwrap(); + let p = get_file(); + let h = hdf5::File::open(&p).unwrap(); + let d = h.dataset(VAR).unwrap(); b.iter(|| { test::black_box( @@ -99,12 +133,13 @@ fn native_med_slice(b: &mut Bencher) { #[ignore] #[bench] fn idx_big_slice(b: &mut Bencher) { - let i = Index::index(FILE.unwrap()).unwrap(); - let mut r = i.reader(VAR.unwrap()).unwrap(); + let p = get_file(); + let i = Index::index(&p).unwrap(); + let mut r = i.reader(VAR).unwrap(); // test against native - let h = hdf5::File::open(FILE.unwrap()).unwrap(); - let d = h.dataset(VAR.unwrap()).unwrap(); + let h = hdf5::File::open(&p).unwrap(); + let d = h.dataset(VAR).unwrap(); let hv = d .read_slice_1d::(s![0..24, 0..16, 0..1, 0..739]) .unwrap() @@ -129,8 +164,9 @@ fn idx_big_slice(b: &mut Bencher) { #[ignore] #[bench] fn native_big_slice(b: &mut Bencher) { - let h = hdf5::File::open(FILE.unwrap()).unwrap(); - let d = h.dataset(VAR.unwrap()).unwrap(); + let p = get_file(); + let h = hdf5::File::open(&p).unwrap(); + let d = h.dataset(VAR).unwrap(); b.iter(|| { test::black_box( diff --git a/benches/norkyst.rs b/benches/norkyst.rs new file mode 100644 index 0000000..afd072e --- /dev/null +++ b/benches/norkyst.rs @@ -0,0 +1,57 @@ +#![feature(test)] +extern crate test; +use test::Bencher; + +use std::path::PathBuf; +use std::sync::Mutex; + +use hidefix::prelude::*; + +const URL: &'static str = "https://thredds.met.no/thredds/fileServer/fou-hi/norkyst800m-1h/NorKyst-800m_ZDEPTHS_his.an.2023081600.nc"; + +fn get_file() -> PathBuf { + use std::time::Duration; + + static NK: Mutex<()> = Mutex::new(()); + let _guard = NK.lock().unwrap(); + + let mut p = std::env::temp_dir(); + p.push("hidefix"); + + let d = p.clone(); + + p.push("norkyst.nc"); + + if !p.exists() { + println!("downloading norkyst file to {p:#?}.."); + std::fs::create_dir_all(&d).unwrap(); + let c = reqwest::blocking::Client::builder() + .timeout(Duration::from_secs(10 * 60)) + .build() + .unwrap(); + let r = c.get(URL).send().unwrap(); + std::fs::write(&p, r.bytes().unwrap()).unwrap(); + } + + p +} + +#[ignore] +#[bench] +fn idx_big_slice(b: &mut Bencher) { + let p = get_file(); + let i = Index::index(&p).unwrap(); + let mut u = i.reader("u_eastward").unwrap(); + + b.iter(|| test::black_box(u.values::(None, None).unwrap())); +} + +#[ignore] +#[bench] +fn native_big_slice(b: &mut Bencher) { + let p = get_file(); + let h = hdf5::File::open(&p).unwrap(); + let d = h.dataset("u_eastward").unwrap(); + + b.iter(|| test::black_box(d.read_raw::().unwrap())) +} diff --git a/src/idx/chunk.rs b/src/idx/chunk.rs index 45ce20a..edb7f52 100644 --- a/src/idx/chunk.rs +++ b/src/idx/chunk.rs @@ -44,7 +44,12 @@ impl Chunk { } pub fn offset_u64(&self) -> [u64; D] { - self.offset.iter().map(|o| o.get()).collect::>().try_into().unwrap() + self.offset + .iter() + .map(|o| o.get()) + .collect::>() + .try_into() + .unwrap() } /// Is the point described by the indices inside the chunk (`Equal`), before (`Less`) or after diff --git a/src/idx/dataset/dataset.rs b/src/idx/dataset/dataset.rs index 1e898f5..03bc5dc 100644 --- a/src/idx/dataset/dataset.rs +++ b/src/idx/dataset/dataset.rs @@ -304,7 +304,10 @@ impl Dataset<'_, D> { pub fn valid(&self) -> anyhow::Result { for chunk in self.chunks.iter() { let offset = chunk.offset.iter().map(|u| u.get()).collect::>(); - ensure!(chunk.contains(&offset, &self.chunk_shape) == std::cmp::Ordering::Equal, "chunk does not contain its offset"); + ensure!( + chunk.contains(&offset, &self.chunk_shape) == std::cmp::Ordering::Equal, + "chunk does not contain its offset" + ); } Ok(true) @@ -556,17 +559,18 @@ impl<'a, const D: usize> Iterator for ChunkSlicer<'a, D> { let mut carry = 0; let mut di = 0; - for (idx, offset, count, count_sru, chunk_offset, chunk_sz, chunk_dim_sz, dataset_shape) in izip!( - &self.indices, - &mut self.offset_coords, - &self.counts, - &self.counts_reduced, - &chunk.offset, - &self.dataset.chunk_shape, - &self.dataset.chunk_dim_sz, - &self.dataset.shape, - ) - .rev() + for (idx, offset, count, count_sru, chunk_offset, chunk_sz, chunk_dim_sz, dataset_shape) in + izip!( + &self.indices, + &mut self.offset_coords, + &self.counts, + &self.counts_reduced, + &chunk.offset, + &self.dataset.chunk_shape, + &self.dataset.chunk_dim_sz, + &self.dataset.shape, + ) + .rev() { // The chunk size may not align to the dataset size. If the chunk // dimension is greater than the end of the dataset, it must be cut diff --git a/src/reader/cache.rs b/src/reader/cache.rs index cd3b84f..2791481 100644 --- a/src/reader/cache.rs +++ b/src/reader/cache.rs @@ -67,8 +67,6 @@ impl<'a, R: Read + Seek, const D: usize> Reader for CacheReader<'a, R, D> { "destination buffer has insufficient capacity" ); - let mut i = 0; - for (c, start, end) in self.ds.chunk_slices(indices, Some(counts)) { let start = (start * dsz) as usize; let end = (end * dsz) as usize; @@ -93,8 +91,6 @@ impl<'a, R: Read + Seek, const D: usize> Reader for CacheReader<'a, R, D> { false, )?; - i += 1; - debug_assert!(start <= cache.len()); debug_assert!(end <= cache.len()); dst[..slice_sz].copy_from_slice(&cache[start..end]); @@ -103,7 +99,6 @@ impl<'a, R: Read + Seek, const D: usize> Reader for CacheReader<'a, R, D> { dst = &mut dst[slice_sz..]; } - println!("chunks read: {i}"); Ok(vsz as usize) } diff --git a/src/reader/direct.rs b/src/reader/direct.rs index 5b311c5..eff4926 100644 --- a/src/reader/direct.rs +++ b/src/reader/direct.rs @@ -153,7 +153,6 @@ impl<'a, const D: usize> Reader for Direct<'a, D> { let mut fd = std::fs::File::open(&self.path)?; - let mut i = 0; let mut last_chunk: Option<(&Chunk, Vec)> = None; for (c, current, start, end) in groups { @@ -173,7 +172,6 @@ impl<'a, const D: usize> Reader for Direct<'a, D> { self.ds.shuffle, false, )?; - i += 1; last_chunk = Some((c, cache)); &last_chunk.as_mut().unwrap().1 @@ -194,8 +192,6 @@ impl<'a, const D: usize> Reader for Direct<'a, D> { dst[current..(current + sz)].copy_from_slice(&cache[start..end]); } - println!("chunks read: {i}"); - Ok(vsz as usize) } } diff --git a/tests/read_norkyst.rs b/tests/read_norkyst.rs index 1498203..5ae3c5d 100644 --- a/tests/read_norkyst.rs +++ b/tests/read_norkyst.rs @@ -2,13 +2,16 @@ #![allow(non_snake_case)] extern crate test; +use hidefix::idx::{Dataset, DatasetD}; +use hidefix::prelude::*; use std::path::PathBuf; use std::sync::Mutex; -use hidefix::prelude::*; -const URL: &'static str = "https://thredds.met.no/thredds/fileServer/fou-hi/norkyst800m/NorKyst-800m_ZDEPTHS_avg.an.2023050800.nc"; +const URL: &'static str = "https://thredds.met.no/thredds/fileServer/fou-hi/norkyst800m-1h/NorKyst-800m_ZDEPTHS_his.an.2023081600.nc"; + +fn get_file() -> PathBuf { + use std::time::Duration; -fn get_file() -> PathBuf { static NK: Mutex<()> = Mutex::new(()); let _guard = NK.lock().unwrap(); @@ -22,7 +25,11 @@ fn get_file() -> PathBuf { if !p.exists() { println!("downloading norkyst file to {p:#?}.."); std::fs::create_dir_all(&d).unwrap(); - let r = reqwest::blocking::get(URL).unwrap(); + let c = reqwest::blocking::Client::builder() + .timeout(Duration::from_secs(10 * 60)) + .build() + .unwrap(); + let r = c.get(URL).send().unwrap(); std::fs::write(&p, r.bytes().unwrap()).unwrap(); } @@ -50,12 +57,20 @@ fn wind() { let p = get_file(); let h = hdf5::File::open(&p).unwrap(); - let Uw = h.dataset("Uwind").unwrap().read_raw::().unwrap(); - let Vw = h.dataset("Vwind").unwrap().read_raw::().unwrap(); + let Uw = h.dataset("Uwind").unwrap().read_raw::().unwrap(); + let Vw = h.dataset("Vwind").unwrap().read_raw::().unwrap(); let hi = Index::index(&p).unwrap(); - let hUw = hi.reader("Uwind").unwrap().values::(None, None).unwrap(); - let hVw = hi.reader("Vwind").unwrap().values::(None, None).unwrap(); + let hUw = hi + .reader("Uwind") + .unwrap() + .values::(None, None) + .unwrap(); + let hVw = hi + .reader("Vwind") + .unwrap() + .values::(None, None) + .unwrap(); hi.dataset("Uwind").unwrap().valid().unwrap(); @@ -79,10 +94,87 @@ fn current() { // hi.dataset("u_eastward").unwrap().valid().unwrap(); - let hu = hi.reader("u_eastward").unwrap().values::(None, None).unwrap(); - let hv = hi.reader("v_northward").unwrap().values::(None, None).unwrap(); + let hu = hi + .reader("u_eastward") + .unwrap() + .values::(None, None) + .unwrap(); + let hv = hi + .reader("v_northward") + .unwrap() + .values::(None, None) + .unwrap(); assert_eq!(u, hu); assert_eq!(v, hv); } +#[test] +fn temperature_salinity() { + let p = get_file(); + + let h = hdf5::File::open(&p).unwrap(); + let Uw = h.dataset("temperature").unwrap().read_raw::().unwrap(); + let Vw = h.dataset("salinity").unwrap().read_raw::().unwrap(); + + let hi = Index::index(&p).unwrap(); + let hUw = hi + .reader("temperature") + .unwrap() + .values::(None, None) + .unwrap(); + let hVw = hi + .reader("salinity") + .unwrap() + .values::(None, None) + .unwrap(); + + assert_eq!(Uw, hUw); + assert_eq!(Vw, hVw); +} + +#[test] +fn chunk_slice_fracture() { + let p = get_file(); + let hi = Index::index(&p).unwrap(); + + // test that chunks are not unnecessarily fractured + fn test_slices(ds: &Dataset) { + let chunks = ds.chunk_slices(None, None).collect::>(); + + println!("chunks len: {}", chunks.len()); + + // might have to make `chunks` unique. + assert_eq!(chunks.len(), ds.chunks.len()); + + for i in 1..chunks.len() { + let p = chunks[i - 1]; + let c = chunks[i]; + + // assert_eq!(p.2 - p.1, chunk_total_size); + // assert_eq!(c.2 - c.1, chunk_total_size); + + assert_ne!(c, p); + } + } + + let DatasetD::D1(ds) = hi.dataset("X").unwrap() else { + panic!("wrong dims") + }; + test_slices(ds); + + let DatasetD::D4(ds) = hi.dataset("temperature").unwrap() else { + panic!("wrong dims") + }; + test_slices(ds); + + let DatasetD::D4(ds) = hi.dataset("u_eastward").unwrap() else { + panic!("wrong dims") + }; + test_slices(ds); + + let DatasetD::D3(ds) = hi.dataset("Uwind").unwrap() else { + panic!("wrong dims") + }; + test_slices(ds); +}