Skip to content

Commit

Permalink
tests/bench: get large file
Browse files Browse the repository at this point in the history
  • Loading branch information
gauteh committed Aug 16, 2023
1 parent eca937c commit 6e6c14d
Show file tree
Hide file tree
Showing 7 changed files with 237 additions and 52 deletions.
76 changes: 56 additions & 20 deletions benches/large.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,52 @@
extern crate test;
use test::Bencher;

use std::path::PathBuf;
use std::sync::Mutex;

use hidefix::prelude::*;
use ndarray::s;

const FILE: Option<&'static str> = option_env!("HIDEFIX_LARGE_FILE");
const VAR: Option<&'static str> = option_env!("HIDEFIX_LARGE_VAR");
const URL: &'static str = "https://thredds.met.no/thredds/fileServer/fou-hi/norkyst800m-1h/NorKyst-800m_ZDEPTHS_his.an.2023081600.nc";
const VAR: &'static str = "u_eastward";

/// Return the path to the cached NorKyst-800m test file, downloading it
/// into `$TMPDIR/hidefix/norkyst.nc` on first use.
///
/// A process-local mutex serializes callers so concurrent benchmarks in
/// this binary do not race to download the same file. Panics (via
/// `unwrap`) on any network or I/O error, which is acceptable for a
/// benchmark helper.
fn get_file() -> PathBuf {
    use std::time::Duration;

    // Serialize concurrent callers within this process.
    static NK: Mutex<()> = Mutex::new(());
    let _guard = NK.lock().unwrap();

    let mut p = std::env::temp_dir();
    p.push("hidefix");

    let d = p.clone();

    p.push("norkyst.nc");

    if !p.exists() {
        println!("downloading norkyst file to {p:#?}..");
        std::fs::create_dir_all(&d).unwrap();
        let c = reqwest::blocking::Client::builder()
            // The file is large; allow a generous timeout for the download.
            .timeout(Duration::from_secs(10 * 60))
            .build()
            .unwrap();
        // `error_for_status` prevents caching an HTML error page as the
        // dataset if the server answers with e.g. 404 or 500.
        let r = c.get(URL).send().unwrap().error_for_status().unwrap();

        // Write to a temporary name and rename into place: an interrupted
        // run cannot leave a truncated `norkyst.nc` that later runs would
        // mistake for the real dataset (only `p.exists()` is checked).
        let tmp = p.with_extension("nc.part");
        std::fs::write(&tmp, r.bytes().unwrap()).unwrap();
        std::fs::rename(&tmp, &p).unwrap();
    }

    p
}

#[ignore]
#[bench]
fn idx_small_slice(b: &mut Bencher) {
let i = Index::index(FILE.unwrap()).unwrap();
let mut r = i.reader(VAR.unwrap()).unwrap();
let p = get_file();
let i = Index::index(&p).unwrap();
let mut r = i.reader(VAR).unwrap();

// test against native
let h = hdf5::File::open(FILE.unwrap()).unwrap();
let d = h.dataset(VAR.unwrap()).unwrap();
let h = hdf5::File::open(&p).unwrap();
let d = h.dataset(VAR).unwrap();
let hv = d
.read_slice_1d::<i32, _>(s![0..2, 0..2, 0..1, 0..5])
.unwrap()
Expand All @@ -41,8 +72,9 @@ fn idx_small_slice(b: &mut Bencher) {
#[ignore]
#[bench]
fn native_small_slice(b: &mut Bencher) {
let h = hdf5::File::open(FILE.unwrap()).unwrap();
let d = h.dataset(VAR.unwrap()).unwrap();
let p = get_file();
let h = hdf5::File::open(&p).unwrap();
let d = h.dataset(VAR).unwrap();

b.iter(|| {
test::black_box(
Expand All @@ -55,12 +87,13 @@ fn native_small_slice(b: &mut Bencher) {
#[ignore]
#[bench]
fn idx_med_slice(b: &mut Bencher) {
let i = Index::index(FILE.unwrap()).unwrap();
let mut r = i.reader(VAR.unwrap()).unwrap();
let p = get_file();
let i = Index::index(&p).unwrap();
let mut r = i.reader(VAR).unwrap();

// test against native
let h = hdf5::File::open(FILE.unwrap()).unwrap();
let d = h.dataset(VAR.unwrap()).unwrap();
let h = hdf5::File::open(&p).unwrap();
let d = h.dataset(VAR).unwrap();
let hv = d
.read_slice_1d::<i32, _>(s![0..10, 0..10, 0..1, 0..700])
.unwrap()
Expand All @@ -85,8 +118,9 @@ fn idx_med_slice(b: &mut Bencher) {
#[ignore]
#[bench]
fn native_med_slice(b: &mut Bencher) {
let h = hdf5::File::open(FILE.unwrap()).unwrap();
let d = h.dataset(VAR.unwrap()).unwrap();
let p = get_file();
let h = hdf5::File::open(&p).unwrap();
let d = h.dataset(VAR).unwrap();

b.iter(|| {
test::black_box(
Expand All @@ -99,12 +133,13 @@ fn native_med_slice(b: &mut Bencher) {
#[ignore]
#[bench]
fn idx_big_slice(b: &mut Bencher) {
let i = Index::index(FILE.unwrap()).unwrap();
let mut r = i.reader(VAR.unwrap()).unwrap();
let p = get_file();
let i = Index::index(&p).unwrap();
let mut r = i.reader(VAR).unwrap();

// test against native
let h = hdf5::File::open(FILE.unwrap()).unwrap();
let d = h.dataset(VAR.unwrap()).unwrap();
let h = hdf5::File::open(&p).unwrap();
let d = h.dataset(VAR).unwrap();
let hv = d
.read_slice_1d::<i32, _>(s![0..24, 0..16, 0..1, 0..739])
.unwrap()
Expand All @@ -129,8 +164,9 @@ fn idx_big_slice(b: &mut Bencher) {
#[ignore]
#[bench]
fn native_big_slice(b: &mut Bencher) {
let h = hdf5::File::open(FILE.unwrap()).unwrap();
let d = h.dataset(VAR.unwrap()).unwrap();
let p = get_file();
let h = hdf5::File::open(&p).unwrap();
let d = h.dataset(VAR).unwrap();

b.iter(|| {
test::black_box(
Expand Down
57 changes: 57 additions & 0 deletions benches/norkyst.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#![feature(test)]
extern crate test;
use test::Bencher;

use std::path::PathBuf;
use std::sync::Mutex;

use hidefix::prelude::*;

const URL: &'static str = "https://thredds.met.no/thredds/fileServer/fou-hi/norkyst800m-1h/NorKyst-800m_ZDEPTHS_his.an.2023081600.nc";

/// Return the path to the cached NorKyst-800m test file, downloading it
/// into `$TMPDIR/hidefix/norkyst.nc` on first use.
///
/// A process-local mutex serializes callers so concurrent benchmarks in
/// this binary do not race to download the same file. Panics (via
/// `unwrap`) on any network or I/O error, which is acceptable for a
/// benchmark helper.
fn get_file() -> PathBuf {
    use std::time::Duration;

    // Serialize concurrent callers within this process.
    static NK: Mutex<()> = Mutex::new(());
    let _guard = NK.lock().unwrap();

    let mut p = std::env::temp_dir();
    p.push("hidefix");

    let d = p.clone();

    p.push("norkyst.nc");

    if !p.exists() {
        println!("downloading norkyst file to {p:#?}..");
        std::fs::create_dir_all(&d).unwrap();
        let c = reqwest::blocking::Client::builder()
            // The file is large; allow a generous timeout for the download.
            .timeout(Duration::from_secs(10 * 60))
            .build()
            .unwrap();
        // `error_for_status` prevents caching an HTML error page as the
        // dataset if the server answers with e.g. 404 or 500.
        let r = c.get(URL).send().unwrap().error_for_status().unwrap();

        // Write to a temporary name and rename into place: an interrupted
        // run cannot leave a truncated `norkyst.nc` that later runs would
        // mistake for the real dataset (only `p.exists()` is checked).
        let tmp = p.with_extension("nc.part");
        std::fs::write(&tmp, r.bytes().unwrap()).unwrap();
        std::fs::rename(&tmp, &p).unwrap();
    }

    p
}

/// Benchmark reading the full `u_eastward` variable through the hidefix
/// index reader.
#[ignore]
#[bench]
fn idx_big_slice(b: &mut Bencher) {
    let path = get_file();
    let index = Index::index(&path).unwrap();
    let mut reader = index.reader("u_eastward").unwrap();

    b.iter(|| {
        let values = reader.values::<f32>(None, None).unwrap();
        test::black_box(values)
    });
}

/// Benchmark reading the full `u_eastward` variable through the native
/// hdf5 bindings, as a baseline for the hidefix reader.
#[ignore]
#[bench]
fn native_big_slice(b: &mut Bencher) {
    let path = get_file();
    let file = hdf5::File::open(&path).unwrap();
    let dataset = file.dataset("u_eastward").unwrap();

    b.iter(|| {
        let raw = dataset.read_raw::<f32>().unwrap();
        test::black_box(raw)
    })
}
7 changes: 6 additions & 1 deletion src/idx/chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,12 @@ impl<const D: usize> Chunk<D> {
}

/// Return the chunk's offset as plain `u64` coordinates.
pub fn offset_u64(&self) -> [u64; D] {
    // Each stored offset exposes its raw value through `get()`; collect
    // into a `Vec` and convert to a fixed-size array. The conversion is
    // infallible because `self.offset` holds exactly `D` elements.
    self.offset
        .iter()
        .map(|o| o.get())
        .collect::<Vec<_>>()
        .try_into()
        .unwrap()
}

/// Is the point described by the indices inside the chunk (`Equal`), before (`Less`) or after
Expand Down
28 changes: 16 additions & 12 deletions src/idx/dataset/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,10 @@ impl<const D: usize> Dataset<'_, D> {
pub fn valid(&self) -> anyhow::Result<bool> {
for chunk in self.chunks.iter() {
let offset = chunk.offset.iter().map(|u| u.get()).collect::<Vec<_>>();
ensure!(chunk.contains(&offset, &self.chunk_shape) == std::cmp::Ordering::Equal, "chunk does not contain its offset");
ensure!(
chunk.contains(&offset, &self.chunk_shape) == std::cmp::Ordering::Equal,
"chunk does not contain its offset"
);
}

Ok(true)
Expand Down Expand Up @@ -556,17 +559,18 @@ impl<'a, const D: usize> Iterator for ChunkSlicer<'a, D> {
let mut carry = 0;
let mut di = 0;

for (idx, offset, count, count_sru, chunk_offset, chunk_sz, chunk_dim_sz, dataset_shape) in izip!(
&self.indices,
&mut self.offset_coords,
&self.counts,
&self.counts_reduced,
&chunk.offset,
&self.dataset.chunk_shape,
&self.dataset.chunk_dim_sz,
&self.dataset.shape,
)
.rev()
for (idx, offset, count, count_sru, chunk_offset, chunk_sz, chunk_dim_sz, dataset_shape) in
izip!(
&self.indices,
&mut self.offset_coords,
&self.counts,
&self.counts_reduced,
&chunk.offset,
&self.dataset.chunk_shape,
&self.dataset.chunk_dim_sz,
&self.dataset.shape,
)
.rev()
{
// The chunk size may not align to the dataset size. If the chunk
// dimension is greater than the end of the dataset, it must be cut
Expand Down
5 changes: 0 additions & 5 deletions src/reader/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,6 @@ impl<'a, R: Read + Seek, const D: usize> Reader for CacheReader<'a, R, D> {
"destination buffer has insufficient capacity"
);

let mut i = 0;

for (c, start, end) in self.ds.chunk_slices(indices, Some(counts)) {
let start = (start * dsz) as usize;
let end = (end * dsz) as usize;
Expand All @@ -93,8 +91,6 @@ impl<'a, R: Read + Seek, const D: usize> Reader for CacheReader<'a, R, D> {
false,
)?;

i += 1;

debug_assert!(start <= cache.len());
debug_assert!(end <= cache.len());
dst[..slice_sz].copy_from_slice(&cache[start..end]);
Expand All @@ -103,7 +99,6 @@ impl<'a, R: Read + Seek, const D: usize> Reader for CacheReader<'a, R, D> {

dst = &mut dst[slice_sz..];
}
println!("chunks read: {i}");

Ok(vsz as usize)
}
Expand Down
4 changes: 0 additions & 4 deletions src/reader/direct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ impl<'a, const D: usize> Reader for Direct<'a, D> {

let mut fd = std::fs::File::open(&self.path)?;

let mut i = 0;
let mut last_chunk: Option<(&Chunk<D>, Vec<u8>)> = None;

for (c, current, start, end) in groups {
Expand All @@ -173,7 +172,6 @@ impl<'a, const D: usize> Reader for Direct<'a, D> {
self.ds.shuffle,
false,
)?;
i += 1;

last_chunk = Some((c, cache));
&last_chunk.as_mut().unwrap().1
Expand All @@ -194,8 +192,6 @@ impl<'a, const D: usize> Reader for Direct<'a, D> {
dst[current..(current + sz)].copy_from_slice(&cache[start..end]);
}

println!("chunks read: {i}");

Ok(vsz as usize)
}
}
Expand Down
Loading

0 comments on commit 6e6c14d

Please sign in to comment.