Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: test norkyst files between hdf5 and hidefix #21

Merged
merged 17 commits into from
Aug 16, 2023
Merged
661 changes: 659 additions & 2 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ version = "1"
[dev-dependencies]
rand = "0.8"
sled = "0.34.6"
reqwest = { version = "0.11", features = [ "blocking" ] }

[patch.crates-io]
hdf5 = { git = "https://github.com/magnusuMET/hdf5-rust", branch = "hidefix_jul_2023" }
Expand Down
110 changes: 74 additions & 36 deletions benches/large.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,70 @@
extern crate test;
use test::Bencher;

use std::path::PathBuf;
use std::sync::Mutex;

use hidefix::prelude::*;
use ndarray::s;
use ndarray::{s, IxDyn};

const URL: &'static str = "https://thredds.met.no/thredds/fileServer/fou-hi/norkyst800m-1h/NorKyst-800m_ZDEPTHS_his.an.2023081600.nc";
const VAR: &'static str = "u_eastward";

type T = f32;

/// Return the path to the cached NorKyst test file, downloading it into the
/// system temp directory (`$TMP/hidefix/norkyst.nc`) on first use.
///
/// A function-local static mutex serializes callers so concurrent benchmark
/// threads do not race on the download.
fn get_file() -> PathBuf {
    use std::time::Duration;

    static NK: Mutex<()> = Mutex::new(());
    let _guard = NK.lock().unwrap();

    let mut p = std::env::temp_dir();
    p.push("hidefix");

    let d = p.clone();

    p.push("norkyst.nc");

    if !p.exists() {
        println!("downloading norkyst file to {p:#?}..");
        std::fs::create_dir_all(&d).unwrap();
        let c = reqwest::blocking::Client::builder()
            .timeout(Duration::from_secs(10 * 60))
            .build()
            .unwrap();
        // Fail loudly on HTTP errors: without `error_for_status` a 404/500
        // response body (HTML) would be cached as the .nc file and silently
        // reused by every later run.
        let r = c.get(URL).send().unwrap().error_for_status().unwrap();
        let bytes = r.bytes().unwrap();
        // Write to a temporary name and rename into place so an interrupted
        // write never leaves a truncated file that `p.exists()` would accept.
        let tmp = d.join("norkyst.nc.part");
        std::fs::write(&tmp, &bytes).unwrap();
        std::fs::rename(&tmp, &p).unwrap();
    }

    p
}

#[ignore]
#[bench]
fn idx_small_slice(b: &mut Bencher) {
let i = Index::index(FILE.unwrap()).unwrap();
let mut r = i.reader(VAR.unwrap()).unwrap();
let p = get_file();
let i = Index::index(&p).unwrap();
let mut r = i.reader(VAR).unwrap();

// test against native
let h = hdf5::File::open(FILE.unwrap()).unwrap();
let d = h.dataset(VAR.unwrap()).unwrap();
let h = hdf5::File::open(&p).unwrap();
let d = h.dataset(VAR).unwrap();
let hv = d
.read_slice_1d::<i32, _>(s![0..2, 0..2, 0..1, 0..5])
.read_slice::<T, _, IxDyn>(s![0..2, 0..2, 0..1, 0..5])
.unwrap()
.iter()
.map(|v| *v)
.collect::<Vec<i32>>();
.collect::<Vec<T>>();

assert_eq!(
hv,
r.values::<i32>(Some(&[0, 0, 0, 0]), Some(&[2, 2, 1, 5]))
r.values::<T>(Some(&[0, 0, 0, 0]), Some(&[2, 2, 1, 5]))
.unwrap()
);

b.iter(|| {
test::black_box(
r.values::<i32>(Some(&[0, 0, 0, 0]), Some(&[2, 2, 1, 5]))
r.values::<T>(Some(&[0, 0, 0, 0]), Some(&[2, 2, 1, 5]))
.unwrap(),
)
});
Expand All @@ -41,12 +74,13 @@ fn idx_small_slice(b: &mut Bencher) {
#[ignore]
#[bench]
fn native_small_slice(b: &mut Bencher) {
let h = hdf5::File::open(FILE.unwrap()).unwrap();
let d = h.dataset(VAR.unwrap()).unwrap();
let p = get_file();
let h = hdf5::File::open(&p).unwrap();
let d = h.dataset(VAR).unwrap();

b.iter(|| {
test::black_box(
d.read_slice_1d::<i32, _>(s![0..2, 0..2, 0..1, 0..5])
d.read_slice::<T, _, IxDyn>(s![0..2, 0..2, 0..1, 0..5])
.unwrap(),
)
})
Expand All @@ -55,28 +89,29 @@ fn native_small_slice(b: &mut Bencher) {
#[ignore]
#[bench]
fn idx_med_slice(b: &mut Bencher) {
let i = Index::index(FILE.unwrap()).unwrap();
let mut r = i.reader(VAR.unwrap()).unwrap();
let p = get_file();
let i = Index::index(&p).unwrap();
let mut r = i.reader(VAR).unwrap();

// test against native
let h = hdf5::File::open(FILE.unwrap()).unwrap();
let d = h.dataset(VAR.unwrap()).unwrap();
let h = hdf5::File::open(&p).unwrap();
let d = h.dataset(VAR).unwrap();
let hv = d
.read_slice_1d::<i32, _>(s![0..10, 0..10, 0..1, 0..700])
.read_slice::<T, _, IxDyn>(s![0..10, 0..10, 0..1, 0..700])
.unwrap()
.iter()
.map(|v| *v)
.collect::<Vec<i32>>();
.collect::<Vec<T>>();

assert_eq!(
hv,
r.values::<i32>(Some(&[0, 0, 0, 0]), Some(&[10, 10, 1, 700]))
r.values::<T>(Some(&[0, 0, 0, 0]), Some(&[10, 10, 1, 700]))
.unwrap()
);

b.iter(|| {
test::black_box(
r.values::<i32>(Some(&[0, 0, 0, 0]), Some(&[10, 10, 1, 700]))
r.values::<T>(Some(&[0, 0, 0, 0]), Some(&[10, 10, 1, 2602]))
.unwrap(),
)
});
Expand All @@ -85,12 +120,13 @@ fn idx_med_slice(b: &mut Bencher) {
#[ignore]
#[bench]
fn native_med_slice(b: &mut Bencher) {
let h = hdf5::File::open(FILE.unwrap()).unwrap();
let d = h.dataset(VAR.unwrap()).unwrap();
let p = get_file();
let h = hdf5::File::open(&p).unwrap();
let d = h.dataset(VAR).unwrap();

b.iter(|| {
test::black_box(
d.read_slice_1d::<i32, _>(s![0..10, 0..10, 0..1, 0..20000])
d.read_slice::<T, _, IxDyn>(s![0..10, 0..10, 0..1, 0..2602])
.unwrap(),
)
})
Expand All @@ -99,28 +135,29 @@ fn native_med_slice(b: &mut Bencher) {
#[ignore]
#[bench]
fn idx_big_slice(b: &mut Bencher) {
let i = Index::index(FILE.unwrap()).unwrap();
let mut r = i.reader(VAR.unwrap()).unwrap();
let p = get_file();
let i = Index::index(&p).unwrap();
let mut r = i.reader(VAR).unwrap();

// test against native
let h = hdf5::File::open(FILE.unwrap()).unwrap();
let d = h.dataset(VAR.unwrap()).unwrap();
let h = hdf5::File::open(&p).unwrap();
let d = h.dataset(VAR).unwrap();
let hv = d
.read_slice_1d::<i32, _>(s![0..24, 0..16, 0..1, 0..739])
.read_slice::<T, _, IxDyn>(s![0..24, 0..16, 0..1, 0..739])
.unwrap()
.iter()
.map(|v| *v)
.collect::<Vec<i32>>();
.collect::<Vec<T>>();

assert_eq!(
hv,
r.values::<i32>(Some(&[0, 0, 0, 0]), Some(&[24, 16, 1, 739]))
r.values::<T>(Some(&[0, 0, 0, 0]), Some(&[24, 16, 1, 739]))
.unwrap()
);

b.iter(|| {
test::black_box(
r.values::<i32>(Some(&[0, 0, 0, 0]), Some(&[24, 16, 1, 739]))
r.values::<T>(Some(&[0, 0, 0, 0]), Some(&[24, 16, 1, 2602]))
.unwrap(),
)
});
Expand All @@ -129,12 +166,13 @@ fn idx_big_slice(b: &mut Bencher) {
#[ignore]
#[bench]
fn native_big_slice(b: &mut Bencher) {
let h = hdf5::File::open(FILE.unwrap()).unwrap();
let d = h.dataset(VAR.unwrap()).unwrap();
let p = get_file();
let h = hdf5::File::open(&p).unwrap();
let d = h.dataset(VAR).unwrap();

b.iter(|| {
test::black_box(
d.read_slice_1d::<i32, _>(s![0..65, 0..65, 0..1, 0..20000])
d.read_slice::<T, _, IxDyn>(s![0..24, 0..16, 0..1, 0..2602])
.unwrap(),
)
})
Expand Down
57 changes: 57 additions & 0 deletions benches/norkyst.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#![feature(test)]
extern crate test;
use test::Bencher;

use std::path::PathBuf;
use std::sync::Mutex;

use hidefix::prelude::*;

const URL: &'static str = "https://thredds.met.no/thredds/fileServer/fou-hi/norkyst800m-1h/NorKyst-800m_ZDEPTHS_his.an.2023081600.nc";

/// Return the path to the cached NorKyst test file, downloading it into the
/// system temp directory (`$TMP/hidefix/norkyst.nc`) on first use.
///
/// A function-local static mutex serializes callers so concurrent benchmark
/// threads do not race on the download.
fn get_file() -> PathBuf {
    use std::time::Duration;

    static NK: Mutex<()> = Mutex::new(());
    let _guard = NK.lock().unwrap();

    let mut p = std::env::temp_dir();
    p.push("hidefix");

    let d = p.clone();

    p.push("norkyst.nc");

    if !p.exists() {
        println!("downloading norkyst file to {p:#?}..");
        std::fs::create_dir_all(&d).unwrap();
        let c = reqwest::blocking::Client::builder()
            .timeout(Duration::from_secs(10 * 60))
            .build()
            .unwrap();
        // Fail loudly on HTTP errors: without `error_for_status` a 404/500
        // response body (HTML) would be cached as the .nc file and silently
        // reused by every later run.
        let r = c.get(URL).send().unwrap().error_for_status().unwrap();
        let bytes = r.bytes().unwrap();
        // Write to a temporary name and rename into place so an interrupted
        // write never leaves a truncated file that `p.exists()` would accept.
        let tmp = d.join("norkyst.nc.part");
        std::fs::write(&tmp, &bytes).unwrap();
        std::fs::rename(&tmp, &p).unwrap();
    }

    p
}

/// Benchmark: read the entire `u_eastward` variable through the hidefix index.
#[ignore]
#[bench]
fn idx_big_slice(b: &mut Bencher) {
    let path = get_file();
    let index = Index::index(&path).unwrap();
    let mut reader = index.reader("u_eastward").unwrap();

    b.iter(|| {
        let values = reader.values::<f32>(None, None).unwrap();
        test::black_box(values)
    });
}

/// Benchmark: read the entire `u_eastward` variable with the native hdf5 reader.
#[ignore]
#[bench]
fn native_big_slice(b: &mut Bencher) {
    let path = get_file();
    let file = hdf5::File::open(&path).unwrap();
    let dataset = file.dataset("u_eastward").unwrap();

    b.iter(|| {
        let values = dataset.read_raw::<f32>().unwrap();
        test::black_box(values)
    })
}
15 changes: 12 additions & 3 deletions src/idx/chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,26 @@ impl<const D: usize> Chunk<D> {
}
}

/// Return this chunk's offset as a plain `[u64; D]` array.
///
/// The stored offsets are accessed via `.get()` (presumably a byte-order
/// or non-zero wrapper type — confirm against the `offset` field
/// declaration). The conversion goes through a `Vec` only so `try_into`
/// can produce the fixed-size array; the iterator length always equals
/// `D`, so the final `unwrap` cannot fail.
pub fn offset_u64(&self) -> [u64; D] {
    self.offset
        .iter()
        .map(|o| o.get())
        .collect::<Vec<_>>()
        .try_into()
        .unwrap()
}

/// Is the point described by the indices inside the chunk (`Equal`), before (`Less`) or after
/// (`Greater`).
#[must_use]
pub fn contains(&self, i: &[u64], shape: &[u64]) -> Ordering {
assert!(i.len() == shape.len());
pub fn contains(&self, i: &[u64], chunk_shape: &[u64]) -> Ordering {
assert!(i.len() == chunk_shape.len());
assert!(i.len() == self.offset.len());

for j in 0..i.len() {
if i[j] < self.offset[j].get() {
return Ordering::Less;
} else if i[j] >= self.offset[j].get() + shape[j] {
} else if i[j] >= self.offset[j].get() + chunk_shape[j] {
return Ordering::Greater;
}
}
Expand Down
6 changes: 6 additions & 0 deletions src/idx/dataset/any.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ pub trait DatasetExt {

fn chunk_shape(&self) -> &[u64];

fn valid(&self) -> anyhow::Result<bool>;

fn as_par_reader(&self, p: &dyn AsRef<Path>) -> anyhow::Result<Box<dyn DatasetExtReader + '_>>;
}

Expand All @@ -140,6 +142,10 @@ impl<'a> DatasetExt for DatasetD<'a> {
self.inner().chunk_shape()
}

fn valid(&self) -> anyhow::Result<bool> {
self.inner().valid()
}

fn as_par_reader(&self, p: &dyn AsRef<Path>) -> anyhow::Result<Box<dyn DatasetExtReader + '_>> {
self.inner().as_par_reader(p)
}
Expand Down
Loading
Loading