Skip to content

Commit

Permalink
get gpu_destroyer working on single
Browse files Browse the repository at this point in the history
  • Loading branch information
d3v-null committed May 7, 2024
1 parent 1023058 commit 48a78bf
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ required-features = ["hip"]

[[example]]
name = "gpu_destroyer"
required-features = ["hip"]

[patch.crates-io]
hip-sys = { git = "https://github.com/d3v-null/hip-sys", branch = "rocm6" }
Expand Down
28 changes: 19 additions & 9 deletions examples/gpu_destroyer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,12 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// }

// compute on CPU for comparison.
#[cfg(feature = "gpu-single")]
let elem: Jones<f32> = Jones::default();
#[cfg(not(feature = "gpu-single"))]
let elem: Jones<f64> = Jones::default();
let mut cpu_jones =
Array3::from_elem((delays.dim().0, freqs_hz.len(), az.len()), Jones::default());
Array3::from_elem( (delays.dim().0, freqs_hz.len(), az.len()), elem );

for ((mut out, delays), amps) in cpu_jones
.outer_iter_mut()
Expand Down Expand Up @@ -143,24 +147,30 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut pass = true;
for (&cpu, &gpu) in cpu_jones.iter().zip(gpu_jones.iter()) {
let norm = (cpu - gpu).norm_sqr();
// #[cfg(feature = "gpu-single")]
// if norm.iter().sum::<f32>() > 1e-6_f32 { pass = false; break }
// #[cfg(not(feature = "gpu-single"))]
#[cfg(feature = "gpu-single")]
if norm.iter().sum::<f32>() > 1e-6_f32 { pass = false; break }
#[cfg(not(feature = "gpu-single"))]
if norm.iter().sum::<f64>() > 1e-12_f64 {
pass = false;
break;
}
}
#[cfg(feature = "gpu-single")]
let init: (f32, f32) = (f32::MAX, f32::MIN);
#[cfg(not(feature = "gpu-single"))]
let init: (f64, f64) = (f64::MAX, f64::MIN);
let (min_norm, max_norm) = cpu_jones
.iter()
.zip(gpu_jones.iter())
.map(|(&cpu, &gpu)| (cpu - gpu).norm_sqr())
.fold((f64::MAX, f64::MIN), |(min, max), norm|
// #[cfg(feature = "gpu-single")]
// {let s:f32=norm.iter().sum(); (min.min(s), max.max(s))}
// #[cfg(not(feature = "gpu-single"))]
{let s:f64=norm.iter().sum(); (min.min(s), max.max(s))});
.fold(init, |(min, max), norm| {
#[cfg(feature = "gpu-single")]
let s:f32=norm.iter().sum();
#[cfg(not(feature = "gpu-single"))]
let s:f64=norm.iter().sum();
(min.min(s), max.max(s))
});
if pass {
eprintln!(
" attempt {:4} passed, min_norm={:?} max_norm={:?}",
Expand Down

0 comments on commit 48a78bf

Please sign in to comment.