Skip to content

Commit

Permalink
Added integral approximation of Gaussian
Browse files (browse the repository at this point in the history)
  • Loading branch information
awxkee committed Aug 27, 2024
1 parent 76a339a commit 8828d60
Show file tree
Hide file tree
Showing 26 changed files with 3,305 additions and 159 deletions.
196 changes: 132 additions & 64 deletions benches/gauss_bench/main.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use criterion::{criterion_group, criterion_main, Criterion};
use image::io::Reader as ImageReader;
use image::GenericImageView;
use libblur::{EdgeMode, FastBlurChannels, ThreadingPolicy};
use opencv::core::{
find_file, mean, split, Mat, MatTraitConst, MatTraitConstManual, Size, Vector, BORDER_DEFAULT,
};
use opencv::imgcodecs::{imread, IMREAD_COLOR};
use libblur::{EdgeMode, FastBlurChannels, GaussianPreciseLevel, ThreadingPolicy};
// use opencv::core::{
// find_file, mean, split, Mat, MatTraitConst, MatTraitConstManual, Size, Vector, BORDER_DEFAULT,
// };
// use opencv::imgcodecs::{imread, IMREAD_COLOR};

pub(crate) fn split_channels_3<T: Copy>(
image: &[T],
Expand Down Expand Up @@ -58,9 +58,32 @@ pub fn criterion_benchmark(c: &mut Criterion) {
FastBlurChannels::Channels4,
EdgeMode::KernelClip,
ThreadingPolicy::Adaptive,
GaussianPreciseLevel::EXACT,
);
})
});

c.bench_function("RGBA gauss blur kernel clip approx", |b| {
b.iter(|| {
let mut dst_bytes: Vec<u8> = Vec::with_capacity(dimensions.1 as usize * stride);
dst_bytes.resize(dimensions.1 as usize * stride, 0);
libblur::gaussian_blur(
&src_bytes,
stride as u32,
&mut dst_bytes,
stride as u32,
dimensions.0,
dimensions.1,
77 * 2 + 1,
(77f32 * 2f32 + 1f32) / 6f32,
FastBlurChannels::Channels4,
EdgeMode::KernelClip,
ThreadingPolicy::Adaptive,
GaussianPreciseLevel::INTEGRAL,
);
})
});

c.bench_function("RGBA gauss blur edge clamp", |b| {
b.iter(|| {
let mut dst_bytes: Vec<u8> = Vec::with_capacity(dimensions.1 as usize * stride);
Expand All @@ -77,33 +100,55 @@ pub fn criterion_benchmark(c: &mut Criterion) {
FastBlurChannels::Channels4,
EdgeMode::Clamp,
ThreadingPolicy::Adaptive,
GaussianPreciseLevel::EXACT,
);
})
});

let src = imread(
&find_file(&"assets/test_image_4.png", false, false).unwrap(),
IMREAD_COLOR,
)
.unwrap();
let mut planes = Vector::<Mat>::new();
// split(&src, &mut planes).unwrap();

c.bench_function("OpenCV RGBA Gaussian", |b| {
c.bench_function("RGBA gauss blur edge clamp approx", |b| {
b.iter(|| {
let mut dst = Mat::default();
opencv::imgproc::gaussian_blur(
&src,
&mut dst,
Size::new(77 * 2 + 1, 77 * 2 + 1),
(77f64 * 2f64 + 1f64) / 6f64,
(77f64 * 2f64 + 1f64) / 6f64,
BORDER_DEFAULT,
)
.unwrap();
let mut dst_bytes: Vec<u8> = Vec::with_capacity(dimensions.1 as usize * stride);
dst_bytes.resize(dimensions.1 as usize * stride, 0);
libblur::gaussian_blur(
&src_bytes,
stride as u32,
&mut dst_bytes,
stride as u32,
dimensions.0,
dimensions.1,
77 * 2 + 1,
(77f32 * 2f32 + 1f32) / 6f32,
FastBlurChannels::Channels4,
EdgeMode::Clamp,
ThreadingPolicy::Adaptive,
GaussianPreciseLevel::INTEGRAL,
);
})
});

// let src = imread(
// &find_file(&"assets/test_image_4.png", false, false).unwrap(),
// IMREAD_COLOR,
// )
// .unwrap();
// let mut planes = Vector::<Mat>::new();
// // split(&src, &mut planes).unwrap();
//
// c.bench_function("OpenCV RGBA Gaussian", |b| {
// b.iter(|| {
// let mut dst = Mat::default();
// opencv::imgproc::gaussian_blur(
// &src,
// &mut dst,
// Size::new(77 * 2 + 1, 77 * 2 + 1),
// (77f64 * 2f64 + 1f64) / 6f64,
// (77f64 * 2f64 + 1f64) / 6f64,
// BORDER_DEFAULT,
// )
// .unwrap();
// })
// });

{
let img = ImageReader::open("assets/test_image_1.jpg")
.unwrap()
Expand All @@ -129,30 +174,31 @@ pub fn criterion_benchmark(c: &mut Criterion) {
FastBlurChannels::Channels3,
EdgeMode::Clamp,
ThreadingPolicy::Adaptive,
GaussianPreciseLevel::EXACT,
);
})
});

let src = imread(
&find_file(&"assets/test_image_1.jpg", false, false).unwrap(),
IMREAD_COLOR,
)
.unwrap();

c.bench_function("OpenCV RGB Gaussian", |b| {
b.iter(|| {
let mut dst = Mat::default();
opencv::imgproc::gaussian_blur(
&src,
&mut dst,
Size::new(77 * 2 + 1, 77 * 2 + 1),
(77f64 * 2f64 + 1f64) / 6f64,
(77f64 * 2f64 + 1f64) / 6f64,
BORDER_DEFAULT,
)
.unwrap();
})
});
// let src = imread(
// &find_file(&"assets/test_image_1.jpg", false, false).unwrap(),
// IMREAD_COLOR,
// )
// .unwrap();
//
// c.bench_function("OpenCV RGB Gaussian", |b| {
// b.iter(|| {
// let mut dst = Mat::default();
// opencv::imgproc::gaussian_blur(
// &src,
// &mut dst,
// Size::new(77 * 2 + 1, 77 * 2 + 1),
// (77f64 * 2f64 + 1f64) / 6f64,
// (77f64 * 2f64 + 1f64) / 6f64,
// BORDER_DEFAULT,
// )
// .unwrap();
// })
// });
}
{
let img = ImageReader::open("assets/test_image_1.jpg")
Expand Down Expand Up @@ -193,33 +239,55 @@ pub fn criterion_benchmark(c: &mut Criterion) {
FastBlurChannels::Plane,
EdgeMode::Clamp,
ThreadingPolicy::Adaptive,
GaussianPreciseLevel::EXACT,
);
})
});

let src = imread(
&find_file(&"assets/test_image_1.jpg", false, false).unwrap(),
IMREAD_COLOR,
)
.unwrap();
let mut planes = Vector::<Mat>::new();
split(&src, &mut planes).unwrap();
let source_plane = planes.get(0).unwrap();

c.bench_function("OpenCV Plane Gaussian", |b| {
c.bench_function("Plane Gauss Blur Clamp Approx", |b| {
b.iter(|| {
let mut dst = Mat::default();
opencv::imgproc::gaussian_blur(
&source_plane,
&mut dst,
Size::new(77 * 2 + 1, 77 * 2 + 1),
(77f64 * 2f64 + 1f64) / 6f64,
(77f64 * 2f64 + 1f64) / 6f64,
BORDER_DEFAULT,
)
.unwrap();
let mut dst_plane_1 = vec![0u8; width * height];
let stride = width;
libblur::gaussian_blur(
&plane_1,
stride as u32,
&mut dst_plane_1,
stride as u32,
dimensions.0,
dimensions.1,
77 * 2 + 1,
(77f32 * 2f32 + 1f32) / 6f32,
FastBlurChannels::Plane,
EdgeMode::Clamp,
ThreadingPolicy::Adaptive,
GaussianPreciseLevel::INTEGRAL,
);
})
});

// let src = imread(
// &find_file(&"assets/test_image_1.jpg", false, false).unwrap(),
// IMREAD_COLOR,
// )
// .unwrap();
// let mut planes = Vector::<Mat>::new();
// split(&src, &mut planes).unwrap();
// let source_plane = planes.get(0).unwrap();
//
// c.bench_function("OpenCV Plane Gaussian", |b| {
// b.iter(|| {
// let mut dst = Mat::default();
// opencv::imgproc::gaussian_blur(
// &source_plane,
// &mut dst,
// Size::new(77 * 2 + 1, 77 * 2 + 1),
// (77f64 * 2f64 + 1f64) / 6f64,
// (77f64 * 2f64 + 1f64) / 6f64,
// BORDER_DEFAULT,
// )
// .unwrap();
// })
// });
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/lib/gaussian/avx/filter_vertical_f32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use crate::gaussian::avx::utils::_mm256_opt_fma_ps;
use crate::gaussian::gauss_sse::_mm_opt_fma_ps;
use crate::gaussian::sse::_mm_opt_fma_ps;

pub fn gaussian_blur_vertical_pass_filter_f32_avx<
T,
Expand Down
2 changes: 1 addition & 1 deletion src/lib/gaussian/gaussian.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ use crate::gaussian::avx::{
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
use crate::gaussian::neon::*;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::gaussian::gauss_sse::{
use crate::gaussian::sse::{
gaussian_blur_horizontal_pass_impl_sse, gaussian_blur_vertical_pass_impl_f32_sse,
gaussian_blur_vertical_pass_impl_sse, gaussian_horiz_one_chan_f32,
gaussian_horiz_sse_t_f_chan_f32, gaussian_sse_horiz_one_chan_u8,
Expand Down
84 changes: 79 additions & 5 deletions src/lib/gaussian/gaussian_approx_dispatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@ use crate::gaussian::gaussian_kernel::get_gaussian_kernel_1d_integral;
use crate::gaussian::neon::{
gaussian_blur_horizontal_pass_approx_neon, gaussian_blur_vertical_approx_neon,
};
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
use crate::gaussian::neon::{
gaussian_blur_horizontal_pass_filter_approx_neon,
gaussian_blur_vertical_pass_filter_approx_neon, gaussian_horiz_one_approx_u8,
gaussian_horiz_one_chan_filter_approx,
};
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::gaussian::sse::{
gaussian_blur_horizontal_pass_approx_sse, gaussian_blur_horizontal_pass_filter_approx_sse,
gaussian_blur_vertical_pass_approx_sse, gaussian_blur_vertical_pass_filter_approx_sse,
};
use crate::unsafe_slice::UnsafeSlice;
use crate::{EdgeMode, ThreadingPolicy};
use rayon::ThreadPool;
Expand Down Expand Up @@ -66,10 +77,29 @@ fn gaussian_blur_horizontal_pass<const CHANNEL_CONFIGURATION: usize, const EDGE_
start_y: u32,
end_y: u32,
) = gaussian_blur_horizontal_pass_impl_approx::<CHANNEL_CONFIGURATION, EDGE_MODE>;
let edge_mode: EdgeMode = EDGE_MODE.into();
let _edge_mode: EdgeMode = EDGE_MODE.into();
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
if (CHANNEL_CONFIGURATION == 3 || CHANNEL_CONFIGURATION == 4) && edge_mode == EdgeMode::Clamp {
_dispatcher = gaussian_blur_horizontal_pass_approx_neon::<CHANNEL_CONFIGURATION>;
{
if (CHANNEL_CONFIGURATION == 3 || CHANNEL_CONFIGURATION == 4)
&& _edge_mode == EdgeMode::Clamp
{
_dispatcher = gaussian_blur_horizontal_pass_approx_neon::<CHANNEL_CONFIGURATION>;
} else if CHANNEL_CONFIGURATION == 1 && _edge_mode == EdgeMode::Clamp {
_dispatcher = gaussian_horiz_one_approx_u8;
}
}

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
let _is_sse_available = std::arch::is_x86_feature_detected!("sse4.1");

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
{
if _is_sse_available
&& _edge_mode == EdgeMode::Clamp
&& (CHANNEL_CONFIGURATION == 3 || CHANNEL_CONFIGURATION == 4)
{
_dispatcher = gaussian_blur_horizontal_pass_approx_sse::<CHANNEL_CONFIGURATION>;
}
}
let unsafe_dst = UnsafeSlice::new(dst);
if let Some(thread_pool) = thread_pool {
Expand Down Expand Up @@ -136,13 +166,23 @@ fn gaussian_blur_vertical_pass<const CHANNEL_CONFIGURATION: usize, const EDGE_MO
start_y: u32,
end_y: u32,
) = gaussian_blur_vertical_pass_c_approx::<CHANNEL_CONFIGURATION, EDGE_MODE>;
let edge_mode: EdgeMode = EDGE_MODE.into();
let _edge_mode: EdgeMode = EDGE_MODE.into();
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
if edge_mode == EdgeMode::Clamp {
if _edge_mode == EdgeMode::Clamp {
_dispatcher = gaussian_blur_vertical_approx_neon::<CHANNEL_CONFIGURATION>;
}
}

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
let _is_sse_available = std::arch::is_x86_feature_detected!("sse4.1");

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
{
if _is_sse_available && _edge_mode == EdgeMode::Clamp {
_dispatcher = gaussian_blur_vertical_pass_approx_sse::<CHANNEL_CONFIGURATION>;
}
}
let unsafe_dst = UnsafeSlice::new(dst);
if let Some(thread_pool) = thread_pool {
thread_pool.scope(|scope| {
Expand Down Expand Up @@ -211,6 +251,21 @@ pub(crate) fn gaussian_blur_vertical_pass_approx_clip_dispatch<
start_y: u32,
end_y: u32,
) = gaussian_blur_vertical_pass_clip_edge_approx::<CHANNEL_CONFIGURATION>;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
_dispatcher = gaussian_blur_vertical_pass_filter_approx_neon::<CHANNEL_CONFIGURATION>;
}

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
let _is_sse_available = std::arch::is_x86_feature_detected!("sse4.1");

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
{
if _is_sse_available {
_dispatcher = gaussian_blur_vertical_pass_filter_approx_sse::<CHANNEL_CONFIGURATION>;
}
}

let unsafe_dst = UnsafeSlice::new(dst);
if let Some(thread_pool) = thread_pool {
thread_pool.scope(|scope| {
Expand Down Expand Up @@ -274,6 +329,25 @@ fn gaussian_blur_horizontal_pass_clip_approx_dispatch<const CHANNEL_CONFIGURATIO
start_y: u32,
end_y: u32,
) = gaussian_blur_horizontal_pass_impl_clip_edge_approx::<CHANNEL_CONFIGURATION>;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
if CHANNEL_CONFIGURATION == 3 || CHANNEL_CONFIGURATION == 4 {
_dispatcher = gaussian_blur_horizontal_pass_filter_approx_neon::<CHANNEL_CONFIGURATION>;
} else if CHANNEL_CONFIGURATION == 1 {
_dispatcher = gaussian_horiz_one_chan_filter_approx;
}
}

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
let _is_sse_available = std::arch::is_x86_feature_detected!("sse4.1");

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
{
if _is_sse_available && (CHANNEL_CONFIGURATION == 3 || CHANNEL_CONFIGURATION == 4) {
_dispatcher = gaussian_blur_horizontal_pass_filter_approx_sse::<CHANNEL_CONFIGURATION>;
}
}

let unsafe_dst = UnsafeSlice::new(dst);
if let Some(thread_pool) = thread_pool {
thread_pool.scope(|scope| {
Expand Down
Loading

0 comments on commit 8828d60

Please sign in to comment.