Skip to content

Commit

Permalink
Use rayon to speed up BC7 compression
Browse files Browse the repository at this point in the history
  • Loading branch information
hasenbanck committed Jan 7, 2025
1 parent 3558e9b commit 04d0eab
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 12 deletions.
6 changes: 4 additions & 2 deletions korangar/src/loaders/async/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,11 @@ impl AsyncLoader {
sprite_loader: Arc<SpriteLoader>,
texture_loader: Arc<TextureLoader>,
) -> Self {
+let num_cpu = rayon::current_num_threads();
+
 let thread_pool = ThreadPoolBuilder::new()
-.num_threads(2)
-.thread_name(|_| "async loader".to_string())
+.num_threads(num_cpu)
+.thread_name(|number| format!("async loader {number}"))
.build()
.unwrap();

Expand Down
41 changes: 31 additions & 10 deletions korangar/src/loaders/texture/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ use korangar_util::color::contains_transparent_pixel;
use korangar_util::container::SimpleCache;
use korangar_util::texture_atlas::{AllocationId, AtlasAllocation, OfflineTextureAtlas};
use korangar_util::FileLoader;
+use rayon::iter::{IndexedParallelIterator, ParallelIterator};
+use rayon::prelude::ParallelSliceMut;
use wgpu::{
CommandEncoderDescriptor, Device, Extent3d, Queue, TextureAspect, TextureDescriptor, TextureDimension, TextureFormat, TextureUsages,
TextureViewDescriptor, TextureViewDimension,
Expand Down Expand Up @@ -246,16 +248,35 @@ impl TextureLoader {
}
TextureCompression::BC7 => {
let mip_size = intel_tex_2::bc7::calc_output_size(width, height);
-intel_tex_2::bc7::compress_blocks_into(
-&intel_tex_2::bc7::alpha_ultra_fast_settings(),
-&RgbaSurface {
-data: current_image.as_raw(),
-width,
-height,
-stride: width * 4,
-},
-&mut final_buffer[offset..offset + mip_size],
-);
+let settings = intel_tex_2::bc7::alpha_ultra_fast_settings();
+
+let strip_height = 16;
+let bytes_per_row = width as usize * 4;
+let blocks_per_row = width as usize / 4;
+let strip_output_size = blocks_per_row * strip_height / 4 * 16;
+
+final_buffer[offset..offset + mip_size]
+.par_chunks_mut(strip_output_size)
+.enumerate()
+.for_each(|(strip_idx, output_chunk)| {
+let strip_y = strip_idx * strip_height;
+let strip_height = (height as usize - strip_y).min(strip_height);
+let strip_height = strip_height - (strip_height % 4);
+
+let src_offset = strip_y * bytes_per_row;
+
+intel_tex_2::bc7::compress_blocks_into(
+&settings,
+&RgbaSurface {
+data: &current_image.as_raw()[src_offset..],
+width,
+height: strip_height as u32,
+stride: (width as usize * 4) as u32,
+},
+output_chunk,
+);
+});
+
offset += mip_size;
}
}
Expand Down

0 comments on commit 04d0eab

Please sign in to comment.