diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 00000000..dfdd35cc --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,7 @@ +# Steam Hardware survey: https://store.steampowered.com/hwsurvey/Steam-Hardware-Software-Survey-Welcome-to-Steam +[target.'cfg(target_arch="x86_64")'] +rustflags = ["-C", "target-feature=+aes,+avx,+avx2,+cmpxchg16b,+fma,+sse3,+ssse3,+sse4.1,+sse4.2"] + +# On Linux, nightly Rust uses rust-lld, which runs into problems with linking C++ code. +[target.x86_64-unknown-linux-gnu] +rustflags = ["-Zlinker-features=-lld"] diff --git a/Cargo.lock b/Cargo.lock index 68ae0ba1..793d3d72 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -818,6 +818,21 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd2e7510819d6fbf51a5545c8f922716ecfb14df168a3242f7d33e0239efe6a1" +[[package]] +name = "fast_image_resize" +version = "5.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7890c03bc18abd95e18d972f6f8f19ecc792cc01f208f060b3764a672de9e2" +dependencies = [ + "bytemuck", + "cfg-if", + "document-features", + "image", + "num-traits", + "rayon", + "thiserror", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -1624,6 +1639,7 @@ dependencies = [ "cosmic-text", "derive-new", "encoding_rs", + "fast_image_resize", "flate2", "glidesort", "hashbrown", diff --git a/Cargo.toml b/Cargo.toml index 53ca2010..85e9e613 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ cpal = "0.15" derive-new = "0.7" encoding_rs = "0.8" etherparse = "0.16" +fast_image_resize = "5.1" fast-srgb8 = "1" flate2 = { version = "1", default-features = false } glidesort = "0.1" diff --git a/korangar/Cargo.toml b/korangar/Cargo.toml index 3f7cb3b0..e5bb97b3 100644 --- a/korangar/Cargo.toml +++ b/korangar/Cargo.toml @@ -12,6 +12,7 @@ chrono = { workspace = true } cosmic-text = { workspace = true, features = ["std", "fontconfig"] } derive-new = { workspace = true } encoding_rs = { workspace
= true } +fast_image_resize = { workspace = true, features = ["image", "rayon"] } flate2 = { workspace = true, features = ["zlib-rs"] } hashbrown = { workspace = true } glidesort = { workspace = true } diff --git a/korangar/src/graphics/mod.rs b/korangar/src/graphics/mod.rs index c2ac065c..c4521653 100644 --- a/korangar/src/graphics/mod.rs +++ b/korangar/src/graphics/mod.rs @@ -37,7 +37,6 @@ pub use self::engine::{GraphicsEngine, GraphicsEngineDescriptor}; pub use self::error::error_handler; pub use self::frame_pacer::*; pub use self::instruction::*; -pub use self::passes::{Lanczos3Drawer, MipMapRenderPassContext}; pub use self::picker_target::PickerTarget; pub use self::projection::*; pub use self::settings::*; diff --git a/korangar/src/graphics/passes/mipmap/lanczos3.rs b/korangar/src/graphics/passes/mipmap/lanczos3.rs deleted file mode 100644 index 4c44f3e2..00000000 --- a/korangar/src/graphics/passes/mipmap/lanczos3.rs +++ /dev/null @@ -1,112 +0,0 @@ -use wgpu::{ - include_wgsl, ColorTargetState, ColorWrites, Device, FragmentState, MultisampleState, PipelineCompilationOptions, - PipelineLayoutDescriptor, PrimitiveState, RenderPass, RenderPipeline, RenderPipelineDescriptor, ShaderModuleDescriptor, TextureFormat, - VertexState, -}; - -use crate::graphics::passes::mipmap::MipMapRenderPassContext; - -const SHADER: ShaderModuleDescriptor = include_wgsl!("shader/lanczos3.wgsl"); -const DRAWER_NAME: &str = "lanczos3"; - -pub struct Lanczos3Drawer { - pipeline: RenderPipeline, -} - -impl Lanczos3Drawer { - pub fn new(device: &Device) -> Self { - let shader_module = device.create_shader_module(SHADER); - - let pass_bind_group_layouts = MipMapRenderPassContext::bind_group_layout(device); - - let pipeline_layout = device.create_pipeline_layout(&PipelineLayoutDescriptor { - label: Some(DRAWER_NAME), - bind_group_layouts: &[pass_bind_group_layouts[0]], - push_constant_ranges: &[], - }); - - let pipeline = device.create_render_pipeline(&RenderPipelineDescriptor { - label: 
Some(DRAWER_NAME), - layout: Some(&pipeline_layout), - vertex: VertexState { - module: &shader_module, - entry_point: Some("vs_main"), - compilation_options: PipelineCompilationOptions::default(), - buffers: &[], - }, - primitive: PrimitiveState::default(), - depth_stencil: None, - multisample: MultisampleState::default(), - fragment: Some(FragmentState { - module: &shader_module, - entry_point: Some("fs_main"), - compilation_options: PipelineCompilationOptions::default(), - targets: &[Some(ColorTargetState { - format: TextureFormat::Rgba8UnormSrgb, - blend: None, - write_mask: ColorWrites::default(), - })], - }), - multiview: None, - cache: None, - }); - - Self { pipeline } - } - - pub fn draw(&self, pass: &mut RenderPass<'_>) { - pass.set_pipeline(&self.pipeline); - pass.draw(0..3, 0..1); - } -} - -#[cfg(test)] -mod test { - use std::f64::consts::PI; - - fn lanczos3(x: f64) -> f64 { - match x { - 0.0 => 1.0, - _ if x.abs() >= 3.0 => 0.0, - _ => (3.0 * (PI * x).sin() * (PI * x / 3.0).sin()) / (PI * PI * x * x), - } - } - - fn generate_lanczos3_kernel() -> Vec { - let size = 6; - let mut kernel = Vec::with_capacity(size * size); - let mut sum = 0.0; - - // Generate kernel values. - for y in 0..size { - for x in 0..size { - // Center is between pixels 2 and 3 (since the range is 0..=5). - let dx = (x as f64) - 2.5; - let dy = (y as f64) - 2.5; - let value = lanczos3(dx) * lanczos3(dy); - kernel.push(value); - sum += value; - } - } - - // Normalize the kernel. - for value in kernel.iter_mut() { - *value /= sum; - } - - kernel - } - - /// This test function can be used to create the lanczos3 kernel parameters. 
- #[test] - fn test_generate_lanczos3_kernel() { - let kernel = generate_lanczos3_kernel(); - - println!("const KERNEL: array = array("); - for chunk in kernel.chunks(6) { - let line = chunk.iter().map(|x| format!("{:.8}", x)).collect::>().join(", "); - println!(" {line},"); - } - println!(");"); - } -} diff --git a/korangar/src/graphics/passes/mipmap/mod.rs b/korangar/src/graphics/passes/mipmap/mod.rs deleted file mode 100644 index fa87c988..00000000 --- a/korangar/src/graphics/passes/mipmap/mod.rs +++ /dev/null @@ -1,85 +0,0 @@ -mod lanczos3; - -use std::sync::OnceLock; - -pub use lanczos3::Lanczos3Drawer; -use wgpu::{ - BindGroup, BindGroupDescriptor, BindGroupEntry, BindGroupLayout, BindGroupLayoutDescriptor, BindGroupLayoutEntry, BindingResource, - BindingType, Color, CommandEncoder, Device, LoadOp, Operations, RenderPass, RenderPassColorAttachment, RenderPassDescriptor, - ShaderStages, StoreOp, TextureSampleType, TextureView, TextureViewDimension, -}; - -const PASS_NAME: &str = "mip map render pass"; - -#[derive(Default)] -pub struct MipMapRenderPassContext {} - -impl MipMapRenderPassContext { - pub fn create_pass<'encoder>( - &self, - device: &Device, - encoder: &'encoder mut CommandEncoder, - source_texture: &TextureView, - destination_texture_view: &TextureView, - ) -> RenderPass<'encoder> { - let bind_group = Self::create_bind_group(device, source_texture); - - let mut pass = encoder.begin_render_pass(&RenderPassDescriptor { - label: Some(PASS_NAME), - color_attachments: &[Some(RenderPassColorAttachment { - view: &destination_texture_view, - resolve_target: None, - ops: Operations { - load: LoadOp::Clear(Color { - r: 1.0, - g: 0.0, - b: 1.0, - a: 1.0, - }), - store: StoreOp::Store, - }, - })], - depth_stencil_attachment: None, - timestamp_writes: None, - occlusion_query_set: None, - }); - - pass.set_bind_group(0, &bind_group, &[]); - - pass - } - - pub fn bind_group_layout(device: &Device) -> [&'static BindGroupLayout; 1] { - 
[Self::create_bind_group_layout(device)] - } - - fn create_bind_group(device: &Device, source_texture_view: &TextureView) -> BindGroup { - device.create_bind_group(&BindGroupDescriptor { - label: Some(PASS_NAME), - layout: Self::create_bind_group_layout(device), - entries: &[BindGroupEntry { - binding: 0, - resource: BindingResource::TextureView(source_texture_view), - }], - }) - } - - fn create_bind_group_layout(device: &Device) -> &'static BindGroupLayout { - static LAYOUT: OnceLock = OnceLock::new(); - LAYOUT.get_or_init(|| { - device.create_bind_group_layout(&BindGroupLayoutDescriptor { - label: Some(PASS_NAME), - entries: &[BindGroupLayoutEntry { - binding: 0, - visibility: ShaderStages::FRAGMENT, - ty: BindingType::Texture { - sample_type: TextureSampleType::Float { filterable: true }, - view_dimension: TextureViewDimension::D2, - multisampled: false, - }, - count: None, - }], - }) - }) - } -} diff --git a/korangar/src/graphics/passes/mipmap/shader/lanczos3.wgsl b/korangar/src/graphics/passes/mipmap/shader/lanczos3.wgsl deleted file mode 100644 index 6bf0c70c..00000000 --- a/korangar/src/graphics/passes/mipmap/shader/lanczos3.wgsl +++ /dev/null @@ -1,51 +0,0 @@ -struct VertexOutput { - @builtin(position) position: vec4, - @location(0) texture_coordinate: vec2, -} - -@group(0) @binding(0) var source_texture: texture_2d; - -@vertex -fn vs_main(@builtin(vertex_index) vertex_index: u32) -> VertexOutput { - // Full screen triangle. - let uv = vec2(f32((vertex_index << 1u) & 2u), f32(vertex_index & 2u)); - var output: VertexOutput; - output.position = vec4(uv * vec2(2.0, -2.0) + vec2(-1.0, 1.0), 0.0, 1.0); - output.texture_coordinate = uv; - return output; -} - -// A 6x6 Lanczos 3 kernel. 
-const KERNEL: array = array( - 0.00059812, -0.00332290, 0.01495304, 0.01495304, -0.00332290, 0.00059812, - -0.00332290, 0.01846054, -0.08307242, -0.08307242, 0.01846054, -0.00332290, - 0.01495304, -0.08307242, 0.37382591, 0.37382591, -0.08307242, 0.01495304, - 0.01495304, -0.08307242, 0.37382591, 0.37382591, -0.08307242, 0.01495304, - -0.00332290, 0.01846054, -0.08307242, -0.08307242, 0.01846054, -0.00332290, - 0.00059812, -0.00332290, 0.01495304, 0.01495304, -0.00332290, 0.00059812, -); - -const KERNEL_SIZE: u32 = 6; -const BORDER_SIZE: i32 = 3; - -@fragment -fn fs_main(input: VertexOutput) -> @location(0) vec4 { - let texture_dimensions = textureDimensions(source_texture); - let pixel_coords = vec2(input.texture_coordinate * vec2(texture_dimensions)); - - var color = vec4(0.0); - - for(var ky = 0u; ky < KERNEL_SIZE; ky++) { - for(var kx = 0u; kx < KERNEL_SIZE; kx++) { - let sample_position = vec2( - pixel_coords.x + i32(kx) - BORDER_SIZE, - pixel_coords.y + i32(ky) - BORDER_SIZE - ); - let clamped_position = clamp(sample_position, vec2(0), vec2(texture_dimensions) - 1); - let source_color = textureLoad(source_texture, clamped_position, 0); - color += source_color * KERNEL[ky * 6u + kx]; - } - } - - return color; -} diff --git a/korangar/src/graphics/passes/mod.rs b/korangar/src/graphics/passes/mod.rs index 1c93d4f4..4ecf3431 100644 --- a/korangar/src/graphics/passes/mod.rs +++ b/korangar/src/graphics/passes/mod.rs @@ -2,7 +2,7 @@ mod directional_shadow; mod forward; mod interface; mod light_culling; -mod mipmap; + mod picker; mod point_shadow; mod postprocessing; @@ -16,7 +16,6 @@ pub(crate) use directional_shadow::*; pub(crate) use forward::*; pub(crate) use interface::*; pub(crate) use light_culling::*; -pub use mipmap::*; pub(crate) use picker::*; pub(crate) use point_shadow::*; pub(crate) use postprocessing::*; diff --git a/korangar/src/graphics/settings.rs b/korangar/src/graphics/settings.rs index 44a4219f..06cd1b45 100644 --- 
a/korangar/src/graphics/settings.rs +++ b/korangar/src/graphics/settings.rs @@ -168,6 +168,12 @@ pub enum TextureCompression { BC7, } +impl TextureCompression { + pub fn is_uncompressed(&self) -> bool { + *self == TextureCompression::Off + } +} + impl From for TextureFormat { fn from(value: TextureCompression) -> Self { match value { diff --git a/korangar/src/loaders/async/mod.rs b/korangar/src/loaders/async/mod.rs index 939c226e..0dfde69c 100644 --- a/korangar/src/loaders/async/mod.rs +++ b/korangar/src/loaders/async/mod.rs @@ -1,4 +1,5 @@ use std::cmp::PartialEq; +use std::num::NonZero; use std::sync::{Arc, Mutex}; use hashbrown::HashMap; @@ -55,6 +56,12 @@ impl PartialEq for LoadStatus { } } +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd)] +enum TaskType { + Light, + Heavy, +} + pub struct AsyncLoader { action_loader: Arc, animation_loader: Arc, @@ -63,7 +70,8 @@ pub struct AsyncLoader { sprite_loader: Arc, texture_loader: Arc, pending_loads: Arc>>, - thread_pool: ThreadPool, + light_task_thread_pool: ThreadPool, + heavy_task_thread_pool: ThreadPool, } impl AsyncLoader { @@ -75,9 +83,17 @@ impl AsyncLoader { sprite_loader: Arc, texture_loader: Arc, ) -> Self { - let thread_pool = ThreadPoolBuilder::new() - .num_threads(2) - .thread_name(|_| "async loader".to_string()) + let parallelism = std::thread::available_parallelism().unwrap_or_else(|_| NonZero::new(2).unwrap()); + + let light_task_thread_pool = ThreadPoolBuilder::new() + .num_threads(1) + .thread_name(|number| format!("light task thread pool {number}")) + .build() + .unwrap(); + + let heavy_task_thread_pool = ThreadPoolBuilder::new() + .num_threads(parallelism.get()) + .thread_name(|number| format!("heavy task thread pool {number}")) .build() .unwrap(); @@ -89,7 +105,8 @@ impl AsyncLoader { sprite_loader, texture_loader, pending_loads: Arc::new(Mutex::new(HashMap::new())), - thread_pool, + light_task_thread_pool, + heavy_task_thread_pool, } } @@ -107,7 +124,7 @@ impl AsyncLoader { let action_loader 
= self.action_loader.clone(); let animation_loader = self.animation_loader.clone(); - self.request_load(LoaderId::AnimationData(entity_id), move || { + self.request_load(TaskType::Light, LoaderId::AnimationData(entity_id), move || { #[cfg(feature = "debug")] let _load_measurement = Profiler::start_measurement("animation data load"); @@ -137,7 +154,7 @@ impl AsyncLoader { let texture_loader = self.texture_loader.clone(); let path = path.to_string(); - self.request_load(LoaderId::ItemSprite(item_id), move || { + self.request_load(TaskType::Light, LoaderId::ItemSprite(item_id), move || { #[cfg(feature = "debug")] let _load_measurement = Profiler::start_measurement("item sprite load"); @@ -167,7 +184,7 @@ impl AsyncLoader { let model_loader = self.model_loader.clone(); let texture_loader = self.texture_loader.clone(); - self.request_load(LoaderId::Map(map_name.clone()), move || { + self.request_load(TaskType::Heavy, LoaderId::Map(map_name.clone()), move || { #[cfg(feature = "debug")] let _load_measurement = Profiler::start_measurement("map load"); @@ -183,7 +200,7 @@ impl AsyncLoader { }); } - fn request_load(&self, id: LoaderId, load_function: F) + fn request_load(&self, task_type: TaskType, id: LoaderId, load_function: F) where F: FnOnce() -> Result + Send + 'static, { @@ -191,7 +208,12 @@ impl AsyncLoader { pending_loads.lock().unwrap().insert(id.clone(), LoadStatus::Loading); - self.thread_pool.spawn(move || { + let thread_pool = match task_type { + TaskType::Light => &self.light_task_thread_pool, + TaskType::Heavy => &self.heavy_task_thread_pool, + }; + + thread_pool.spawn(move || { #[cfg(feature = "debug")] let _measurement = threads::Loader::start_frame(); diff --git a/korangar/src/loaders/texture/mod.rs b/korangar/src/loaders/texture/mod.rs index 233f435e..4c70d318 100644 --- a/korangar/src/loaders/texture/mod.rs +++ b/korangar/src/loaders/texture/mod.rs @@ -2,8 +2,10 @@ use std::io::Cursor; use std::num::{NonZeroU32, NonZeroUsize}; use std::sync::{Arc, Mutex}; 
+use fast_image_resize::images::Image; +use fast_image_resize::{FilterType, PixelType, ResizeAlg, ResizeOptions, Resizer, SrcCropping}; use hashbrown::HashMap; -use image::{imageops, GrayImage, ImageBuffer, ImageFormat, ImageReader, Rgba, RgbaImage}; +use image::{DynamicImage, GrayImage, ImageBuffer, ImageFormat, ImageReader, Rgba, RgbaImage}; use intel_tex_2::RgbaSurface; #[cfg(feature = "debug")] use korangar_debug::logging::{print_debug, Colorize, Timer}; @@ -11,14 +13,13 @@ use korangar_util::color::contains_transparent_pixel; use korangar_util::container::SimpleCache; use korangar_util::texture_atlas::{AllocationId, AtlasAllocation, OfflineTextureAtlas}; use korangar_util::FileLoader; -use wgpu::{ - CommandEncoderDescriptor, Device, Extent3d, Queue, TextureAspect, TextureDescriptor, TextureDimension, TextureFormat, TextureUsages, - TextureViewDescriptor, TextureViewDimension, -}; +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +use rayon::prelude::ParallelSliceMut; +use wgpu::{Device, Extent3d, Queue, TextureDescriptor, TextureDimension, TextureFormat, TextureUsages}; use super::error::LoadError; use super::{FALLBACK_BMP_FILE, FALLBACK_JPEG_FILE, FALLBACK_PNG_FILE, FALLBACK_TGA_FILE, MIP_LEVELS}; -use crate::graphics::{Lanczos3Drawer, MipMapRenderPassContext, Texture, TextureCompression}; +use crate::graphics::{Texture, TextureCompression}; use crate::loaders::GameFileLoader; const MAX_CACHE_COUNT: u32 = 512; @@ -35,21 +36,15 @@ pub struct TextureLoader { device: Arc, queue: Arc, game_file_loader: Arc, - mip_map_render_context: MipMapRenderPassContext, - lanczos3_drawer: Lanczos3Drawer, cache: Mutex>>, } impl TextureLoader { pub fn new(device: Arc, queue: Arc, game_file_loader: Arc) -> Self { - let lanczos3_drawer = Lanczos3Drawer::new(&device); - Self { device, queue, game_file_loader, - mip_map_render_context: MipMapRenderPassContext::default(), - lanczos3_drawer, cache: Mutex::new(SimpleCache::new( NonZeroU32::new(MAX_CACHE_COUNT).unwrap(), 
NonZeroUsize::new(MAX_CACHE_SIZE).unwrap(), @@ -134,149 +129,132 @@ impl TextureLoader { transparent: bool, image: RgbaImage, ) -> Arc { + const BC_BLOCK_SIZE: usize = 16; + const BC_STRIP_HEIGHT: usize = 16; + + #[cfg(feature = "debug")] + let timer = Timer::new_dynamic(format!("pre-process texture for {}", name.magenta())); + let mut width = image.width(); let mut height = image.height(); - match texture_compression { - TextureCompression::Off => { - let texture = Texture::new_with_data( - &self.device, - &self.queue, - &TextureDescriptor { - label: Some(name), - size: Extent3d { - width: image.width(), - height: image.height(), - depth_or_array_layers: 1, - }, - mip_level_count: MIP_LEVELS, - sample_count: 1, - dimension: TextureDimension::D2, - format: TextureFormat::Rgba8UnormSrgb, - usage: TextureUsages::COPY_DST | TextureUsages::TEXTURE_BINDING | TextureUsages::RENDER_ATTACHMENT, - view_formats: &[], - }, - image.as_raw(), - transparent, - ); - - if mips_level > 1 { - let mut mip_views = Vec::with_capacity(mips_level as usize); - - for level in 0..mips_level { - let view = texture.get_texture().create_view(&TextureViewDescriptor { - label: Some(&format!("mip map level {level}")), - format: None, - dimension: Some(TextureViewDimension::D2), - aspect: TextureAspect::All, - base_mip_level: level, - mip_level_count: Some(1), - base_array_layer: 0, - array_layer_count: Some(1), - }); - mip_views.push(view); - } + let base_width = width; + let base_height = height; - let mut encoder = self.device.create_command_encoder(&CommandEncoderDescriptor { - label: Some("TextureLoader"), - }); + assert_eq!(width % 4, 0, "Texture width must be aligned to 4 pixels"); + assert_eq!(height % 4, 0, "Texture height must be aligned to 4 pixels"); - for index in 0..(mips_level - 1) as usize { - let mut pass = - self.mip_map_render_context - .create_pass(&self.device, &mut encoder, &mip_views[index], &mip_views[index + 1]); + let mut total_size = 0; + let mut mip_width = width as 
usize; + let mut mip_height = height as usize; - self.lanczos3_drawer.draw(&mut pass); - } + for _ in 0..mips_level { + let mip_size = if texture_compression.is_uncompressed() { + mip_width * mip_height * 4 + } else { + (mip_width / 4) * (mip_height / 4) * 16 + }; - self.queue.submit(Some(encoder.finish())); - } + total_size += mip_size; - Arc::new(texture) - } - TextureCompression::BC3 | TextureCompression::BC7 => { - #[cfg(feature = "debug")] - let timer = Timer::new_dynamic(format!("compress texture for {}", name.magenta())); + mip_width = (mip_width / 2).max(4); + mip_height = (mip_height / 2).max(4); + } + + let mut final_buffer = vec![0u8; total_size]; + let mut current_image = image; + let mut offset = 0; - let mut total_size = 0; - let atlas_width = width; - let atlas_height = height; + let mut resizer = Resizer::new(); + let resize_options = ResizeOptions { + algorithm: ResizeAlg::Convolution(FilterType::Lanczos3), + cropping: SrcCropping::None, + mul_div_alpha: true, + }; - assert_eq!(width % 4, 0, "Texture width must be aligned to 4 pixels"); - assert_eq!(height % 4, 0, "Texture height must be aligned to 4 pixels"); + let bc7_settings = intel_tex_2::bc7::alpha_ultra_fast_settings(); - for _ in 0..mips_level { - // Compressed blocks are 16 bytes each, covering 4x4 pixels - let mip_size = (width / 4) * (height / 4) * 16; - total_size += mip_size as usize; - width = (width / 2).max(4); - height = (height / 2).max(4); - } + for level in 0..mips_level { + if level > 0 { + width = (width / 2).max(4); + height = (height / 2).max(4); - let mut final_buffer = vec![0u8; total_size]; - let mut current_image = image; - let mut width = current_image.width(); - let mut height = current_image.height(); - let mut offset = 0; + let mut dst_image = Image::new(width, height, PixelType::U8x4); + resizer + .resize(&DynamicImage::ImageRgba8(current_image), &mut dst_image, &resize_options) + .unwrap(); - for level in 0..mips_level { - if level > 0 { - width = (width / 
2).max(4); - height = (height / 2).max(4); - current_image = imageops::resize(&current_image, width, height, imageops::FilterType::Lanczos3); - } + current_image = RgbaImage::from_raw(width, height, dst_image.into_vec()).unwrap(); + } + match texture_compression.is_uncompressed() { + true => { + let mip_size = width as usize * height as usize * 4; + final_buffer[offset..offset + mip_size].copy_from_slice(current_image.as_raw()); + offset += mip_size; + } + false => { assert_eq!(width % 4, 0, "Mipmap width must be aligned to 4 pixels"); assert_eq!(height % 4, 0, "Mipmap height must be aligned to 4 pixels"); - match texture_compression { - TextureCompression::Off => unreachable!(), - TextureCompression::BC3 => { - let mip_size = intel_tex_2::bc3::calc_output_size(width, height); - intel_tex_2::bc3::compress_blocks_into( - &RgbaSurface { - data: current_image.as_raw(), - width, - height, - stride: width * 4, - }, - &mut final_buffer[offset..offset + mip_size], - ); - offset += mip_size; - } - TextureCompression::BC7 => { - let mip_size = intel_tex_2::bc7::calc_output_size(width, height); - intel_tex_2::bc7::compress_blocks_into( - &intel_tex_2::bc7::alpha_ultra_fast_settings(), - &RgbaSurface { - data: current_image.as_raw(), - width, - height, - stride: width * 4, - }, - &mut final_buffer[offset..offset + mip_size], - ); - offset += mip_size; - } - } + let surface = RgbaSurface { + data: current_image.as_raw(), + width, + height, + stride: width * 4, + }; + + let bytes_per_row = width as usize * 4; + let blocks_per_row = width as usize / 4; + let strip_output_size = blocks_per_row * BC_STRIP_HEIGHT / 4 * BC_BLOCK_SIZE; + let mip_size = (width / 4) as usize * (height / 4) as usize * BC_BLOCK_SIZE; + + final_buffer[offset..offset + mip_size] + .par_chunks_mut(strip_output_size) + .enumerate() + .for_each(|(strip_idx, output_chunk)| { + let strip_y = strip_idx * BC_STRIP_HEIGHT; + let strip_height = (height as usize - strip_y).min(BC_STRIP_HEIGHT); + let strip_height =
strip_height - (strip_height % 4); + + let src_offset = strip_y * bytes_per_row; + let strip_surface = RgbaSurface { + data: &surface.data[src_offset..], + width, + height: strip_height as u32, + stride: surface.stride, + }; + + match texture_compression { + TextureCompression::BC3 => { + intel_tex_2::bc3::compress_blocks_into(&strip_surface, output_chunk); + } + TextureCompression::BC7 => { + intel_tex_2::bc7::compress_blocks_into(&bc7_settings, &strip_surface, output_chunk); + } + _ => unreachable!(), + } + }); + + offset += mip_size; } + } + } - let texture = self.create_raw( - name, - atlas_width, - atlas_height, - mips_level, - texture_compression.into(), - transparent, - &final_buffer, - ); + let texture = self.create_raw( + name, + base_width, + base_height, + mips_level, + texture_compression.into(), + transparent, + &final_buffer, + ); - #[cfg(feature = "debug")] - timer.stop(); + #[cfg(feature = "debug")] + timer.stop(); - texture - } - } + texture } pub fn load(&self, path: &str, image_type: ImageType) -> Result, LoadError> {