From 885c7cc033fff987b83108959e089249f024ff48 Mon Sep 17 00:00:00 2001 From: Alex Gemberg Date: Tue, 30 Dec 2025 11:05:45 +1300 Subject: [PATCH 1/2] perf: eliminate overdraw for opaque image fills --- Cargo.lock | 1 + sparse_strips/vello_bench/Cargo.toml | 1 + sparse_strips/vello_bench/benches/main.rs | 6 +- sparse_strips/vello_bench/src/lib.rs | 1 + sparse_strips/vello_bench/src/scene.rs | 97 +++++++++ sparse_strips/vello_common/src/coarse.rs | 202 ++++++++++++++---- sparse_strips/vello_cpu/src/dispatch/mod.rs | 12 +- .../vello_cpu/src/dispatch/multi_threaded.rs | 24 ++- .../vello_cpu/src/dispatch/single_threaded.rs | 34 ++- sparse_strips/vello_cpu/src/render.rs | 17 +- sparse_strips/vello_hybrid/src/scene.rs | 19 +- sparse_strips/vello_toy/src/debug.rs | 1 + 12 files changed, 352 insertions(+), 63 deletions(-) create mode 100644 sparse_strips/vello_bench/src/scene.rs diff --git a/Cargo.lock b/Cargo.lock index f3e5bd346..9565b1c61 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3944,6 +3944,7 @@ name = "vello_bench" version = "0.0.0" dependencies = [ "criterion", + "image", "parley", "rand", "smallvec", diff --git a/sparse_strips/vello_bench/Cargo.toml b/sparse_strips/vello_bench/Cargo.toml index d35ebe213..941ca0cae 100644 --- a/sparse_strips/vello_bench/Cargo.toml +++ b/sparse_strips/vello_bench/Cargo.toml @@ -14,6 +14,7 @@ vello_common = { workspace = true } vello_cpu = { workspace = true } vello_dev_macros = { workspace = true } criterion = { workspace = true } +image = { workspace = true, features = ["jpeg"] } parley = { version = "0.5.0", default-features = true } rand = { workspace = true } smallvec = { workspace = true } diff --git a/sparse_strips/vello_bench/benches/main.rs b/sparse_strips/vello_bench/benches/main.rs index daf4b9dfa..bc2b6b7e0 100644 --- a/sparse_strips/vello_bench/benches/main.rs +++ b/sparse_strips/vello_bench/benches/main.rs @@ -5,7 +5,7 @@ #![allow(dead_code, reason = "Might be unused on platforms not supporting SIMD")] use 
criterion::{criterion_group, criterion_main}; -use vello_bench::{fine, flatten, glyph, strip, tile}; +use vello_bench::{fine, flatten, glyph, scene, strip, tile}; criterion_group!(fine_solid, fine::fill); criterion_group!(fine_strip, fine::strip); @@ -19,6 +19,7 @@ criterion_group!(flatten, flatten::flatten); criterion_group!(strokes, flatten::strokes); criterion_group!(render_strips, strip::render_strips); criterion_group!(glyph, glyph::glyph); +criterion_group!(scene_bench, scene::images); criterion_main!( tile, render_strips, @@ -31,5 +32,6 @@ criterion_main!( fine_gradient, fine_rounded_blurred_rect, fine_blend, - fine_image + fine_image, + scene_bench ); diff --git a/sparse_strips/vello_bench/src/lib.rs b/sparse_strips/vello_bench/src/lib.rs index a0d5f572d..1ac6e7b4c 100644 --- a/sparse_strips/vello_bench/src/lib.rs +++ b/sparse_strips/vello_bench/src/lib.rs @@ -11,6 +11,7 @@ pub mod data; pub mod fine; pub mod flatten; pub mod glyph; +pub mod scene; pub mod strip; pub mod tile; diff --git a/sparse_strips/vello_bench/src/scene.rs b/sparse_strips/vello_bench/src/scene.rs new file mode 100644 index 000000000..09cedd144 --- /dev/null +++ b/sparse_strips/vello_bench/src/scene.rs @@ -0,0 +1,97 @@ +// Copyright 2025 the Vello Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Full scene rendering benchmarks. + +use std::sync::Arc; + +use criterion::Criterion; +use vello_common::kurbo::{Affine, Rect}; +use vello_common::paint::{Image, ImageSource}; +use vello_common::peniko::ImageSampler; +use vello_common::peniko::{Extend, ImageQuality}; +use vello_common::pixmap::Pixmap; +use vello_cpu::RenderContext; + +/// Image scene rendering benchmark. 
+pub fn images(c: &mut Criterion) { + let mut g = c.benchmark_group("images"); + + let flower_image = load_flower_image(); + + const VIEWPORT_WIDTH: u16 = 1280; + const VIEWPORT_HEIGHT: u16 = 960; + + let ImageSource::Pixmap(ref image_pixmap) = flower_image else { + panic!("Expected Pixmap"); + }; + let original_width = f64::from(image_pixmap.width()); + let original_height = f64::from(image_pixmap.height()); + let image_count = VIEWPORT_WIDTH / 256; + + g.bench_function("overlapping", |b| { + let mut renderer = RenderContext::new(VIEWPORT_WIDTH, VIEWPORT_HEIGHT); + let mut pixmap = Pixmap::new(VIEWPORT_WIDTH, VIEWPORT_HEIGHT); + + b.iter(|| { + renderer.reset(); + + for i in (1..=image_count).rev() { + let width = 256.0 * i as f64; + let scale = width / original_width; + let height = original_height * scale; + + renderer.set_transform(Affine::IDENTITY); + renderer.set_paint_transform(Affine::scale(scale)); + renderer.set_paint(Image { + image: flower_image.clone(), + sampler: ImageSampler { + x_extend: Extend::Pad, + y_extend: Extend::Pad, + quality: ImageQuality::Low, + alpha: 1.0, + }, + }); + renderer.fill_rect(&Rect::new(0.0, 0.0, width, height)); + } + + renderer.flush(); + renderer.render_to_pixmap(&mut pixmap); + std::hint::black_box(&pixmap); + }); + }); + + g.finish(); +} + +fn load_flower_image() -> ImageSource { + let image_data = include_bytes!("../../../examples/assets/splash-flower.jpg"); + let image = image::load_from_memory(image_data).expect("Failed to decode image"); + let width = image.width(); + let height = image.height(); + let rgba_data = image.into_rgba8().into_vec(); + + #[expect( + clippy::cast_possible_truncation, + reason = "Image dimensions fit in u16" + )] + let pixmap = Pixmap::from_parts( + rgba_data + .chunks_exact(4) + .map(|rgba| { + let alpha = u16::from(rgba[3]); + let premultiply = |component| (alpha * u16::from(component) / 255) as u8; + vello_common::color::PremulRgba8 { + r: premultiply(rgba[0]), + g: premultiply(rgba[1]), 
+ b: premultiply(rgba[2]), + a: alpha as u8, + } + }) + .collect(), + width as u16, + height as u16, + ); + + ImageSource::Pixmap(Arc::new(pixmap)) +} diff --git a/sparse_strips/vello_common/src/coarse.rs b/sparse_strips/vello_common/src/coarse.rs index d2ecfaba0..e82721805 100644 --- a/sparse_strips/vello_common/src/coarse.rs +++ b/sparse_strips/vello_common/src/coarse.rs @@ -4,6 +4,7 @@ //! Generating and processing wide tiles. use crate::color::palette::css::TRANSPARENT; +use crate::encode::EncodedPaint; use crate::filter_effects::Filter; use crate::kurbo::{Affine, Rect}; use crate::mask::Mask; @@ -62,6 +63,21 @@ pub const MODE_CPU: u8 = 0; /// generation specific for `vello_hybrid`. pub const MODE_HYBRID: u8 = 1; +/// Optimization hint for fill operations, computed in `Wide::generate` and passed to `WideTile::fill`. +/// +/// This enum communicates whether a fill operation can benefit from overdraw elimination: +/// - For opaque solid colors: we can set the background color directly and skip the fill +/// - For opaque images: we can clear previous commands but still need to emit the fill +#[derive(Debug, Clone, Copy)] +pub enum FillOptimization { + /// No optimization possible, emit fill command normally. + None, + /// Paint is an opaque solid color - can replace background if conditions are met. + OpaqueSolid(PremulColor), + /// Paint is an opaque image - can clear previous commands if conditions are met. + OpaqueImage, +} + /// A container for wide tiles. 
#[derive(Debug)] pub struct Wide { @@ -406,6 +422,7 @@ impl Wide { blend_mode: BlendMode, thread_idx: u8, mask: Option, + encoded_paints: &[EncodedPaint], ) { if strip_buf.is_empty() { return; @@ -516,10 +533,25 @@ impl Wide { .min(bbox.x1()) .min(WideTile::MAX_WIDE_TILE_COORD); + // Compute fill optimization based on paint type let fill_attrs = &self.attrs.fill[attrs_idx as usize]; - let override_color = match &fill_attrs.paint { - Paint::Solid(s) if s.is_opaque() && fill_attrs.mask.is_none() => Some(*s), - _ => None, + let optimization = if fill_attrs.mask.is_none() { + match &fill_attrs.paint { + Paint::Solid(s) if s.is_opaque() => FillOptimization::OpaqueSolid(*s), + Paint::Indexed(idx) => { + if let Some(EncodedPaint::Image(img)) = encoded_paints.get(idx.index()) + && !img.has_opacities + && img.sampler.alpha == 1.0 + { + FillOptimization::OpaqueImage + } else { + FillOptimization::None + } + } + _ => FillOptimization::None, + } + } else { + FillOptimization::None }; // Generate fill commands for each wide tile in the fill region @@ -537,7 +569,7 @@ impl Wide { width, attrs_idx, current_layer_id, - override_color, + optimization, ); // TODO: This bbox update might be redundant since filled regions are always // bounded by strip regions (which already update the bbox). Consider removing @@ -1207,49 +1239,69 @@ impl WideTile { /// For clipped filter layers, commands are always generated since filters need the full /// layer content rendered before applying the clip as a mask. /// - /// The `override_color` parameter is pre-computed by the caller: if the paint is a solid - /// opaque color with no mask, this contains that color for potential background replacement - /// optimization. 
+ /// The `optimization` parameter is pre-computed by the caller based on paint type: + /// - `OpaqueSolid(color)`: Paint is an opaque solid color, can replace background + /// - `OpaqueImage`: Paint is an opaque image, can clear previous commands + /// - `None`: No optimization available pub(crate) fn fill( &mut self, x: u16, width: u16, attrs_idx: u32, current_layer_id: LayerId, - override_color: Option, + optimization: FillOptimization, ) { if !self.is_zero_clip() || self.in_clipped_filter_layer { match MODE { MODE_CPU => { - // Note that we could be more aggressive in optimizing a whole-tile opaque fill - // even with a clip stack. It would be valid to elide all drawing commands from - // the enclosing clip push up to the fill. Further, we could extend the clip - // push command to include a background color, rather than always starting with - // a transparent buffer. Lastly, a sequence of push(bg); strip/fill; pop could - // be replaced with strip/fill with the color (the latter is true even with a - // non-opaque color). - // - // However, the extra cost of tracking such optimizations may outweigh the - // benefit, especially in hybrid mode with GPU painting. - let bg = override_color.filter(|_| { - x == 0 && width == WideTile::WIDTH && self.n_clip == 0 && self.n_bufs == 0 - }); - - if let Some(bg) = bg { - self.cmds.clear(); - self.bg = bg; - // Clear layer ranges when we clear commands - if let Some(ranges) = self.layer_cmd_ranges.get_mut(&current_layer_id) { - ranges.clear(); + // Check if we can apply overdraw elimination optimization. + // This requires filling the entire tile width with no clip/buffer stack. + let can_override = + x == 0 && width == WideTile::WIDTH && self.n_clip == 0 && self.n_bufs == 0; + + if can_override { + match optimization { + FillOptimization::OpaqueSolid(color) => { + // Note that we could be more aggressive in optimizing a whole-tile opaque fill + // even with a clip stack. 
It would be valid to elide all drawing commands from + // the enclosing clip push up to the fill. Further, we could extend the clip + // push command to include a background color, rather than always starting with + // a transparent buffer. Lastly, a sequence of push(bg); strip/fill; pop could + // be replaced with strip/fill with the color (the latter is true even with a + // non-opaque color). + // + // However, the extra cost of tracking such optimizations may outweigh the + // benefit, especially in hybrid mode with GPU painting. + self.cmds.clear(); + self.bg = color; + if let Some(ranges) = + self.layer_cmd_ranges.get_mut(&current_layer_id) + { + ranges.clear(); + } + return; + } + FillOptimization::OpaqueImage => { + // Opaque image: clear previous commands but still emit the fill. + self.cmds.clear(); + self.bg = PremulColor::from_alpha_color(TRANSPARENT); + if let Some(ranges) = + self.layer_cmd_ranges.get_mut(&current_layer_id) + { + ranges.clear(); + } + // Fall through to emit the fill command below. + } + FillOptimization::None => {} } - } else { - self.record_fill_cmd(current_layer_id, self.cmds.len()); - self.cmds.push(Cmd::Fill(CmdFill { - x, - width, - attrs_idx, - })); } + + self.record_fill_cmd(current_layer_id, self.cmds.len()); + self.cmds.push(Cmd::Fill(CmdFill { + x, + width, + attrs_idx, + })); } MODE_HYBRID => { self.record_fill_cmd(current_layer_id, self.cmds.len()); @@ -1472,7 +1524,7 @@ impl WideTile { /// // 4: PopBuf /// ``` #[allow(dead_code, reason = "useful for debugging")] - pub(crate) fn list_commands(&self) -> String { + pub fn list_commands(&self) -> String { self.cmds .iter() .enumerate() @@ -1571,7 +1623,8 @@ impl Cmd { /// Returns a human-readable name for this command. /// /// This is useful for debugging, logging, and displaying command information - /// in a user-friendly format. + /// in a user-friendly format. To get detailed paint information, use `name_with_attrs` + /// which can look up the paint from the command attributes. 
/// /// **Note:** This method is only available in debug builds (`debug_assertions`). pub fn name(&self) -> &'static str { @@ -1594,6 +1647,69 @@ impl Cmd { Self::Mask(_) => "Mask", } } + + /// Returns a human-readable name for this command with detailed paint information. + /// + /// This variant looks up paint details from the command attributes for fill commands. + /// + /// **Note:** This method is only available in debug builds (`debug_assertions`). + pub fn name_with_attrs( + &self, + fill_attrs: &[FillAttrs], + encoded_paints: &[EncodedPaint], + ) -> String { + match self { + Self::Fill(cmd) => { + if let Some(attrs) = fill_attrs.get(cmd.attrs_idx as usize) { + format!("FillPath({})", paint_name(&attrs.paint, encoded_paints)) + } else { + format!("FillPath(attrs_idx={})", cmd.attrs_idx) + } + } + Self::AlphaFill(cmd) => { + if let Some(attrs) = fill_attrs.get(cmd.attrs_idx as usize) { + format!( + "AlphaFillPath({})", + paint_name(&attrs.paint, encoded_paints) + ) + } else { + format!("AlphaFillPath(attrs_idx={})", cmd.attrs_idx) + } + } + _ => self.name().into(), + } + } +} + +/// Returns a human-readable description of a paint. 
+#[cfg(debug_assertions)] +fn paint_name(paint: &Paint, encoded_paints: &[EncodedPaint]) -> String { + match paint { + Paint::Solid(color) => { + let rgba = color.as_premul_rgba8(); + format!( + "Solid(#{:02x}{:02x}{:02x}{:02x})", + rgba.r, rgba.g, rgba.b, rgba.a + ) + } + Paint::Indexed(idx) => { + let index = idx.index(); + if let Some(encoded) = encoded_paints.get(index) { + let kind = match encoded { + EncodedPaint::Gradient(g) => match &g.kind { + crate::encode::EncodedKind::Linear(_) => "LinearGradient", + crate::encode::EncodedKind::Radial(_) => "RadialGradient", + crate::encode::EncodedKind::Sweep(_) => "SweepGradient", + }, + EncodedPaint::Image(_) => "Image", + EncodedPaint::BlurredRoundedRect(_) => "BlurredRoundedRect", + }; + format!("{}[{}]", kind, index) + } else { + format!("Indexed({})", index) + } + } + } } /// Shared attributes for alpha fill commands. @@ -1772,7 +1888,7 @@ impl LayerCommandRanges { #[cfg(test)] mod tests { - use crate::coarse::{LayerKind, MODE_CPU, Wide, WideTile}; + use crate::coarse::{FillOptimization, LayerKind, MODE_CPU, Wide, WideTile}; use crate::kurbo::Affine; use crate::peniko::{BlendMode, Compose, Mix}; use crate::render_graph::RenderGraph; @@ -1792,8 +1908,8 @@ mod tests { fn basic_layer() { let mut wide = WideTile::::new(0, 0); wide.push_buf(LayerKind::Regular(0)); - wide.fill(0, 10, 0, 0, None); - wide.fill(10, 10, 0, 0, None); + wide.fill(0, 10, 0, 0, FillOptimization::None); + wide.fill(10, 10, 0, 0, FillOptimization::None); wide.pop_buf(); assert_eq!(wide.cmds.len(), 4); @@ -1805,8 +1921,8 @@ mod tests { let mut wide = WideTile::::new(0, 0); wide.push_buf(LayerKind::Regular(0)); - wide.fill(0, 10, 0, 0, None); - wide.fill(10, 10, 0, 0, None); + wide.fill(0, 10, 0, 0, FillOptimization::None); + wide.fill(10, 10, 0, 0, FillOptimization::None); wide.blend(blend_mode); wide.pop_buf(); @@ -1819,7 +1935,7 @@ mod tests { let mut wide = WideTile::::new(0, 0); wide.push_buf(LayerKind::Regular(0)); - wide.fill(0, 10, 0, 0, 
None); + wide.fill(0, 10, 0, 0, FillOptimization::None); wide.blend(blend_mode); wide.pop_buf(); diff --git a/sparse_strips/vello_cpu/src/dispatch/mod.rs b/sparse_strips/vello_cpu/src/dispatch/mod.rs index 8b84ddca6..25abcd2d9 100644 --- a/sparse_strips/vello_cpu/src/dispatch/mod.rs +++ b/sparse_strips/vello_cpu/src/dispatch/mod.rs @@ -19,7 +19,13 @@ use vello_common::strip_generator::StripStorage; pub(crate) trait Dispatcher: Debug + Send + Sync { fn wide(&self) -> &Wide; - fn generate_wide_cmd(&mut self, strip_buf: &[Strip], paint: Paint, blend_mode: BlendMode); + fn generate_wide_cmd( + &mut self, + strip_buf: &[Strip], + paint: Paint, + blend_mode: BlendMode, + encoded_paints: &[EncodedPaint], + ); fn fill_path( &mut self, path: &BezPath, @@ -29,6 +35,7 @@ pub(crate) trait Dispatcher: Debug + Send + Sync { blend_mode: BlendMode, aliasing_threshold: Option, mask: Option, + encoded_paints: &[EncodedPaint], ); fn stroke_path( &mut self, @@ -39,6 +46,7 @@ pub(crate) trait Dispatcher: Debug + Send + Sync { blend_mode: BlendMode, aliasing_threshold: Option, mask: Option, + encoded_paints: &[EncodedPaint], ); fn push_clip_path( &mut self, @@ -61,7 +69,7 @@ pub(crate) trait Dispatcher: Debug + Send + Sync { ); fn pop_layer(&mut self); fn reset(&mut self); - fn flush(&mut self); + fn flush(&mut self, encoded_paints: &[EncodedPaint]); fn rasterize( &self, buffer: &mut [u8], diff --git a/sparse_strips/vello_cpu/src/dispatch/multi_threaded.rs b/sparse_strips/vello_cpu/src/dispatch/multi_threaded.rs index ab68e77bd..417de7292 100644 --- a/sparse_strips/vello_cpu/src/dispatch/multi_threaded.rs +++ b/sparse_strips/vello_cpu/src/dispatch/multi_threaded.rs @@ -270,7 +270,8 @@ impl MultiThreadedDispatcher { allocation_group, }; task_sender.send(task).unwrap(); - self.run_coarse(true); + // TODO: Support encoded_paints in multithreading. 
+ self.run_coarse(true, &[]); } // Currently, we do coarse rasterization in two phases: @@ -285,7 +286,7 @@ impl MultiThreadedDispatcher { // new strips that will be generated. // // This is why we have the `abort_empty`flag. - fn run_coarse(&mut self, abort_empty: bool) { + fn run_coarse(&mut self, abort_empty: bool, encoded_paints: &[EncodedPaint]) { let result_receiver = self.coarse_task_receiver.as_mut().unwrap(); loop { @@ -307,6 +308,7 @@ impl MultiThreadedDispatcher { blend_mode, thread_id, mask, + encoded_paints, ), CoarseTaskType::RenderWideCommand { strips, @@ -320,6 +322,7 @@ impl MultiThreadedDispatcher { blend_mode, thread_id, mask, + encoded_paints, ), CoarseTaskType::PushLayer { thread_id, @@ -429,6 +432,7 @@ impl Dispatcher for MultiThreadedDispatcher { blend_mode: BlendMode, aliasing_threshold: Option, mask: Option, + _encoded_paints: &[EncodedPaint], ) { let start = self.allocation_group.path.len() as u32; self.allocation_group.path.extend(path); @@ -453,6 +457,7 @@ impl Dispatcher for MultiThreadedDispatcher { blend_mode: BlendMode, aliasing_threshold: Option, mask: Option, + _encoded_paints: &[EncodedPaint], ) { let start = self.allocation_group.path.len() as u32; self.allocation_group.path.extend(path); @@ -541,7 +546,7 @@ impl Dispatcher for MultiThreadedDispatcher { self.init(); } - fn flush(&mut self) { + fn flush(&mut self, encoded_paints: &[EncodedPaint]) { if self.flushed { return; } @@ -551,7 +556,7 @@ impl Dispatcher for MultiThreadedDispatcher { // Note that dropping the sender will signal to the workers that no more new paths // can arrive. drop(sender); - self.run_coarse(false); + self.run_coarse(false, encoded_paints); self.alpha_storage.with_inner(|alphas| { // The main thread stores the alphas that are produced by playing a recording. 
@@ -596,7 +601,13 @@ impl Dispatcher for MultiThreadedDispatcher { } } - fn generate_wide_cmd(&mut self, strip_buf: &[Strip], paint: Paint, blend_mode: BlendMode) { + fn generate_wide_cmd( + &mut self, + strip_buf: &[Strip], + paint: Paint, + blend_mode: BlendMode, + _encoded_paints: &[EncodedPaint], + ) { // Note that we are essentially round-tripping here: The wide container is inside of the // main thread, but we first send a render task to a child thread which basically just // forwards it back to the main thread again. We cannot apply the wide command directly @@ -880,8 +891,9 @@ mod tests { BlendMode::default(), None, None, + &[], ); - dispatcher.flush(); + dispatcher.flush(&[]); } assert_eq!(dispatcher.allocations.paths.entries.len(), 1); diff --git a/sparse_strips/vello_cpu/src/dispatch/single_threaded.rs b/sparse_strips/vello_cpu/src/dispatch/single_threaded.rs index 5cf2b8663..748e40318 100644 --- a/sparse_strips/vello_cpu/src/dispatch/single_threaded.rs +++ b/sparse_strips/vello_cpu/src/dispatch/single_threaded.rs @@ -414,6 +414,7 @@ impl Dispatcher for SingleThreadedDispatcher { blend_mode: BlendMode, aliasing_threshold: Option, mask: Option, + encoded_paints: &[EncodedPaint], ) { let wide = &mut self.wide; @@ -428,7 +429,14 @@ impl Dispatcher for SingleThreadedDispatcher { ); // Generate coarse-level commands from strips (layer_id 0 = root layer). - wide.generate(&self.strip_storage.strips, paint, blend_mode, 0, mask); + wide.generate( + &self.strip_storage.strips, + paint, + blend_mode, + 0, + mask, + encoded_paints, + ); } fn stroke_path( @@ -440,6 +448,7 @@ impl Dispatcher for SingleThreadedDispatcher { blend_mode: BlendMode, aliasing_threshold: Option, mask: Option, + encoded_paints: &[EncodedPaint], ) { let wide = &mut self.wide; @@ -454,7 +463,14 @@ impl Dispatcher for SingleThreadedDispatcher { ); // Generate coarse-level commands from strips (layer_id 0 = root layer). 
- wide.generate(&self.strip_storage.strips, paint, blend_mode, 0, mask); + wide.generate( + &self.strip_storage.strips, + paint, + blend_mode, + 0, + mask, + encoded_paints, + ); } fn push_layer( @@ -531,7 +547,7 @@ impl Dispatcher for SingleThreadedDispatcher { self.layer_id_next = 0; } - fn flush(&mut self) { + fn flush(&mut self, _encoded_paints: &[EncodedPaint]) { // No-op for single-threaded dispatcher (no work queue to flush). } @@ -578,9 +594,16 @@ impl Dispatcher for SingleThreadedDispatcher { } } - fn generate_wide_cmd(&mut self, strip_buf: &[Strip], paint: Paint, blend_mode: BlendMode) { + fn generate_wide_cmd( + &mut self, + strip_buf: &[Strip], + paint: Paint, + blend_mode: BlendMode, + encoded_paints: &[EncodedPaint], + ) { // Generate coarse-level commands from pre-computed strips (layer_id 0 = root layer). - self.wide.generate(strip_buf, paint, blend_mode, 0, None); + self.wide + .generate(strip_buf, paint, blend_mode, 0, None, encoded_paints); } fn strip_storage_mut(&mut self) -> &mut StripStorage { @@ -652,6 +675,7 @@ mod tests { BlendMode::default(), None, None, + &[], ); // Ensure there is data to clear. 
diff --git a/sparse_strips/vello_cpu/src/render.rs b/sparse_strips/vello_cpu/src/render.rs index 49025a573..6de8eed93 100644 --- a/sparse_strips/vello_cpu/src/render.rs +++ b/sparse_strips/vello_cpu/src/render.rs @@ -203,6 +203,7 @@ impl RenderContext { ctx.blend_mode, ctx.aliasing_threshold, ctx.mask.clone(), + &ctx.encoded_paints, ); }); } @@ -219,6 +220,7 @@ impl RenderContext { ctx.blend_mode, ctx.aliasing_threshold, ctx.mask.clone(), + &ctx.encoded_paints, ); }); } @@ -236,6 +238,7 @@ impl RenderContext { ctx.blend_mode, ctx.aliasing_threshold, ctx.mask.clone(), + &ctx.encoded_paints, ); }); } @@ -253,6 +256,7 @@ impl RenderContext { ctx.blend_mode, ctx.aliasing_threshold, ctx.mask.clone(), + &ctx.encoded_paints, ); }); } @@ -307,6 +311,7 @@ impl RenderContext { self.blend_mode, self.aliasing_threshold, self.mask.clone(), + &self.encoded_paints, ); } @@ -546,7 +551,7 @@ impl RenderContext { /// For multi-threaded rendering, you _have_ to call this before rasterizing, otherwise /// the program will panic. pub fn flush(&mut self) { - self.dispatcher.flush(); + self.dispatcher.flush(&self.encoded_paints); } /// Render the current context into a buffer. @@ -630,6 +635,7 @@ impl GlyphRenderer for RenderContext { self.blend_mode, self.aliasing_threshold, self.mask.clone(), + &self.encoded_paints, ); } GlyphType::Bitmap(glyph) => { @@ -737,6 +743,7 @@ impl GlyphRenderer for RenderContext { self.blend_mode, self.aliasing_threshold, self.mask.clone(), + &self.encoded_paints, ); } GlyphType::Bitmap(_) | GlyphType::Colr(_) => { @@ -1051,8 +1058,12 @@ impl RenderContext { "Invalid strip range" ); let paint = self.encode_current_paint(); - self.dispatcher - .generate_wide_cmd(&adjusted_strips[start..end], paint, self.blend_mode); + self.dispatcher.generate_wide_cmd( + &adjusted_strips[start..end], + paint, + self.blend_mode, + &self.encoded_paints, + ); } /// Prepare cached strips for rendering by adjusting indices. 
diff --git a/sparse_strips/vello_hybrid/src/scene.rs b/sparse_strips/vello_hybrid/src/scene.rs index f6226d796..d755ce50d 100644 --- a/sparse_strips/vello_hybrid/src/scene.rs +++ b/sparse_strips/vello_hybrid/src/scene.rs @@ -228,7 +228,14 @@ impl Scene { &mut self.strip_storage, self.clip_context.get(), ); - wide.generate(&self.strip_storage.strips, paint, self.blend_mode, 0, None); + wide.generate( + &self.strip_storage.strips, + paint, + self.blend_mode, + 0, + None, + &self.encoded_paints, + ); } /// Push a new clip path to the clip stack. @@ -287,7 +294,14 @@ impl Scene { self.clip_context.get(), ); - wide.generate(&self.strip_storage.strips, paint, self.blend_mode, 0, None); + wide.generate( + &self.strip_storage.strips, + paint, + self.blend_mode, + 0, + None, + &self.encoded_paints, + ); } /// Set the aliasing threshold. @@ -751,6 +765,7 @@ impl Scene { self.blend_mode, 0, None, + &self.encoded_paints, ); } diff --git a/sparse_strips/vello_toy/src/debug.rs b/sparse_strips/vello_toy/src/debug.rs index 9833c8c2b..ac1df2643 100644 --- a/sparse_strips/vello_toy/src/debug.rs +++ b/sparse_strips/vello_toy/src/debug.rs @@ -94,6 +94,7 @@ fn main() { BlendMode::new(Mix::Normal, Compose::SrcOver), 0, None, + &[], ); } From 5e74593728b08a8f099edc4e549a4917ddf25a02 Mon Sep 17 00:00:00 2001 From: Alex Gemberg Date: Tue, 30 Dec 2025 11:22:43 +1300 Subject: [PATCH 2/2] . 
--- sparse_strips/vello_bench/benches/main.rs | 6 +- .../src/{scene.rs => integration.rs} | 15 +-- sparse_strips/vello_bench/src/lib.rs | 2 +- sparse_strips/vello_common/src/coarse.rs | 94 ++++++++++--------- .../vello_cpu/src/dispatch/multi_threaded.rs | 5 +- 5 files changed, 61 insertions(+), 61 deletions(-) rename sparse_strips/vello_bench/src/{scene.rs => integration.rs} (85%) diff --git a/sparse_strips/vello_bench/benches/main.rs b/sparse_strips/vello_bench/benches/main.rs index bc2b6b7e0..b9f50d4e5 100644 --- a/sparse_strips/vello_bench/benches/main.rs +++ b/sparse_strips/vello_bench/benches/main.rs @@ -5,7 +5,7 @@ #![allow(dead_code, reason = "Might be unused on platforms not supporting SIMD")] use criterion::{criterion_group, criterion_main}; -use vello_bench::{fine, flatten, glyph, scene, strip, tile}; +use vello_bench::{fine, flatten, glyph, integration, strip, tile}; criterion_group!(fine_solid, fine::fill); criterion_group!(fine_strip, fine::strip); @@ -19,7 +19,7 @@ criterion_group!(flatten, flatten::flatten); criterion_group!(strokes, flatten::strokes); criterion_group!(render_strips, strip::render_strips); criterion_group!(glyph, glyph::glyph); -criterion_group!(scene_bench, scene::images); +criterion_group!(integration_bench, integration::images); criterion_main!( tile, render_strips, @@ -33,5 +33,5 @@ criterion_main!( fine_rounded_blurred_rect, fine_blend, fine_image, - scene_bench + integration_bench ); diff --git a/sparse_strips/vello_bench/src/scene.rs b/sparse_strips/vello_bench/src/integration.rs similarity index 85% rename from sparse_strips/vello_bench/src/scene.rs rename to sparse_strips/vello_bench/src/integration.rs index 09cedd144..1daa02785 100644 --- a/sparse_strips/vello_bench/src/scene.rs +++ b/sparse_strips/vello_bench/src/integration.rs @@ -1,7 +1,7 @@ // Copyright 2025 the Vello Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -//! Full scene rendering benchmarks. +//! Integration benchmarks for full rendering pipelines. 
use std::sync::Arc; @@ -12,6 +12,7 @@ use vello_common::peniko::ImageSampler; use vello_common::peniko::{Extend, ImageQuality}; use vello_common::pixmap::Pixmap; use vello_cpu::RenderContext; +use vello_cpu::color::AlphaColor; /// Image scene rendering benchmark. pub fn images(c: &mut Criterion) { @@ -41,7 +42,6 @@ pub fn images(c: &mut Criterion) { let scale = width / original_width; let height = original_height * scale; - renderer.set_transform(Affine::IDENTITY); renderer.set_paint_transform(Affine::scale(scale)); renderer.set_paint(Image { image: flower_image.clone(), @@ -79,14 +79,9 @@ fn load_flower_image() -> ImageSource { rgba_data .chunks_exact(4) .map(|rgba| { - let alpha = u16::from(rgba[3]); - let premultiply = |component| (alpha * u16::from(component) / 255) as u8; - vello_common::color::PremulRgba8 { - r: premultiply(rgba[0]), - g: premultiply(rgba[1]), - b: premultiply(rgba[2]), - a: alpha as u8, - } + AlphaColor::from_rgba8(rgba[0], rgba[1], rgba[2], rgba[3]) + .premultiply() + .to_rgba8() }) .collect(), width as u16, diff --git a/sparse_strips/vello_bench/src/lib.rs b/sparse_strips/vello_bench/src/lib.rs index 1ac6e7b4c..dbf89fd43 100644 --- a/sparse_strips/vello_bench/src/lib.rs +++ b/sparse_strips/vello_bench/src/lib.rs @@ -11,7 +11,7 @@ pub mod data; pub mod fine; pub mod flatten; pub mod glyph; -pub mod scene; +pub mod integration; pub mod strip; pub mod tile; diff --git a/sparse_strips/vello_common/src/coarse.rs b/sparse_strips/vello_common/src/coarse.rs index e82721805..887a485e6 100644 --- a/sparse_strips/vello_common/src/coarse.rs +++ b/sparse_strips/vello_common/src/coarse.rs @@ -63,21 +63,6 @@ pub const MODE_CPU: u8 = 0; /// generation specific for `vello_hybrid`. pub const MODE_HYBRID: u8 = 1; -/// Optimization hint for fill operations, computed in `Wide::generate` and passed to `WideTile::fill`. 
-/// -/// This enum communicates whether a fill operation can benefit from overdraw elimination: -/// - For opaque solid colors: we can set the background color directly and skip the fill -/// - For opaque images: we can clear previous commands but still need to emit the fill -#[derive(Debug, Clone, Copy)] -pub enum FillOptimization { - /// No optimization possible, emit fill command normally. - None, - /// Paint is an opaque solid color - can replace background if conditions are met. - OpaqueSolid(PremulColor), - /// Paint is an opaque image - can clear previous commands if conditions are met. - OpaqueImage, -} - /// A container for wide tiles. #[derive(Debug)] pub struct Wide { @@ -533,25 +518,25 @@ impl Wide { .min(bbox.x1()) .min(WideTile::MAX_WIDE_TILE_COORD); - // Compute fill optimization based on paint type + // Compute fill hint based on paint type let fill_attrs = &self.attrs.fill[attrs_idx as usize]; - let optimization = if fill_attrs.mask.is_none() { + let fill_hint = if fill_attrs.mask.is_none() { match &fill_attrs.paint { - Paint::Solid(s) if s.is_opaque() => FillOptimization::OpaqueSolid(*s), + Paint::Solid(s) if s.is_opaque() => FillHint::OpaqueSolid(*s), Paint::Indexed(idx) => { if let Some(EncodedPaint::Image(img)) = encoded_paints.get(idx.index()) && !img.has_opacities && img.sampler.alpha == 1.0 { - FillOptimization::OpaqueImage + FillHint::OpaqueImage } else { - FillOptimization::None + FillHint::None } } - _ => FillOptimization::None, + _ => FillHint::None, } } else { - FillOptimization::None + FillHint::None }; // Generate fill commands for each wide tile in the fill region @@ -569,7 +554,7 @@ impl Wide { width, attrs_idx, current_layer_id, - optimization, + fill_hint, ); // TODO: This bbox update might be redundant since filled regions are always // bounded by strip regions (which already update the bbox). 
Consider removing @@ -1239,7 +1224,7 @@ impl WideTile { /// For clipped filter layers, commands are always generated since filters need the full /// layer content rendered before applying the clip as a mask. /// - /// The `optimization` parameter is pre-computed by the caller based on paint type: + /// The `fill_hint` parameter is pre-computed by the caller based on paint type: /// - `OpaqueSolid(color)`: Paint is an opaque solid color, can replace background /// - `OpaqueImage`: Paint is an opaque image, can clear previous commands /// - `None`: No optimization available @@ -1249,29 +1234,30 @@ impl WideTile { width: u16, attrs_idx: u32, current_layer_id: LayerId, - optimization: FillOptimization, + fill_hint: FillHint, ) { if !self.is_zero_clip() || self.in_clipped_filter_layer { match MODE { MODE_CPU => { // Check if we can apply overdraw elimination optimization. // This requires filling the entire tile width with no clip/buffer stack. + // + // Note that we could be more aggressive in optimizing a whole-tile opaque fill + // even with a clip stack. It would be valid to elide all drawing commands from + // the enclosing clip push up to the fill. Further, we could extend the clip + // push command to include a background color, rather than always starting with + // a transparent buffer. Lastly, a sequence of push(bg); strip/fill; pop could + // be replaced with strip/fill with the color (the latter is true even with a + // non-opaque color). + // + // However, the extra cost of tracking such optimizations may outweigh the + // benefit, especially in hybrid mode with GPU painting. let can_override = x == 0 && width == WideTile::WIDTH && self.n_clip == 0 && self.n_bufs == 0; if can_override { - match optimization { - FillOptimization::OpaqueSolid(color) => { - // Note that we could be more aggressive in optimizing a whole-tile opaque fill - // even with a clip stack. It would be valid to elide all drawing commands from - // the enclosing clip push up to the fill. 
Further, we could extend the clip - // push command to include a background color, rather than always starting with - // a transparent buffer. Lastly, a sequence of push(bg); strip/fill; pop could - // be replaced with strip/fill with the color (the latter is true even with a - // non-opaque color). - // - // However, the extra cost of tracking such optimizations may outweigh the - // benefit, especially in hybrid mode with GPU painting. + match fill_hint { + FillHint::OpaqueSolid(color) => { self.cmds.clear(); self.bg = color; if let Some(ranges) = @@ -1281,7 +1267,7 @@ impl WideTile { } return; } - FillOptimization::OpaqueImage => { + FillHint::OpaqueImage => { // Opaque image: clear previous commands but still emit the fill. self.cmds.clear(); self.bg = PremulColor::from_alpha_color(TRANSPARENT); @@ -1290,9 +1276,10 @@ impl WideTile { { ranges.clear(); } - // Fall through to emit the fill command below. + // Fall through to emit the fill command below, as opposed to + // solid paints where we have a return statement. } - FillOptimization::None => {} + FillHint::None => {} } } @@ -1534,6 +1521,21 @@ impl WideTile { } } +/// Optimization hint for fill operations, computed in `Wide::generate` and passed to `WideTile::fill`. +/// +/// This enum communicates whether a fill operation can benefit from overdraw elimination: +/// - For opaque solid colors: we can set the background color directly and skip the fill +/// - For opaque images: we can clear previous commands but still need to emit the fill +#[derive(Debug, Clone, Copy)] +pub enum FillHint { + /// No optimization possible, emit fill command normally. + None, + /// Paint is an opaque solid color - can replace background if conditions are met. + OpaqueSolid(PremulColor), + /// Paint is an opaque image - can clear previous commands if conditions are met. + OpaqueImage, +} + /// Distinguishes between different types of layers and their storage strategies. 
/// /// Each layer kind determines how the layer's content is stored and processed: @@ -1888,7 +1890,7 @@ impl LayerCommandRanges { #[cfg(test)] mod tests { - use crate::coarse::{FillOptimization, LayerKind, MODE_CPU, Wide, WideTile}; + use crate::coarse::{FillHint, LayerKind, MODE_CPU, Wide, WideTile}; use crate::kurbo::Affine; use crate::peniko::{BlendMode, Compose, Mix}; use crate::render_graph::RenderGraph; @@ -1908,8 +1910,8 @@ mod tests { fn basic_layer() { let mut wide = WideTile::<MODE_CPU>::new(0, 0); wide.push_buf(LayerKind::Regular(0)); - wide.fill(0, 10, 0, 0, FillOptimization::None); - wide.fill(10, 10, 0, 0, FillOptimization::None); + wide.fill(0, 10, 0, 0, FillHint::None); + wide.fill(10, 10, 0, 0, FillHint::None); wide.pop_buf(); assert_eq!(wide.cmds.len(), 4); @@ -1921,8 +1923,8 @@ mod tests { let mut wide = WideTile::<MODE_CPU>::new(0, 0); wide.push_buf(LayerKind::Regular(0)); - wide.fill(0, 10, 0, 0, FillOptimization::None); - wide.fill(10, 10, 0, 0, FillOptimization::None); + wide.fill(0, 10, 0, 0, FillHint::None); + wide.fill(10, 10, 0, 0, FillHint::None); wide.blend(blend_mode); wide.pop_buf(); @@ -1935,7 +1937,7 @@ mod tests { let mut wide = WideTile::<MODE_CPU>::new(0, 0); wide.push_buf(LayerKind::Regular(0)); - wide.fill(0, 10, 0, 0, FillOptimization::None); + wide.fill(0, 10, 0, 0, FillHint::None); wide.blend(blend_mode); wide.pop_buf(); diff --git a/sparse_strips/vello_cpu/src/dispatch/multi_threaded.rs b/sparse_strips/vello_cpu/src/dispatch/multi_threaded.rs index 417de7292..c19e532a8 100644 --- a/sparse_strips/vello_cpu/src/dispatch/multi_threaded.rs +++ b/sparse_strips/vello_cpu/src/dispatch/multi_threaded.rs @@ -270,7 +270,10 @@ impl MultiThreadedDispatcher { allocation_group, }; task_sender.send(task).unwrap(); - // TODO: Support encoded_paints in multithreading. + // TODO: Pass encoded_paints here to enable overdraw elimination for opaque indexed + // paints.
Currently we pass an empty slice, so indexed paints render correctly but miss + // the FillHint::OpaqueImage optimization. The challenge is that encoded_paints is a + // borrowed reference that may not be valid by the time coarse processing runs asynchronously. self.run_coarse(true, &[]); }