From 5488291fe98c8be99eed842d4e8a557a083c1254 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 5 Jan 2026 06:30:44 +0000 Subject: [PATCH 1/2] Pre-allocate output buffers in render_impl Co-authored-by: gemberg --- sparse_strips/vello_common/src/strip.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sparse_strips/vello_common/src/strip.rs b/sparse_strips/vello_common/src/strip.rs index 731b58d38..c1c84df9d 100644 --- a/sparse_strips/vello_common/src/strip.rs +++ b/sparse_strips/vello_common/src/strip.rs @@ -116,6 +116,18 @@ fn render_impl( return; } + // Pre-allocate output buffers. + // + // `alpha_buf` gets one 16-byte (u8x16) column written each time we advance to a new tile + // *location* (x,y). The number of unique locations is <= tiles.len(), so 16 * tiles.len() + // bytes is a safe upper bound (we may over-reserve if many tiles share the same location). + // + // `strip_buf` grows roughly with the number of locations/row transitions; reserving ~tiles.len() + // entries makes reallocations in the hot loop unlikely. 
+ let tiles_len = tiles.len() as usize; + alpha_buf.reserve(tiles_len.saturating_mul(16)); + strip_buf.reserve(tiles_len.saturating_add(8)); + let should_fill = |winding: i32| match fill_rule { Fill::NonZero => winding != 0, Fill::EvenOdd => winding % 2 != 0, From af95a7e041f6b672b97e7820fe45ca262bb8f669 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 5 Jan 2026 06:43:00 +0000 Subject: [PATCH 2/2] Add benchmark for first render of strips Co-authored-by: gemberg --- sparse_strips/vello_bench/benches/main.rs | 2 + sparse_strips/vello_bench/src/strip.rs | 48 ++++++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/sparse_strips/vello_bench/benches/main.rs b/sparse_strips/vello_bench/benches/main.rs index b9f50d4e5..b9603b797 100644 --- a/sparse_strips/vello_bench/benches/main.rs +++ b/sparse_strips/vello_bench/benches/main.rs @@ -18,11 +18,13 @@ criterion_group!(tile, tile::tile); criterion_group!(flatten, flatten::flatten); criterion_group!(strokes, flatten::strokes); criterion_group!(render_strips, strip::render_strips); +criterion_group!(render_strips_first_render, strip::render_strips_first_render); criterion_group!(glyph, glyph::glyph); criterion_group!(integration_bench, integration::images); criterion_main!( tile, render_strips, + render_strips_first_render, flatten, strokes, glyph, diff --git a/sparse_strips/vello_bench/src/strip.rs b/sparse_strips/vello_bench/src/strip.rs index 86909419f..14b97e16d 100644 --- a/sparse_strips/vello_bench/src/strip.rs +++ b/sparse_strips/vello_bench/src/strip.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT use crate::data::get_data_items; -use criterion::Criterion; +use criterion::{BatchSize, Criterion}; use vello_common::fearless_simd::Level; use vello_common::peniko::Fill; @@ -47,3 +47,49 @@ pub fn render_strips(c: &mut Criterion) { } } } + +/// Benchmark strip rendering in a "first render" configuration. 
+/// +/// This intentionally starts with empty output buffers (capacity == 0) each iteration, so we +/// measure the cost of growing `strip_buf`/`alpha_buf` during `strip::render_impl` (and thus any +/// effect from internal `reserve()` calls). +pub fn render_strips_first_render(c: &mut Criterion) { + let mut g = c.benchmark_group("render_strips_first_render"); + g.sample_size(50); + + macro_rules! strip_single { + ($item:expr, $level:expr, $suffix:expr) => { + let lines = $item.lines(); + let tiles = $item.sorted_tiles(); + + g.bench_function(format!("{}_{}", $item.name.clone(), $suffix), |b| { + b.iter_batched( + || (Vec::new(), Vec::new()), + |(mut strip_buf, mut alpha_buf)| { + vello_common::strip::render( + $level, + &tiles, + &mut strip_buf, + &mut alpha_buf, + Fill::NonZero, + None, + &lines, + ); + std::hint::black_box((&strip_buf, &alpha_buf)); + (strip_buf, alpha_buf) + }, + BatchSize::SmallInput, + ) + }); + }; + } + + for item in get_data_items() { + // Commenting this out by default since SIMD is what we care about most. + // strip_single!(item, Level::fallback(), "fallback"); + let simd_level = Level::new(); + if !matches!(simd_level, Level::Fallback(_)) { + strip_single!(item, simd_level, "simd"); + } + } +}