From 3f608b71cc3e48e875324c4d88ce7d7ef014c5ef Mon Sep 17 00:00:00 2001 From: Norman Feske Date: Thu, 23 Jan 2025 19:12:53 +0100 Subject: [PATCH] fixup "blit: SIMD-based back2front copy" (neon: read sequentially) Issue #5428 --- repos/os/include/blit/internal/neon.h | 41 ++++++++++++++------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/repos/os/include/blit/internal/neon.h b/repos/os/include/blit/internal/neon.h index 52a35af0a87..62d62aeb4b6 100644 --- a/repos/os/include/blit/internal/neon.h +++ b/repos/os/include/blit/internal/neon.h @@ -101,16 +101,16 @@ struct Blit::Neon struct Steps { - int const src_4x, src_y, dst_x, dst_y; - - void incr_x_4 (Src_ptr4 &p) const { p.incr_4x(src_4x ); }; - void incr_x_16(Src_ptr4 &p) const { p.incr_4x(src_4x << 2); }; - void incr_y_4 (Src_ptr4 &p) const { p.incr_y (src_y << 2); }; - void incr_y_16(Src_ptr4 &p) const { p.incr_y (src_y << 4); }; - void incr_x_4 (Dst_ptr4 &p) const { p.incr_x (dst_x << 2); }; - void incr_x_16(Dst_ptr4 &p) const { p.incr_x (dst_x << 4); }; - void incr_y_4 (Dst_ptr4 &p) const { p.incr_y (dst_y << 2); }; - void incr_y_16(Dst_ptr4 &p) const { p.incr_y (dst_y << 4); }; + int const src_y, dst_x, dst_y; + + void incr_x_4 (Src_ptr4 &p) const { p.incr_4x(1); }; + void incr_x_16(Src_ptr4 &p) const { p.incr_4x(4); }; + void incr_y_4 (Src_ptr4 &p) const { p.incr_y (src_y << 2); }; + void incr_y_16(Src_ptr4 &p) const { p.incr_y (src_y << 4); }; + void incr_x_4 (Dst_ptr4 &p) const { p.incr_x (dst_x << 2); }; + void incr_x_16(Dst_ptr4 &p) const { p.incr_x (dst_x << 4); }; + void incr_y_4 (Dst_ptr4 &p) const { p.incr_y (dst_y << 2); }; + void incr_y_16(Dst_ptr4 &p) const { p.incr_y (dst_y << 4); }; }; __attribute__((optimize("-O3"))) @@ -158,8 +158,8 @@ struct Blit::Neon { for (; n > 1; n--) { _rotate_16x16(src, dst, steps); - steps.incr_x_16(dst); - steps.incr_y_16(src); + steps.incr_y_16(dst); + steps.incr_x_16(src); }; _rotate_16x16(src, dst, steps); }; @@ -167,11 +167,12 @@ struct Blit::Neon static inline void _rotate(Src_ptr4 src, Dst_ptr4 dst, Steps const steps, unsigned w, unsigned h) { - for (unsigned i = w; i; i--) { - _rotate_16_lines(src, dst, steps, h); - steps.incr_x_16(src); - steps.incr_y_16(dst); + for (unsigned i = h - 1; i; i--) { + _rotate_16_lines(src, dst, steps, w); + steps.incr_y_16(src); + steps.incr_x_16(dst); } + _rotate_16_lines(src, dst, steps, w); } struct B2f @@ -210,7 +211,7 @@ void Blit::Neon::B2f::r90(uint32_t *dst, unsigned const dst_w, uint32_t const *src, unsigned const src_w, unsigned const w, unsigned const h) { - Steps const steps { 1, -4*int(src_w), 1, 16*int(dst_w) }; + Steps const steps { -4*int(src_w), 1, 16*int(dst_w) }; Src_ptr4 src_ptr4 ((uint32x4_t *)src + 4*src_w*(16*h - 1), steps.src_y); Dst_ptr4 dst_ptr4 (dst, steps.dst_y); @@ -237,7 +238,7 @@ void Blit::Neon::B2f::r270(uint32_t *dst, unsigned const dst_w, uint32_t const *src, unsigned const src_w, unsigned const w, const unsigned h) { - Steps const steps { 1, 4*int(src_w), 1, -16*int(dst_w) }; + Steps const steps { 4*int(src_w), 1, -16*int(dst_w) }; Src_ptr4 src_ptr4 ((uint32x4_t *)src, steps.src_y); Dst_ptr4 dst_ptr4 (dst + 16*int(dst_w)*(w*16 - 1), steps.dst_y); @@ -264,7 +265,7 @@ void Blit::Neon::B2f_flip::r90(uint32_t *dst, unsigned const dst_w, uint32_t const *src, unsigned const src_w, unsigned const w, unsigned const h) { - Steps const steps { 1, 4*int(src_w), 1, 16*int(dst_w) }; + Steps const steps { 4*int(src_w), 1, 16*int(dst_w) }; Src_ptr4 src_ptr4 ((uint32x4_t *)src, steps.src_y); Dst_ptr4 dst_ptr4 (dst, steps.dst_y); @@ -291,7 +292,7 @@ void Blit::Neon::B2f_flip::r270(uint32_t *dst, unsigned const dst_w, uint32_t const *src, unsigned const src_w, unsigned const w, const unsigned h) { - Steps const steps { 1, -4*int(src_w), 1, -16*int(dst_w) }; + Steps const steps { -4*int(src_w), 1, -16*int(dst_w) }; Src_ptr4 src_ptr4 ((uint32x4_t *)src + 4*src_w*(16*h - 1), steps.src_y); Dst_ptr4 dst_ptr4 (dst + 16*int(dst_w)*(w*16 - 1), steps.dst_y);