Skip to content

Commit

Permalink
fixup "blit: SIMD-based back2front copy" (neon: read sequentially)
Browse files (browse the repository at this point in the history)
  • Loading branch information
nfeske committed Jan 23, 2025
1 parent 4c9ad57 commit 3f608b7
Showing 1 changed file with 21 additions and 20 deletions.
41 changes: 21 additions & 20 deletions repos/os/include/blit/internal/neon.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -101,16 +101,16 @@ struct Blit::Neon

struct Steps
{
int const src_4x, src_y, dst_x, dst_y;

void incr_x_4 (Src_ptr4 &p) const { p.incr_4x(src_4x ); };
void incr_x_16(Src_ptr4 &p) const { p.incr_4x(src_4x << 2); };
void incr_y_4 (Src_ptr4 &p) const { p.incr_y (src_y << 2); };
void incr_y_16(Src_ptr4 &p) const { p.incr_y (src_y << 4); };
void incr_x_4 (Dst_ptr4 &p) const { p.incr_x (dst_x << 2); };
void incr_x_16(Dst_ptr4 &p) const { p.incr_x (dst_x << 4); };
void incr_y_4 (Dst_ptr4 &p) const { p.incr_y (dst_y << 2); };
void incr_y_16(Dst_ptr4 &p) const { p.incr_y (dst_y << 4); };
int const src_y, dst_x, dst_y;

void incr_x_4 (Src_ptr4 &p) const { p.incr_4x(1); };
void incr_x_16(Src_ptr4 &p) const { p.incr_4x(4); };
void incr_y_4 (Src_ptr4 &p) const { p.incr_y (src_y << 2); };
void incr_y_16(Src_ptr4 &p) const { p.incr_y (src_y << 4); };
void incr_x_4 (Dst_ptr4 &p) const { p.incr_x (dst_x << 2); };
void incr_x_16(Dst_ptr4 &p) const { p.incr_x (dst_x << 4); };
void incr_y_4 (Dst_ptr4 &p) const { p.incr_y (dst_y << 2); };
void incr_y_16(Dst_ptr4 &p) const { p.incr_y (dst_y << 4); };
};

__attribute__((optimize("-O3")))
Expand Down Expand Up @@ -158,20 +158,21 @@ struct Blit::Neon
{
for (; n > 1; n--) {
_rotate_16x16(src, dst, steps);
steps.incr_x_16(dst);
steps.incr_y_16(src);
steps.incr_y_16(dst);
steps.incr_x_16(src);
};
_rotate_16x16(src, dst, steps);
};

static inline void _rotate(Src_ptr4 src, Dst_ptr4 dst,
Steps const steps, unsigned w, unsigned h)
{
for (unsigned i = w; i; i--) {
_rotate_16_lines(src, dst, steps, h);
steps.incr_x_16(src);
steps.incr_y_16(dst);
for (unsigned i = h - 1; i; i--) {
_rotate_16_lines(src, dst, steps, w);
steps.incr_y_16(src);
steps.incr_x_16(dst);
}
_rotate_16_lines(src, dst, steps, w);
}

struct B2f
Expand Down Expand Up @@ -210,7 +211,7 @@ void Blit::Neon::B2f::r90(uint32_t *dst, unsigned const dst_w,
uint32_t const *src, unsigned const src_w,
unsigned const w, unsigned const h)
{
Steps const steps { 1, -4*int(src_w), 1, 16*int(dst_w) };
Steps const steps { -4*int(src_w), 1, 16*int(dst_w) };

Src_ptr4 src_ptr4 ((uint32x4_t *)src + 4*src_w*(16*h - 1), steps.src_y);
Dst_ptr4 dst_ptr4 (dst, steps.dst_y);
Expand All @@ -237,7 +238,7 @@ void Blit::Neon::B2f::r270(uint32_t *dst, unsigned const dst_w,
uint32_t const *src, unsigned const src_w,
unsigned const w, const unsigned h)
{
Steps const steps { 1, 4*int(src_w), 1, -16*int(dst_w) };
Steps const steps { 4*int(src_w), 1, -16*int(dst_w) };

Src_ptr4 src_ptr4 ((uint32x4_t *)src, steps.src_y);
Dst_ptr4 dst_ptr4 (dst + 16*int(dst_w)*(w*16 - 1), steps.dst_y);
Expand All @@ -264,7 +265,7 @@ void Blit::Neon::B2f_flip::r90(uint32_t *dst, unsigned const dst_w,
uint32_t const *src, unsigned const src_w,
unsigned const w, unsigned const h)
{
Steps const steps { 1, 4*int(src_w), 1, 16*int(dst_w) };
Steps const steps { 4*int(src_w), 1, 16*int(dst_w) };

Src_ptr4 src_ptr4 ((uint32x4_t *)src, steps.src_y);
Dst_ptr4 dst_ptr4 (dst, steps.dst_y);
Expand All @@ -291,7 +292,7 @@ void Blit::Neon::B2f_flip::r270(uint32_t *dst, unsigned const dst_w,
uint32_t const *src, unsigned const src_w,
unsigned const w, const unsigned h)
{
Steps const steps { 1, -4*int(src_w), 1, -16*int(dst_w) };
Steps const steps { -4*int(src_w), 1, -16*int(dst_w) };

Src_ptr4 src_ptr4 ((uint32x4_t *)src + 4*src_w*(16*h - 1), steps.src_y);
Dst_ptr4 dst_ptr4 (dst + 16*int(dst_w)*(w*16 - 1), steps.dst_y);
Expand Down

0 comments on commit 3f608b7

Please sign in to comment.