Skip to content

Commit

Permalink
Added SSE, AVX and AVX512 optimizations for depan_lin and depan_eqpow…
Browse files Browse the repository at this point in the history
… functions
  • Loading branch information
sadko4u committed Nov 27, 2024
1 parent 95d5373 commit 3a7a00f
Show file tree
Hide file tree
Showing 11 changed files with 1,456 additions and 2 deletions.
4 changes: 2 additions & 2 deletions include/private/dsp/arch/generic/pan.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ namespace lsp
const float sl = fabsf(l[i]);
const float sr = fabsf(r[i]);
const float den = sl + sr;
dst[i] = (den > 1e-18f) ? sr / den : dfl;
dst[i] = (den >= 1e-18f) ? sr / den : dfl;
}
}

Expand All @@ -51,7 +51,7 @@ namespace lsp
const float sl = l[i] * l[i];
const float sr = r[i] * r[i];
const float den = sl + sr;
dst[i] = (den > 1e-36f) ? sr / den : dfl;
dst[i] = (den >= 1e-36f) ? sr / den : dfl;
}
}

Expand Down
363 changes: 363 additions & 0 deletions include/private/dsp/arch/x86/avx/pan.h

Large diffs are not rendered by default.

296 changes: 296 additions & 0 deletions include/private/dsp/arch/x86/avx512/pan.h

Large diffs are not rendered by default.

281 changes: 281 additions & 0 deletions include/private/dsp/arch/x86/sse/pan.h

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions src/main/x86/avx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include <private/dsp/arch/x86/avx/hmath/hdotp.h>

#include <private/dsp/arch/x86/avx/mix.h>
#include <private/dsp/arch/x86/avx/pan.h>
#include <private/dsp/arch/x86/avx/search/minmax.h>

#include <private/dsp/arch/x86/avx/fft.h>
Expand Down Expand Up @@ -333,6 +334,9 @@
CEXPORT1(favx, mix_copy4);
CEXPORT1(favx, mix_add4);

CEXPORT1(favx, depan_lin);
CEXPORT1(favx, depan_eqpow);

CEXPORT1(favx, min);
CEXPORT1(favx, max);
CEXPORT1(favx, minmax);
Expand Down Expand Up @@ -506,6 +510,8 @@
CEXPORT2(favx, dyn_biquad_process_x2, dyn_biquad_process_x2_fma3);
CEXPORT2(favx, dyn_biquad_process_x4, dyn_biquad_process_x4_fma3);
CEXPORT2(ffma, dyn_biquad_process_x8, dyn_biquad_process_x8_fma3);

CEXPORT2(favx, depan_eqpow, depan_eqpow_fma3);
}
}

Expand Down
4 changes: 4 additions & 0 deletions src/main/x86/avx512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include <private/dsp/arch/x86/avx512/pcomplex.h>
#include <private/dsp/arch/x86/avx512/pmath.h>
#include <private/dsp/arch/x86/avx512/search.h>
#include <private/dsp/arch/x86/avx512/pan.h>

#include <private/dsp/arch/x86/avx512/correlation.h>
#undef PRIVATE_DSP_ARCH_X86_AVX512_IMPL
Expand Down Expand Up @@ -303,6 +304,9 @@

CEXPORT1(vl, corr_init);
CEXPORT1(vl, corr_incr);

CEXPORT1(vl, depan_lin);
CEXPORT1(vl, depan_eqpow);
}
} /* namespace avx512 */
} /* namespace lsp */
Expand Down
4 changes: 4 additions & 0 deletions src/main/x86/sse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include <private/dsp/arch/x86/sse/hmath/hdotp.h>

#include <private/dsp/arch/x86/sse/mix.h>
#include <private/dsp/arch/x86/sse/pan.h>

#include <private/dsp/arch/x86/sse/search/minmax.h>

Expand Down Expand Up @@ -282,6 +283,9 @@
EXPORT1(mix_copy4);
EXPORT1(mix_add4);

EXPORT1(depan_lin);
EXPORT1(depan_eqpow);

EXPORT1(reverse1);
EXPORT1(reverse2);

Expand Down
125 changes: 125 additions & 0 deletions src/test/ptest/pan/depan_eqpow.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <sadko4u@gmail.com>
*
* This file is part of lsp-dsp-lib
* Created on: 27 Nov 2024
*
* lsp-dsp-lib is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* lsp-dsp-lib is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
*/

#include <lsp-plug.in/common/alloc.h>
#include <lsp-plug.in/common/types.h>
#include <lsp-plug.in/dsp/dsp.h>
#include <lsp-plug.in/test-fw/helpers.h>
#include <lsp-plug.in/test-fw/ptest.h>

#define MIN_RANK 8
#define MAX_RANK 16

// Hand-written forward declarations of every depan_eqpow implementation that
// this performance test exercises. All of them share one signature so that
// they can be passed around through the depan_t function pointer type below.
namespace lsp
{
// Plain C++ implementation (arch/generic).
namespace generic
{
void depan_eqpow(float *dst, const float *l, const float *r, float dfl, size_t count);
}

// x86 SIMD implementations. NOTE(review): IF_ARCH_X86 presumably discards its
// argument when not building for x86 - confirm against the common headers.
IF_ARCH_X86(
namespace sse
{
void depan_eqpow(float *dst, const float *l, const float *r, float dfl, size_t count);
}

namespace avx
{
void depan_eqpow(float *dst, const float *l, const float *r, float dfl, size_t count);
// Separate AVX variant built with FMA3 instructions (going by its name).
void depan_eqpow_fma3(float *dst, const float *l, const float *r, float dfl, size_t count);
}

namespace avx512
{
void depan_eqpow(float *dst, const float *l, const float *r, float dfl, size_t count);
}
)

// 32-bit ARM: the declaration is commented out, so nothing is benchmarked for
// this architecture yet (presumably pending a NEON implementation).
IF_ARCH_ARM(
namespace neon_d32
{
// void depan_eqpow(float *dst, const float *l, const float *r, float dfl, size_t count);
}
)

// AArch64: same situation as 32-bit ARM above, the declaration is disabled.
IF_ARCH_AARCH64(
namespace asimd
{
// void depan_eqpow(float *dst, const float *l, const float *r, float dfl, size_t count);
}
)

// Pointer to any of the implementations declared above.
typedef void (* depan_t) (float *dst, const float *l, const float *r, float dfl, size_t count);
}

//-----------------------------------------------------------------------------
// Performance test
// Performance test "dsp.pan" / depan_eqpow: runs every available
// implementation over power-of-two block sizes from 2^MIN_RANK to 2^MAX_RANK.
// NOTE(review): the trailing 5 and 1000 are presumably the time budget and the
// iteration count used by the test framework - confirm against test-fw.
PTEST_BEGIN("dsp.pan", depan_eqpow, 5, 1000)

    /**
     * Benchmark one implementation on the first `count` samples.
     *
     * @param label human-readable implementation name used as the benchmark key
     * @param dst destination buffer, at least `count` floats
     * @param src1 left channel input, at least `count` floats
     * @param src2 right channel input, at least `count` floats
     * @param count number of samples processed per call
     * @param func implementation under test; skipped when PTEST_SUPPORTED
     *             reports it as unavailable
     */
    void call(const char *label, float *dst, const float *src1, const float *src2, size_t count, depan_t func)
    {
        if (!PTEST_SUPPORTED(func))
            return;

        // Bounded formatting: the key is truncated instead of overrunning
        // the fixed-size buffer if a label ever becomes too long.
        char buf[80];
        snprintf(buf, sizeof(buf), "%s x %d", label, int(count));
        printf("Testing %s numbers...\n", buf);

        // 0.5f is passed as the `dfl` argument of the implementation.
        PTEST_LOOP(buf,
            func(dst, src1, src2, 0.5f, count);
        );
    }

    PTEST_MAIN
    {
        // A single aligned allocation is split into three regions of
        // buf_size floats each: dst | src1 | src2.
        size_t buf_size = 1 << MAX_RANK;
        uint8_t *data   = NULL;
        float *dst      = alloc_aligned<float>(data, buf_size * 3, 64);
        float *src1     = &dst[buf_size];
        float *src2     = &src1[buf_size];

        // Fill both inputs once; every implementation sees the same data.
        randomize_sign(src1, buf_size);
        randomize_sign(src2, buf_size);

        // Stringify the function name so that it becomes the benchmark label.
        #define CALL(func) \
            call(#func, dst, src1, src2, count, func);

        for (size_t i=MIN_RANK; i <= MAX_RANK; ++i)
        {
            size_t count = 1 << i;

            CALL(generic::depan_eqpow);
            IF_ARCH_X86(CALL(sse::depan_eqpow));
            IF_ARCH_X86(CALL(avx::depan_eqpow));
            IF_ARCH_X86(CALL(avx::depan_eqpow_fma3));
            IF_ARCH_X86(CALL(avx512::depan_eqpow));
            // ARM variants are disabled together with their declarations.
//            IF_ARCH_ARM(CALL(neon_d32::depan_eqpow));
//            IF_ARCH_AARCH64(CALL(asimd::depan_eqpow));
            PTEST_SEPARATOR;
        }

        free_aligned(data);
    }
PTEST_END




123 changes: 123 additions & 0 deletions src/test/ptest/pan/depan_lin.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <sadko4u@gmail.com>
*
* This file is part of lsp-dsp-lib
* Created on: 27 Nov 2024
*
* lsp-dsp-lib is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* lsp-dsp-lib is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
*/

#include <lsp-plug.in/common/alloc.h>
#include <lsp-plug.in/common/types.h>
#include <lsp-plug.in/dsp/dsp.h>
#include <lsp-plug.in/test-fw/helpers.h>
#include <lsp-plug.in/test-fw/ptest.h>

#define MIN_RANK 8
#define MAX_RANK 16

// Hand-written forward declarations of every depan_lin implementation that
// this performance test exercises. All of them share one signature so that
// they can be passed around through the depan_t function pointer type below.
namespace lsp
{
// Plain C++ implementation (arch/generic).
namespace generic
{
void depan_lin(float *dst, const float *l, const float *r, float dfl, size_t count);
}

// x86 SIMD implementations. NOTE(review): IF_ARCH_X86 presumably discards its
// argument when not building for x86 - confirm against the common headers.
IF_ARCH_X86(
namespace sse
{
void depan_lin(float *dst, const float *l, const float *r, float dfl, size_t count);
}

namespace avx
{
void depan_lin(float *dst, const float *l, const float *r, float dfl, size_t count);
}

namespace avx512
{
void depan_lin(float *dst, const float *l, const float *r, float dfl, size_t count);
}
)

// 32-bit ARM: the declaration is commented out, so nothing is benchmarked for
// this architecture yet (presumably pending a NEON implementation).
IF_ARCH_ARM(
namespace neon_d32
{
// void depan_lin(float *dst, const float *l, const float *r, float dfl, size_t count);
}
)

// AArch64: same situation as 32-bit ARM above, the declaration is disabled.
IF_ARCH_AARCH64(
namespace asimd
{
// void depan_lin(float *dst, const float *l, const float *r, float dfl, size_t count);
}
)

// Pointer to any of the implementations declared above.
typedef void (* depan_t) (float *dst, const float *l, const float *r, float dfl, size_t count);
}

//-----------------------------------------------------------------------------
// Performance test
// Performance test "dsp.pan" / depan_lin: runs every available
// implementation over power-of-two block sizes from 2^MIN_RANK to 2^MAX_RANK.
// NOTE(review): the trailing 5 and 1000 are presumably the time budget and the
// iteration count used by the test framework - confirm against test-fw.
PTEST_BEGIN("dsp.pan", depan_lin, 5, 1000)

    /**
     * Benchmark one implementation on the first `count` samples.
     *
     * @param label human-readable implementation name used as the benchmark key
     * @param dst destination buffer, at least `count` floats
     * @param src1 left channel input, at least `count` floats
     * @param src2 right channel input, at least `count` floats
     * @param count number of samples processed per call
     * @param func implementation under test; skipped when PTEST_SUPPORTED
     *             reports it as unavailable
     */
    void call(const char *label, float *dst, const float *src1, const float *src2, size_t count, depan_t func)
    {
        if (!PTEST_SUPPORTED(func))
            return;

        // Bounded formatting: the key is truncated instead of overrunning
        // the fixed-size buffer if a label ever becomes too long.
        char buf[80];
        snprintf(buf, sizeof(buf), "%s x %d", label, int(count));
        printf("Testing %s numbers...\n", buf);

        // 0.5f is passed as the `dfl` argument of the implementation.
        PTEST_LOOP(buf,
            func(dst, src1, src2, 0.5f, count);
        );
    }

    PTEST_MAIN
    {
        // A single aligned allocation is split into three regions of
        // buf_size floats each: dst | src1 | src2.
        size_t buf_size = 1 << MAX_RANK;
        uint8_t *data   = NULL;
        float *dst      = alloc_aligned<float>(data, buf_size * 3, 64);
        float *src1     = &dst[buf_size];
        float *src2     = &src1[buf_size];

        // Fill both inputs once; every implementation sees the same data.
        randomize_sign(src1, buf_size);
        randomize_sign(src2, buf_size);

        // Stringify the function name so that it becomes the benchmark label.
        #define CALL(func) \
            call(#func, dst, src1, src2, count, func);

        for (size_t i=MIN_RANK; i <= MAX_RANK; ++i)
        {
            size_t count = 1 << i;

            CALL(generic::depan_lin);
            IF_ARCH_X86(CALL(sse::depan_lin));
            IF_ARCH_X86(CALL(avx::depan_lin));
            IF_ARCH_X86(CALL(avx512::depan_lin));
            // ARM variants are disabled together with their declarations.
//            IF_ARCH_ARM(CALL(neon_d32::depan_lin));
//            IF_ARCH_AARCH64(CALL(asimd::depan_lin));
            PTEST_SEPARATOR;
        }

        free_aligned(data);
    }
PTEST_END




Loading

0 comments on commit 3a7a00f

Please sign in to comment.