Skip to content

Commit

Permalink
Improved performance of logarithm values calculations
Browse files Browse the repository at this point in the history
  • Loading branch information
sadko4u committed Nov 25, 2023
1 parent 6d416ad commit 34d96b3
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 24 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
=== 1.0.20 ===
* Optimization of compressor and gate functions using AVX-512 instruction set.
* Introduced SIMD-optimized expander curve and gain functions.
* Improved performance of logarithm value calculations.
* Updated build scripts.

=== 1.0.19 ===
Expand Down
33 changes: 9 additions & 24 deletions include/private/dsp/arch/aarch64/asimd/pmath/log.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,12 @@ namespace lsp
{
LSP_DSP_VEC4(0x007fffff), // MM = frac
LSP_DSP_VEC4(0x0000007f), // ME = 127
LSP_DSP_VEC4(0x3d888889), // C0 = 1/15 = 0.0666666701436043
LSP_DSP_VEC4(0x3d9d89d9), // C1 = 1/13 = 0.0769230797886848
LSP_DSP_VEC4(0x3dba2e8c), // C2 = 1/11 = 0.0909090936183929
LSP_DSP_VEC4(0x3de38e39), // C3 = 1/9 = 0.1111111119389534
LSP_DSP_VEC4(0x3e124925), // C4 = 1/7 = 0.1428571492433548
LSP_DSP_VEC4(0x3e4ccccd), // C5 = 1/5 = 0.2000000029802322
LSP_DSP_VEC4(0x3eaaaaab), // C6 = 1/3 = 0.3333333432674408
LSP_DSP_VEC4(0x3f800000), // C7 = 1.0f
LSP_DSP_VEC4(0x3dba2e8c), // C0 = 1/11 = 0.0909090936183929
LSP_DSP_VEC4(0x3de38e39), // C1 = 1/9 = 0.1111111119389534
LSP_DSP_VEC4(0x3e124925), // C2 = 1/7 = 0.1428571492433548
LSP_DSP_VEC4(0x3e4ccccd), // C3 = 1/5 = 0.2000000029802322
LSP_DSP_VEC4(0x3eaaaaab), // C4 = 1/3 = 0.3333333432674408
LSP_DSP_VEC4(0x3f800000), // C5 = 1.0f
};

static const float LOGB_C[] __lsp_aligned16 =
Expand Down Expand Up @@ -115,15 +113,7 @@ namespace lsp
__ASM_EMIT("fmul v7.4s, v7.4s, v5.4s") \
__ASM_EMIT("fadd v6.4s, v6.4s, " C5 ".4s") /* v6 = C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y)))) */ \
__ASM_EMIT("fadd v7.4s, v7.4s, " C5 ".4s") \
__ASM_EMIT("fmul v6.4s, v6.4s, v4.4s") /* v6 = Y*(C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y))))) */ \
__ASM_EMIT("fmul v7.4s, v7.4s, v5.4s") \
__ASM_EMIT("fadd v6.4s, v6.4s, " C6 ".4s") /* v6 = C6+Y*(C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y))))) */ \
__ASM_EMIT("fadd v7.4s, v7.4s, " C6 ".4s") \
__ASM_EMIT("fmul v6.4s, v6.4s, v4.4s") /* v6 = Y*(C6+Y*(C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y)))))) */ \
__ASM_EMIT("fmul v7.4s, v7.4s, v5.4s") \
__ASM_EMIT("fadd v6.4s, v6.4s, " C7 ".4s") /* v6 = C7+Y*(C6+Y*(C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y)))))) */ \
__ASM_EMIT("fadd v7.4s, v7.4s, " C7 ".4s") \
__ASM_EMIT("fmul v0.4s, v0.4s, v6.4s") /* v0 = y*(C7+Y*(C6+Y*(C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y))))))) */ \
__ASM_EMIT("fmul v0.4s, v0.4s, v6.4s") /* v0 = y*(C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y))))) */ \
__ASM_EMIT("fmul v1.4s, v1.4s, v7.4s") \
/* v0 = y*L, v2 = R */

Expand Down Expand Up @@ -154,20 +144,15 @@ namespace lsp
__ASM_EMIT("fadd v6.4s, v6.4s, " C4 ".4s") /* v6 = C4+Y*(C3+Y*(C2+Y*(C1+C0*Y))) */ \
__ASM_EMIT("fmul v6.4s, v6.4s, v4.4s") /* v6 = Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y)))) */ \
__ASM_EMIT("fadd v6.4s, v6.4s, " C5 ".4s") /* v6 = C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y)))) */ \
__ASM_EMIT("fmul v6.4s, v6.4s, v4.4s") /* v6 = Y*(C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y))))) */ \
__ASM_EMIT("fadd v6.4s, v6.4s, " C6 ".4s") /* v6 = C6+Y*(C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y))))) */ \
__ASM_EMIT("fmul v6.4s, v6.4s, v4.4s") /* v6 = Y*(C6+Y*(C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y)))))) */ \
__ASM_EMIT("fadd v6.4s, v6.4s, " C7 ".4s") /* v6 = C7+Y*(C6+Y*(C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y)))))) */ \
__ASM_EMIT("fmul v0.4s, v0.4s, v6.4s") /* v0 = y*(C7+Y*(C6+Y*(C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y))))))) */ \
__ASM_EMIT("fmul v0.4s, v0.4s, v6.4s") /* v0 = y*(C5+Y*(C4+Y*(C3+Y*(C2+Y*(C1+C0*Y))))) */ \
/* v0 = y*L, v2 = R */

/*
 * LOGN_CORE_LOAD: emits the register-pair loads that preload the constants
 * used by the logarithm core:
 *   - q26, q27 from the address given by the %[LOGC] operand;
 *   - q16..q23 (and, on one line below, q24/q25) from the address given by
 *     the %[L2C] operand, in consecutive pairs at offsets 0x00, 0x20, 0x40, ...
 * The per-line comments name what each register holds (MM, ME, C0..Cn).
 *
 * NOTE(review): in this view the `ldp q22, q23` load appears twice and the
 * `ldp q24, q25` (C6/C7) line ends without a continuation backslash -- this
 * looks like removed and added diff lines rendered together; confirm against
 * the actual header which of these lines belong to the macro.
 */
#define LOGN_CORE_LOAD \
__ASM_EMIT("ldp q26, q27, [%[LOGC]]") /* v26, v27 = pair loaded from LOGC */ \
__ASM_EMIT("ldp q16, q17, [%[L2C], #0x00]") /* v16 = MM, v17 = ME */ \
__ASM_EMIT("ldp q18, q19, [%[L2C], #0x20]") /* v18 = C0, v19 = C1 */ \
__ASM_EMIT("ldp q20, q21, [%[L2C], #0x40]") /* v20 = C2, v21 = C3 */ \
__ASM_EMIT("ldp q22, q23, [%[L2C], #0x60]") /* v22 = C4, v23 = C5 */ \
__ASM_EMIT("ldp q24, q25, [%[L2C], #0x80]") /* v24 = C6, v25 = C7 */
__ASM_EMIT("ldp q22, q23, [%[L2C], #0x60]") /* v22 = C4, v23 = C5 */

#define LOGB_CORE_X8_NOLOAD \
/* in: v0 = x1, v1 = x2 */ \
Expand Down

0 comments on commit 34d96b3

Please sign in to comment.