Skip to content

Commit 4dbfeb0

Browse files
committed
More optimizations
1 parent 6544567 commit 4dbfeb0

File tree

7 files changed

+16
-24
lines changed

7 files changed

+16
-24
lines changed

include/private/dsp/arch/x86/avx512/hmath/hsum.h

Lines changed: 10 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -244,8 +244,8 @@ namespace lsp
244244
(
245245
__ASM_EMIT("vxorps %%zmm0, %%zmm0, %%zmm0")
246246
__ASM_EMIT("vmovaps %[CC], %%zmm6")
247-
__ASM_EMIT("vxorps %%zmm1, %%zmm1, %%zmm1")
248247
__ASM_EMIT("vmovaps %%zmm6, %%zmm7")
248+
__ASM_EMIT("vxorps %%zmm1, %%zmm1, %%zmm1")
249249
/* x128 blocks */
250250
__ASM_EMIT("sub $128, %[count]")
251251
__ASM_EMIT("jb 2f")
@@ -282,35 +282,27 @@ namespace lsp
282282
__ASM_EMIT("sub $32, %[count]")
283283
__ASM_EMIT("jge 3b")
284284
__ASM_EMIT("4:")
285-
__ASM_EMIT("vextractf64x4 $1, %%zmm0, %%ymm2")
286-
__ASM_EMIT("vextractf64x4 $1, %%zmm1, %%ymm3")
287-
__ASM_EMIT("vaddps %%ymm2, %%ymm0, %%ymm0")
288-
__ASM_EMIT("vaddps %%ymm3, %%ymm1, %%ymm1")
285+
__ASM_EMIT("vaddps %%zmm1, %%zmm0, %%zmm0")
289286
/* x16 block */
290287
__ASM_EMIT("add $16, %[count]")
291288
__ASM_EMIT("jl 6f")
292-
__ASM_EMIT("vandps 0x000(%[src]), %%ymm6, %%ymm2")
293-
__ASM_EMIT("vandps 0x020(%[src]), %%ymm7, %%ymm3")
294-
__ASM_EMIT("vaddps %%ymm2, %%ymm0, %%ymm0")
295-
__ASM_EMIT("vaddps %%ymm3, %%ymm1, %%ymm1")
289+
__ASM_EMIT("vandps 0x000(%[src]), %%zmm6, %%zmm2")
290+
__ASM_EMIT("vaddps %%zmm2, %%zmm0, %%zmm0")
296291
__ASM_EMIT("add $0x40, %[src]")
297292
__ASM_EMIT("sub $16, %[count]")
298293
__ASM_EMIT("6:")
299-
__ASM_EMIT("vextractf128 $1, %%ymm0, %%xmm2")
300-
__ASM_EMIT("vextractf128 $1, %%ymm1, %%xmm3")
301-
__ASM_EMIT("vaddps %%xmm2, %%xmm0, %%xmm0")
302-
__ASM_EMIT("vaddps %%xmm3, %%xmm1, %%xmm1")
294+
__ASM_EMIT("vextractf64x4 $1, %%zmm0, %%ymm2")
295+
__ASM_EMIT("vaddps %%ymm2, %%ymm0, %%ymm0")
303296
/* x8 block */
304297
__ASM_EMIT("add $8, %[count]")
305298
__ASM_EMIT("jl 8f")
306-
__ASM_EMIT("vandps 0x000(%[src]), %%xmm6, %%xmm2")
307-
__ASM_EMIT("vandps 0x010(%[src]), %%xmm7, %%xmm3")
308-
__ASM_EMIT("vaddps %%xmm2, %%xmm0, %%xmm0")
309-
__ASM_EMIT("vaddps %%xmm3, %%xmm1, %%xmm1")
299+
__ASM_EMIT("vandps 0x000(%[src]), %%ymm6, %%ymm2")
300+
__ASM_EMIT("vaddps %%ymm2, %%ymm0, %%ymm0")
310301
__ASM_EMIT("add $0x20, %[src]")
311302
__ASM_EMIT("sub $8, %[count]")
312303
__ASM_EMIT("8:")
313-
__ASM_EMIT("vaddps %%xmm1, %%xmm0, %%xmm0")
304+
__ASM_EMIT("vextractf128 $1, %%ymm0, %%xmm2")
305+
__ASM_EMIT("vaddps %%xmm2, %%xmm0, %%xmm0")
314306
/* x4 block */
315307
__ASM_EMIT("add $4, %[count]")
316308
__ASM_EMIT("jl 10f")

src/test/ptest/hmath/h_abs_dotp.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ namespace lsp
6969
typedef float (* h_dotp_t)(const float *a, const float *b, size_t count);
7070
}
7171

72-
PTEST_BEGIN("dsp.hmath", h_abs_dotp, 5, 5000)
72+
PTEST_BEGIN("dsp.hmath", h_abs_dotp, 2, 10000)
7373

7474
void call(const char *label, float *a, float *b, size_t count, h_dotp_t func)
7575
{

src/test/ptest/hmath/h_abs_sum.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ namespace lsp
6868
typedef float (* h_sum_t)(const float *src, size_t count);
6969
}
7070

71-
PTEST_BEGIN("dsp.hmath", h_abs_sum, 5, 5000)
71+
PTEST_BEGIN("dsp.hmath", h_abs_sum, 2, 10000)
7272

7373
void call(const char *label, float *src, size_t count, h_sum_t func)
7474
{

src/test/ptest/hmath/h_dotp.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ namespace lsp
6969
typedef float (* h_dotp_t)(const float *a, const float *b, size_t count);
7070
}
7171

72-
PTEST_BEGIN("dsp.hmath", h_dotp, 5, 5000)
72+
PTEST_BEGIN("dsp.hmath", h_dotp, 2, 10000)
7373

7474
void call(const char *label, float *a, float *b, size_t count, h_dotp_t func)
7575
{

src/test/ptest/hmath/h_sqr_dotp.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ namespace lsp
6969
typedef float (* h_dotp_t)(const float *a, const float *b, size_t count);
7070
}
7171

72-
PTEST_BEGIN("dsp.hmath", h_sqr_dotp, 5, 5000)
72+
PTEST_BEGIN("dsp.hmath", h_sqr_dotp, 2, 10000)
7373

7474
void call(const char *label, float *a, float *b, size_t count, h_dotp_t func)
7575
{

src/test/ptest/hmath/h_sqr_sum.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ namespace lsp
6969
typedef float (* h_sum_t)(const float *src, size_t count);
7070
}
7171

72-
PTEST_BEGIN("dsp.hmath", h_sqr_sum, 5, 5000)
72+
PTEST_BEGIN("dsp.hmath", h_sqr_sum, 2, 10000)
7373

7474
void call(const char *label, float *src, size_t count, h_sum_t func)
7575
{

src/test/ptest/hmath/h_sum.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ namespace lsp
6868
typedef float (* h_sum_t)(const float *src, size_t count);
6969
}
7070

71-
PTEST_BEGIN("dsp.hmath", h_sum, 5, 5000)
71+
PTEST_BEGIN("dsp.hmath", h_sum, 2, 10000)
7272

7373
void call(const char *label, float *src, size_t count, h_sum_t func)
7474
{

0 commit comments

Comments
 (0)