Skip to content

Commit 11b1a18

Browse files
authored
Merge pull request #16 from haskell-works/move-loading-of-simd-word-to-outside-of-inner-loop
Move loading of simd word to outside of inner loop
2 parents c595cd6 + e9b959e commit 11b1a18

File tree

1 file changed

+8
-13
lines changed

1 file changed

+8
-13
lines changed

cbits/simd_avx2.c

Lines changed: 8 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -66,21 +66,16 @@ void avx2_cmpeq8_para(
6666
for (i = 0; i < targets_length * 2; ++i) {
6767
size_t j;
6868

69-
for (j = 0; j < bytes_length; ++j) {
70-
uint8_t *target = targets[j];
71-
72-
uint32_t *target32 = (uint32_t *)target;
73-
74-
uint8_t byte = bytes[j];
75-
76-
__m256i v_comparand = _mm256_set1_epi8(byte);
77-
78-
uint32_t *out_mask = (uint32_t*)target;
69+
__m256i v_data_a = *(__m256i *)(source + (i * 32));
7970

80-
__m256i v_data_a = *(__m256i *)(source + (i * 32));
71+
for (j = 0; j < bytes_length; ++j) {
72+
uint8_t *target = targets[j];
73+
uint32_t *target32 = (uint32_t *)target;
74+
__m256i v_comparand = _mm256_set1_epi8(bytes[j]);
75+
uint32_t *out_mask = (uint32_t*)target;
8176
__m256i v_results_a = _mm256_cmpeq_epi8(v_data_a, v_comparand);
82-
uint32_t mask = (uint32_t)_mm256_movemask_epi8(v_results_a);
83-
target32[i] = mask;
77+
uint32_t mask = (uint32_t)_mm256_movemask_epi8(v_results_a);
78+
target32[i] = mask;
8479
}
8580
}
8681
#endif

0 commit comments

Comments
 (0)