Skip to content

Commit a5a78f6

Browse files
xiangwang1 authored and fatchanghao committed
simd_utils: fix undefined instruction issue for 32-bit system
fixes github issue #292
1 parent 4437829 commit a5a78f6

File tree

1 file changed

+13
-13
lines changed

1 file changed

+13
-13
lines changed

src/util/simd_utils.h

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2015-2020, Intel Corporation
2+
* Copyright (c) 2015-2021, Intel Corporation
33
*
44
* Redistribution and use in source and binary forms, with or without
55
* modification, are permitted provided that the following conditions are met:
@@ -156,6 +156,16 @@ static really_inline u32 movd(const m128 in) {
156156
return _mm_cvtsi128_si32(in);
157157
}
158158

159+
static really_inline u64a movq(const m128 in) {
160+
#if defined(ARCH_X86_64)
161+
return _mm_cvtsi128_si64(in);
162+
#else // 32-bit - this is horrific
163+
u32 lo = movd(in);
164+
u32 hi = movd(_mm_srli_epi64(in, 32));
165+
return (u64a)hi << 32 | lo;
166+
#endif
167+
}
168+
159169
#if defined(HAVE_AVX512)
160170
static really_inline u32 movd512(const m512 in) {
161171
// NOTE: seems gcc doesn't support _mm512_cvtsi512_si32(in),
@@ -166,20 +176,10 @@ static really_inline u32 movd512(const m512 in) {
166176
static really_inline u64a movq512(const m512 in) {
167177
// NOTE: seems AVX512 doesn't support _mm512_cvtsi512_si64(in),
168178
// so we use 2-step conversions to work around.
169-
return _mm_cvtsi128_si64(_mm512_castsi512_si128(in));
179+
return movq(_mm512_castsi512_si128(in));
170180
}
171181
#endif
172182

173-
static really_inline u64a movq(const m128 in) {
174-
#if defined(ARCH_X86_64)
175-
return _mm_cvtsi128_si64(in);
176-
#else // 32-bit - this is horrific
177-
u32 lo = movd(in);
178-
u32 hi = movd(_mm_srli_epi64(in, 32));
179-
return (u64a)hi << 32 | lo;
180-
#endif
181-
}
182-
183183
/* another form of movq */
184184
static really_inline
185185
m128 load_m128_from_u64a(const u64a *p) {
@@ -791,7 +791,7 @@ m128 movdq_lo(m256 x) {
791791
#define lshift128_m256(a, count_immed) _mm256_slli_si256(a, count_immed)
792792
#define extract64from256(a, imm) _mm_extract_epi64(_mm256_extracti128_si256(a, imm >> 1), imm % 2)
793793
#define extract32from256(a, imm) _mm_extract_epi32(_mm256_extracti128_si256(a, imm >> 2), imm % 4)
794-
#define extractlow64from256(a) _mm_cvtsi128_si64(cast256to128(a))
794+
#define extractlow64from256(a) movq(cast256to128(a))
795795
#define extractlow32from256(a) movd(cast256to128(a))
796796
#define interleave256hi(a, b) _mm256_unpackhi_epi8(a, b)
797797
#define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b)

0 commit comments

Comments
 (0)