11/*
2- * Copyright (c) 2015-2020 , Intel Corporation
2+ * Copyright (c) 2015-2021 , Intel Corporation
33 *
44 * Redistribution and use in source and binary forms, with or without
55 * modification, are permitted provided that the following conditions are met:
@@ -156,6 +156,16 @@ static really_inline u32 movd(const m128 in) {
156156 return _mm_cvtsi128_si32 (in );
157157}
158158
159+ static really_inline u64a movq (const m128 in ) {
160+ #if defined(ARCH_X86_64 )
161+ return _mm_cvtsi128_si64 (in );
162+ #else // 32-bit - this is horrific
163+ u32 lo = movd (in );
164+ u32 hi = movd (_mm_srli_epi64 (in , 32 ));
165+ return (u64a )hi << 32 | lo ;
166+ #endif
167+ }
168+
159169#if defined(HAVE_AVX512 )
160170static really_inline u32 movd512 (const m512 in ) {
161171 // NOTE: seems gcc doesn't support _mm512_cvtsi512_si32(in),
@@ -166,20 +176,10 @@ static really_inline u32 movd512(const m512 in) {
166176static really_inline u64a movq512 (const m512 in ) {
167177 // NOTE: seems AVX512 doesn't support _mm512_cvtsi512_si64(in),
168178 // so we use 2-step conversions to work around.
169- return _mm_cvtsi128_si64 (_mm512_castsi512_si128 (in ));
179+ return movq (_mm512_castsi512_si128 (in ));
170180}
171181#endif
172182
173- static really_inline u64a movq (const m128 in ) {
174- #if defined(ARCH_X86_64 )
175- return _mm_cvtsi128_si64 (in );
176- #else // 32-bit - this is horrific
177- u32 lo = movd (in );
178- u32 hi = movd (_mm_srli_epi64 (in , 32 ));
179- return (u64a )hi << 32 | lo ;
180- #endif
181- }
182-
183183/* another form of movq */
184184static really_inline
185185m128 load_m128_from_u64a (const u64a * p ) {
@@ -791,7 +791,7 @@ m128 movdq_lo(m256 x) {
791791#define lshift128_m256 (a , count_immed ) _mm256_slli_si256(a, count_immed)
792792#define extract64from256 (a , imm ) _mm_extract_epi64(_mm256_extracti128_si256(a, imm >> 1), imm % 2)
793793#define extract32from256 (a , imm ) _mm_extract_epi32(_mm256_extracti128_si256(a, imm >> 2), imm % 4)
794- #define extractlow64from256 (a ) _mm_cvtsi128_si64 (cast256to128(a))
794+ #define extractlow64from256 (a ) movq (cast256to128(a))
795795#define extractlow32from256 (a ) movd(cast256to128(a))
796796#define interleave256hi (a , b ) _mm256_unpackhi_epi8(a, b)
797797#define interleave256lo (a , b ) _mm256_unpacklo_epi8(a, b)
0 commit comments