From e0a9e4e145dda2886c842b418571f0217cb2fd60 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 1 Dec 2024 08:34:01 +0000 Subject: [PATCH] Fix: Missing `_mm_cvtsi128_si64x` in Clang --- include/stringzilla/stringzilla.h | 8 ++++---- scripts/test.cpp | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/stringzilla/stringzilla.h b/include/stringzilla/stringzilla.h index fc5ab517..054f02e8 100644 --- a/include/stringzilla/stringzilla.h +++ b/include/stringzilla/stringzilla.h @@ -4022,7 +4022,7 @@ SZ_PUBLIC sz_u64_t sz_checksum_avx2(sz_cptr_t text, sz_size_t length) { __m128i low_xmm = _mm256_castsi256_si128(sums_vec.ymm); __m128i high_xmm = _mm256_extracti128_si256(sums_vec.ymm, 1); __m128i sums_xmm = _mm_add_epi64(low_xmm, high_xmm); - sz_u64_t low = (sz_u64_t)_mm_cvtsi128_si64x(sums_xmm); + sz_u64_t low = (sz_u64_t)_mm_cvtsi128_si64(sums_xmm); sz_u64_t high = (sz_u64_t)_mm_extract_epi64(sums_xmm, 1); sz_u64_t result = low + high; if (length) result += sz_checksum_serial(text, length); @@ -4073,7 +4073,7 @@ SZ_PUBLIC sz_u64_t sz_checksum_avx2(sz_cptr_t text, sz_size_t length) { __m128i low_xmm = _mm256_castsi256_si128(sums_vec.ymm); __m128i high_xmm = _mm256_extracti128_si256(sums_vec.ymm, 1); __m128i sums_xmm = _mm_add_epi64(low_xmm, high_xmm); - sz_u64_t low = (sz_u64_t)_mm_cvtsi128_si64x(sums_xmm); + sz_u64_t low = (sz_u64_t)_mm_cvtsi128_si64(sums_xmm); sz_u64_t high = (sz_u64_t)_mm_extract_epi64(sums_xmm, 1); result += low + high; return result; @@ -5306,7 +5306,7 @@ SZ_PUBLIC sz_u64_t sz_checksum_avx512(sz_cptr_t text, sz_size_t length) { __mmask16 mask = _sz_u16_mask_until(length); text_vec.xmms[0] = _mm_maskz_loadu_epi8(mask, text); sums_vec.xmms[0] = _mm_sad_epu8(text_vec.xmms[0], _mm_setzero_si128()); - sz_u64_t low = (sz_u64_t)_mm_cvtsi128_si64x(sums_vec.xmms[0]); + sz_u64_t low = (sz_u64_t)_mm_cvtsi128_si64(sums_vec.xmms[0]); sz_u64_t high = (sz_u64_t)_mm_extract_epi64(sums_vec.xmms[0], 1); return low + high; } @@ -5318,7 +5318,7 @@ SZ_PUBLIC sz_u64_t sz_checksum_avx512(sz_cptr_t text, sz_size_t length) { __m128i low_xmm = _mm256_castsi256_si128(sums_vec.ymms[0]); __m128i high_xmm = _mm256_extracti128_si256(sums_vec.ymms[0], 1); __m128i sums_xmm = _mm_add_epi64(low_xmm, high_xmm); - sz_u64_t low = (sz_u64_t)_mm_cvtsi128_si64x(sums_xmm); + sz_u64_t low = (sz_u64_t)_mm_cvtsi128_si64(sums_xmm); sz_u64_t high = (sz_u64_t)_mm_extract_epi64(sums_xmm, 1); return low + high; } diff --git a/scripts/test.cpp b/scripts/test.cpp index 104ca032..47ef46d2 100644 --- a/scripts/test.cpp +++ b/scripts/test.cpp @@ -1546,12 +1546,10 @@ int main(int argc, char const **argv) { std::printf("- Uses NEON: %s \n", SZ_USE_ARM_NEON ? "yes" : "no"); std::printf("- Uses SVE: %s \n", SZ_USE_ARM_SVE ? "yes" : "no"); -#if 0 // Basic utilities test_arithmetical_utilities(); test_memory_utilities(); test_replacements(); -#endif // Compatibility with STL #if SZ_DETECT_CPP_17 && __cpp_lib_string_view