From 29e2794651c50ccf60a28c2e08639913a68cd71c Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Wed, 26 Nov 2025 17:05:45 +0200 Subject: [PATCH 01/25] add tests that should pass: clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp --- .../constexpr-x86-intrinsics-pd2ps.cpp | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp new file mode 100644 index 0000000000000..a082b23bfae03 --- /dev/null +++ b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp @@ -0,0 +1,120 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512vl -verify %s + +// HACK: Prevent immintrin.h from pulling in standard library headers +// that don't exist in this test environment. +#define __MM_MALLOC_H + +#include + +namespace ExactFinite { +constexpr __m128d d2 = { -1.0, +2.0 }; +constexpr __m128 r128 = _mm_cvtpd_ps(d2); +static_assert(r128[0] == -1.0f && r128[1] == +2.0f, ""); +static_assert(r128[2] == 0.0f && r128[3] == 0.0f, ""); + +constexpr __m128 src128 = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128 m128_full = _mm_mask_cvtpd_ps(src128, 0x3, d2); +static_assert(m128_full[0] == -1.0f && m128_full[1] == +2.0f, ""); +static_assert(m128_full[2] == 9.0f && m128_full[3] == 9.0f, ""); + +constexpr __m128 m128_partial = _mm_mask_cvtpd_ps(src128, 0x1, d2); +static_assert(m128_partial[0] == -1.0f && m128_partial[1] == 9.0f, ""); + +constexpr __m128 m128_zero = _mm_maskz_cvtpd_ps(0x1, d2); +static_assert(m128_zero[0] == -1.0f && m128_zero[1] == 0.0f, ""); +static_assert(m128_zero[2] == 0.0f && m128_zero[3] == 0.0f, ""); + +constexpr __m256d d4 = { 0.0, -1.0, +2.0, +3.5 }; +constexpr __m128 r256 = _mm256_cvtpd_ps(d4); +static_assert(r256[0] == 0.0f && r256[1] == -1.0f, ""); +static_assert(r256[2] == +2.0f && r256[3] 
== +3.5f, ""); + +constexpr __m512d d8 = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; +constexpr __m256 r512 = _mm512_cvtpd_ps(d8); +static_assert(r512[0] == -1.0f && r512[7] == +128.0f, ""); + +constexpr __m256 src256 = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m256 r512_mask = _mm512_mask_cvtpd_ps(src256, 0x05, d8); +static_assert(r512_mask[0] == -1.0f && r512_mask[2] == +4.0f, ""); +static_assert(r512_mask[1] == 9.0f && r512_mask[3] == 9.0f, ""); + +constexpr __m256 r512_maskz = _mm512_maskz_cvtpd_ps(0x81, d8); +static_assert(r512_maskz[0] == -1.0f && r512_maskz[7] == +128.0f, ""); +static_assert(r512_maskz[1] == 0.0f && r512_maskz[6] == 0.0f, ""); + +constexpr __m512 r512lo = _mm512_cvtpd_pslo(d8); +static_assert(r512lo[0] == -1.0f && r512lo[7] == +128.0f, ""); +static_assert(r512lo[8] == 0.0f && r512lo[15] == 0.0f, ""); + +constexpr __m512 ws = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, + 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512 r512lo_mask = _mm512_mask_cvtpd_pslo(ws, 0x3, d8); +static_assert(r512lo_mask[0] == -1.0f, ""); +static_assert(r512lo_mask[1] == +2.0f, ""); +static_assert(r512lo_mask[2] == 9.0f && r512lo_mask[3] == 9.0f, ""); + +constexpr __m128 src_ss = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128d b_ss = { -1.0, 42.0 }; +constexpr __m128 r_ss = _mm_cvtsd_ss(src_ss, b_ss); +static_assert(r_ss[0] == -1.0f, ""); +static_assert(r_ss[1] == 5.0f && r_ss[3] == 7.0f, ""); + +constexpr __m128 r_ss_mask_on = _mm_mask_cvtsd_ss(src_ss, 0x1, src_ss, b_ss); +static_assert(r_ss_mask_on[0] == -1.0f && r_ss_mask_on[1] == 5.0f, ""); +constexpr __m128 r_ss_mask_off = _mm_mask_cvtsd_ss(src_ss, 0x0, src_ss, b_ss); +static_assert(r_ss_mask_off[0] == 9.0f, ""); +constexpr __m128 r_ss_maskz_off = _mm_maskz_cvtsd_ss(0x0, src_ss, b_ss); +static_assert(r_ss_maskz_off[0] == 0.0f && r_ss_maskz_off[1] == 0.0f, ""); +} + +namespace InexactOrSpecialReject { +constexpr __m128d inexact = { 1.0000000000000002, 0.0 }; 
+constexpr __m128 r_inexact = _mm_cvtpd_ps(inexact); // both-error {{not an integral constant expression}} +static_assert(r_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}} + +constexpr __m128d dinf = { __builtin_huge_val(), 0.0 }; +constexpr __m128 r_inf = _mm_cvtpd_ps(dinf); // both-error {{not an integral constant expression}} +static_assert(r_inf[0] == __builtin_inff(), ""); // both-note {{subexpression not valid in a constant expression}} + +constexpr __m128d dnan = { __builtin_nan(""), 0.0 }; +constexpr __m128 r_nan = _mm_cvtpd_ps(dnan); // both-error {{not an integral constant expression}} +static_assert(r_nan[0] != r_nan[0], ""); // both-note {{subexpression not valid in a constant expression}} + +constexpr __m128d dsub = { 1e-310, 0.0 }; +constexpr __m128 r_sub = _mm_cvtpd_ps(dsub); // both-error {{not an integral constant expression}} +static_assert(r_sub[0] == 0.0f, ""); // both-note {{subexpression not valid in a constant expression}} + +constexpr __m128 src_ss2 = { 0.0f, 1.0f, 2.0f, 3.0f }; +constexpr __m128d inexact_sd = { 1.0000000000000002, 0.0 }; +constexpr __m128 r_ss_inexact = _mm_cvtsd_ss(src_ss2, inexact_sd); // both-error {{not an integral constant expression}} +static_assert(r_ss_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}} +} + +namespace MaskedSpecialCasesAllowed { +constexpr __m128 src128a = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128d d2_inexact = { -1.0, 1.0000000000000002 }; +constexpr __m128 ok128 = _mm_mask_cvtpd_ps(src128a, 0x1, d2_inexact); +static_assert(ok128[0] == -1.0f && ok128[1] == 9.0f, ""); + +constexpr __m128 ok128z = _mm_maskz_cvtpd_ps(0x1, d2_inexact); +static_assert(ok128z[0] == -1.0f && ok128z[1] == 0.0f, ""); + +constexpr __m256d d4_inexact = { 0.0, 1.0000000000000002, 2.0, 3.0 }; +constexpr __m128 src_m = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128 ok256m = _mm256_mask_cvtpd_ps(src_m, 0b0101, d4_inexact); 
+static_assert(ok256m[0] == 0.0f && ok256m[1] == 9.0f && ok256m[2] == 2.0f && ok256m[3] == 9.0f, ""); + +constexpr __m128 ok256z = _mm256_maskz_cvtpd_ps(0b0101, d4_inexact); +static_assert(ok256z[0] == 0.0f && ok256z[1] == 0.0f && ok256z[2] == 2.0f && ok256z[3] == 0.0f, ""); + +constexpr __m512d d8_inexact = { -1.0, 2.0, 4.0, 8.0, 16.0, 1.0000000000000002, 64.0, 128.0 }; +constexpr __m256 src256b = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m256 ok512m = _mm512_mask_cvtpd_ps(src256b, 0b110111, d8_inexact); +static_assert(ok512m[0] == -1.0f && ok512m[5] == 9.0f && ok512m[7] == 128.0f, ""); + +constexpr __m256 ok512z = _mm512_maskz_cvtpd_ps(0b110111, d8_inexact); +static_assert(ok512z[5] == 0.0f && ok512z[0] == -1.0f && ok512z[7] == 128.0f, ""); + +constexpr __m128 bad128 = _mm_mask_cvtpd_ps(src128a, 0x2, d2_inexact); // both-error {{not an integral constant expression}} +static_assert(bad128[1] == 9.0f, ""); // both-note {{subexpression not valid in a constant expression}} +} From 30c0dc75714191e31625bb074e6e62d54aeece7f Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Wed, 26 Nov 2025 22:20:48 +0200 Subject: [PATCH 02/25] added __DEFAULT_FN_ATTRS_CONSTEXPR To Headers --- clang/lib/Headers/avx512fintrin.h | 16 ++++++++-------- clang/lib/Headers/avx512vlintrin.h | 8 ++++---- clang/lib/Headers/avxintrin.h | 4 ++-- clang/lib/Headers/emmintrin.h | 4 ++-- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index e1de56069870b..b9f1d1eecc09f 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -207,7 +207,7 @@ _mm512_undefined(void) return (__m512)__builtin_ia32_undef512(); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_undefined_ps(void) { return (__m512)__builtin_ia32_undef512(); @@ -3489,7 +3489,7 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) { 
(__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m256 __DEFAULT_FN_ATTRS512 +static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps (__m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, @@ -3498,7 +3498,7 @@ _mm512_cvtpd_ps (__m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256 __DEFAULT_FN_ATTRS512 +static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, @@ -3507,7 +3507,7 @@ _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256 __DEFAULT_FN_ATTRS512 +static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, @@ -3516,7 +3516,7 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_pslo (__m512d __A) { return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A), @@ -3524,7 +3524,7 @@ _mm512_cvtpd_pslo (__m512d __A) 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) { return (__m512) __builtin_shufflevector ( @@ -8672,7 +8672,7 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) { return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, @@ -8681,7 +8681,7 @@ _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 
__A, __m128d __B) (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) { return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 99c057030a4cc..82a06edd28ba2 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -1791,14 +1791,14 @@ _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { (__v4si)_mm_setzero_si128()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, (__v4sf) @@ -1806,14 +1806,14 @@ _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtpd_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtpd_ps(__A), diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 44ef88db5cbce..f3f444083edbf 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -2190,7 +2190,7 @@ _mm256_cvtepi32_ps(__m256i __a) { /// \param __a /// A 256-bit vector of [4 x double]. 
/// \returns A 128-bit vector of [4 x float] containing the converted values. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_cvtpd_ps(__m256d __a) { return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a); @@ -3610,7 +3610,7 @@ _mm256_undefined_pd(void) /// This intrinsic has no corresponding instruction. /// /// \returns A 256-bit vector of [8 x float] containing undefined values. -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_undefined_ps(void) { return (__m256)__builtin_ia32_undef256(); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index dbe5ca0379cf5..1701effedc5ce 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -1279,7 +1279,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted values. The upper 64 bits are set to zero. -static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) { +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtpd_ps(__m128d __a) { return __builtin_ia32_cvtpd2ps((__v2df)__a); } @@ -1384,7 +1384,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) { /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the /// converted value from the second parameter. The upper 96 bits are copied /// from the upper 96 bits of the first parameter. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtsd_ss(__m128 __a, __m128d __b) { return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); } From 9f1020ecf3a706df9537b38464b61748aa0278f0 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Wed, 26 Nov 2025 22:24:54 +0200 Subject: [PATCH 03/25] added Constexpr to necessary builtins --- clang/include/clang/Basic/BuiltinsX86.td | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 4aa3d51931980..283a0a3e6ae0c 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -24,12 +24,12 @@ def undef128 : X86Builtin<"_Vector<2, double>()"> { let Attributes = [Const, NoThrow, RequiredVectorWidth<128>]; } -def undef256 : X86Builtin<"_Vector<4, double>()"> { - let Attributes = [Const, NoThrow, RequiredVectorWidth<256>]; +def undef256 : X86Builtin<"_Vector<4, double>()" > { + let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<256>]; } def undef512 : X86Builtin<"_Vector<8, double>()"> { - let Attributes = [Const, NoThrow, RequiredVectorWidth<512>]; + let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<512>]; } // FLAGS @@ -168,7 +168,7 @@ let Features = "sse2", Attributes = [NoThrow] in { def movnti : X86Builtin<"void(int *, int)">; } -let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">; def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">; def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">; @@ -468,7 +468,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def 
vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; } -let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">; def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">; def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">; @@ -1009,7 +1009,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128 def cmppd128_mask : X86Builtin<"unsigned char(_Vector<2, double>, _Vector<2, double>, _Constant int, unsigned char)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def rndscaleps_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">; def rndscalepd_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">; def cvtps2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">; @@ -1457,7 +1457,7 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in { def compressstoresi256_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, int>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">; def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned 
char)">; def cvtpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">; @@ -3301,7 +3301,7 @@ let Features = "avx512bw,avx512vl", def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">; def cvtsi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, int, _Constant int)">; def cvtss2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<4, float>, _Vector<2, double>, unsigned char, _Constant int)">; From d28d6d8c7cc6e816f772a78dd0d177f0248d3178 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Sat, 29 Nov 2025 11:08:42 +0200 Subject: [PATCH 04/25] added FULL tests for pd2ps constexpr --- .../constexpr-x86-intrinsics-pd2ps.cpp | 559 ++++++++++++++---- 1 file changed, 459 insertions(+), 100 deletions(-) diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp index a082b23bfae03..4a1e9a9c5ae2c 100644 --- a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp +++ b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp @@ -1,120 +1,479 @@ // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512vl -verify %s -// HACK: Prevent immintrin.h from pulling in standard library headers -// that don't exist in this test environment. 
-#define __MM_MALLOC_H - +#define __MM_MALLOC_H #include -namespace ExactFinite { -constexpr __m128d d2 = { -1.0, +2.0 }; -constexpr __m128 r128 = _mm_cvtpd_ps(d2); -static_assert(r128[0] == -1.0f && r128[1] == +2.0f, ""); -static_assert(r128[2] == 0.0f && r128[3] == 0.0f, ""); - -constexpr __m128 src128 = { 9.0f, 9.0f, 9.0f, 9.0f }; -constexpr __m128 m128_full = _mm_mask_cvtpd_ps(src128, 0x3, d2); -static_assert(m128_full[0] == -1.0f && m128_full[1] == +2.0f, ""); -static_assert(m128_full[2] == 9.0f && m128_full[3] == 9.0f, ""); - -constexpr __m128 m128_partial = _mm_mask_cvtpd_ps(src128, 0x1, d2); -static_assert(m128_partial[0] == -1.0f && m128_partial[1] == 9.0f, ""); - -constexpr __m128 m128_zero = _mm_maskz_cvtpd_ps(0x1, d2); -static_assert(m128_zero[0] == -1.0f && m128_zero[1] == 0.0f, ""); -static_assert(m128_zero[2] == 0.0f && m128_zero[3] == 0.0f, ""); - -constexpr __m256d d4 = { 0.0, -1.0, +2.0, +3.5 }; -constexpr __m128 r256 = _mm256_cvtpd_ps(d4); -static_assert(r256[0] == 0.0f && r256[1] == -1.0f, ""); -static_assert(r256[2] == +2.0f && r256[3] == +3.5f, ""); - -constexpr __m512d d8 = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; -constexpr __m256 r512 = _mm512_cvtpd_ps(d8); -static_assert(r512[0] == -1.0f && r512[7] == +128.0f, ""); - -constexpr __m256 src256 = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m256 r512_mask = _mm512_mask_cvtpd_ps(src256, 0x05, d8); -static_assert(r512_mask[0] == -1.0f && r512_mask[2] == +4.0f, ""); -static_assert(r512_mask[1] == 9.0f && r512_mask[3] == 9.0f, ""); - -constexpr __m256 r512_maskz = _mm512_maskz_cvtpd_ps(0x81, d8); -static_assert(r512_maskz[0] == -1.0f && r512_maskz[7] == +128.0f, ""); -static_assert(r512_maskz[1] == 0.0f && r512_maskz[6] == 0.0f, ""); - -constexpr __m512 r512lo = _mm512_cvtpd_pslo(d8); -static_assert(r512lo[0] == -1.0f && r512lo[7] == +128.0f, ""); -static_assert(r512lo[8] == 0.0f && r512lo[15] == 0.0f, ""); - -constexpr __m512 ws = (__m512){ 
9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, - 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m512 r512lo_mask = _mm512_mask_cvtpd_pslo(ws, 0x3, d8); -static_assert(r512lo_mask[0] == -1.0f, ""); -static_assert(r512lo_mask[1] == +2.0f, ""); -static_assert(r512lo_mask[2] == 9.0f && r512lo_mask[3] == 9.0f, ""); - -constexpr __m128 src_ss = { 9.0f, 5.0f, 6.0f, 7.0f }; -constexpr __m128d b_ss = { -1.0, 42.0 }; -constexpr __m128 r_ss = _mm_cvtsd_ss(src_ss, b_ss); -static_assert(r_ss[0] == -1.0f, ""); -static_assert(r_ss[1] == 5.0f && r_ss[3] == 7.0f, ""); +namespace Test_mm_cvtsd_ss { +namespace OK { +constexpr __m128 a = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128d b = { -1.0, 42.0 }; +constexpr __m128 r = _mm_cvtsd_ss(a, b); +static_assert(r[0] == -1.0f && r[1] == 5.0f && r[2] == 6.0f && r[3] == 7.0f, ""); +} +namespace Inexact { +constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f }; +constexpr __m128d b = { 1.0000000000000002, 0.0 }; +constexpr __m128 r = _mm_cvtsd_ss(a, b); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e+00, 0.000000e+00})'}} +} +namespace Inf { +constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f }; +constexpr __m128d b = { __builtin_huge_val(), 0.0 }; +constexpr __m128 r = _mm_cvtsd_ss(a, b); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {INF, 0.000000e+00})'}} +} +namespace NaN { +constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f }; +constexpr __m128d b = { __builtin_nan(""), 0.0 }; +constexpr __m128 r = _mm_cvtsd_ss(a, b); +// expected-error@-1 {{must be initialized by a constant expression}} +// 
expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {nan, 0.000000e+00})'}} +} +namespace Subnormal { +constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f }; +constexpr __m128d b = { 1e-310, 0.0 }; +constexpr __m128 r = _mm_cvtsd_ss(a, b); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e-310, 0.000000e+00})'}} +} +} -constexpr __m128 r_ss_mask_on = _mm_mask_cvtsd_ss(src_ss, 0x1, src_ss, b_ss); -static_assert(r_ss_mask_on[0] == -1.0f && r_ss_mask_on[1] == 5.0f, ""); -constexpr __m128 r_ss_mask_off = _mm_mask_cvtsd_ss(src_ss, 0x0, src_ss, b_ss); -static_assert(r_ss_mask_off[0] == 9.0f, ""); -constexpr __m128 r_ss_maskz_off = _mm_maskz_cvtsd_ss(0x0, src_ss, b_ss); -static_assert(r_ss_maskz_off[0] == 0.0f && r_ss_maskz_off[1] == 0.0f, ""); +namespace Test_mm_mask_cvtsd_ss { +namespace OK { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b = { -1.0, 42.0 }; +constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b); +static_assert(r[0] == -1.0f && r[1] == 2.0f && r[2] == 3.0f && r[3] == 4.0f, ""); +} +namespace MaskOff { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b = { -1.0, 42.0 }; +constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b); +static_assert(r[0] == 9.0f && r[1] == 2.0f, ""); +} +namespace MaskOffInexact { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 }; +constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b_inexact); 
+static_assert(r[0] == 9.0f, ""); +} +namespace MaskOnInexact { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 }; +constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inexact); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e+00, 0.000000e+00})'}} +} +namespace MaskOnInf { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 }; +constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}} +} +namespace MaskOnNaN { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_nan = { __builtin_nan(""), 0.0 }; +constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}} +} +namespace MaskOnSubnormal { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 
3.0f, 4.0f }; +constexpr __m128d b_sub = { 1e-310, 0.0 }; +constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_sub); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e-310, 0.000000e+00})'}} +} } -namespace InexactOrSpecialReject { -constexpr __m128d inexact = { 1.0000000000000002, 0.0 }; -constexpr __m128 r_inexact = _mm_cvtpd_ps(inexact); // both-error {{not an integral constant expression}} -static_assert(r_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}} +namespace Test_mm_maskz_cvtsd_ss { +namespace OK { +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b = { -1.0, 42.0 }; +constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b); +static_assert(r[0] == -1.0f && r[1] == 2.0f, ""); +} +namespace MaskOff { +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b = { -1.0, 42.0 }; +constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b); +static_assert(r[0] == 0.0f && r[1] == 2.0f, ""); +} +namespace MaskOffInexact { +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 }; +constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b_inexact); +static_assert(r[0] == 0.0f, ""); +} +namespace MaskOnInf { +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 }; +constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 
0.000000e+00})'}} +} +namespace MaskOnNaN { +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_nan = { __builtin_nan(""), 0.0 }; +constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}} +} +} -constexpr __m128d dinf = { __builtin_huge_val(), 0.0 }; -constexpr __m128 r_inf = _mm_cvtpd_ps(dinf); // both-error {{not an integral constant expression}} -static_assert(r_inf[0] == __builtin_inff(), ""); // both-note {{subexpression not valid in a constant expression}} +namespace Test_mm_cvtpd_ps { +namespace OK { +constexpr __m128d a = { -1.0, +2.0 }; +constexpr __m128 r = _mm_cvtpd_ps(a); +static_assert(r[0] == -1.0f && r[1] == +2.0f, ""); +static_assert(r[2] == 0.0f && r[3] == 0.0f, ""); +} +namespace Inexact { +constexpr __m128d a = { 1.0000000000000002, 0.0 }; +constexpr __m128 r = _mm_cvtpd_ps(a); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e+00, 0.000000e+00})'}} +} +namespace Inf { +constexpr __m128d a = { __builtin_huge_val(), 0.0 }; +constexpr __m128 r = _mm_cvtpd_ps(a); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm_cvtpd_ps({INF, 0.000000e+00})'}} +} +namespace NaN { +constexpr __m128d a = { __builtin_nan(""), 0.0 }; +constexpr __m128 r = _mm_cvtpd_ps(a); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 
{{in call to '_mm_cvtpd_ps({nan, 0.000000e+00})'}} +} +namespace Subnormal { +constexpr __m128d a = { 1e-310, 0.0 }; +constexpr __m128 r = _mm_cvtpd_ps(a); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e-310, 0.000000e+00})'}} +} +} -constexpr __m128d dnan = { __builtin_nan(""), 0.0 }; -constexpr __m128 r_nan = _mm_cvtpd_ps(dnan); // both-error {{not an integral constant expression}} -static_assert(r_nan[0] != r_nan[0], ""); // both-note {{subexpression not valid in a constant expression}} +namespace Test_mm_mask_cvtpd_ps { +namespace OK { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128d a = { -1.0, +2.0 }; +constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x3, a); +static_assert(r[0] == -1.0f && r[1] == +2.0f, ""); +static_assert(r[2] == 9.0f && r[3] == 9.0f, ""); +} +namespace Partial { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128d a = { -1.0, +2.0 }; +constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a); +static_assert(r[0] == -1.0f && r[1] == 9.0f, ""); +} +namespace MaskOffInexact { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 }; +constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a_inexact); +static_assert(r[0] == -1.0f && r[1] == 9.0f, ""); +} +namespace MaskOnInexact { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 }; +constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inexact); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512vlintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, 
1.000000e+00})'}} +} +namespace MaskOnInf { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128d a_inf = { -1.0, __builtin_huge_val() }; +constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, INF})'}} +} +namespace MaskOnNaN { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128d a_nan = { -1.0, __builtin_nan("") }; +constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, nan})'}} +} +} -constexpr __m128d dsub = { 1e-310, 0.0 }; -constexpr __m128 r_sub = _mm_cvtpd_ps(dsub); // both-error {{not an integral constant expression}} -static_assert(r_sub[0] == 0.0f, ""); // both-note {{subexpression not valid in a constant expression}} +namespace Test_mm_maskz_cvtpd_ps { +namespace OK { +constexpr __m128d a = { -1.0, +2.0 }; +constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a); +static_assert(r[0] == -1.0f && r[1] == 0.0f, ""); +static_assert(r[2] == 0.0f && r[3] == 0.0f, ""); +} +namespace MaskOffInexact { +constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 }; +constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a_inexact); +static_assert(r[0] == -1.0f && r[1] == 0.0f, ""); +} +namespace MaskOnInf { +constexpr __m128d a_inf = { -1.0, __builtin_huge_val() }; +constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}} +// 
expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, INF})'}} +} +namespace MaskOnNaN { +constexpr __m128d a_nan = { -1.0, __builtin_nan("") }; +constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, nan})'}} +} +} -constexpr __m128 src_ss2 = { 0.0f, 1.0f, 2.0f, 3.0f }; -constexpr __m128d inexact_sd = { 1.0000000000000002, 0.0 }; -constexpr __m128 r_ss_inexact = _mm_cvtsd_ss(src_ss2, inexact_sd); // both-error {{not an integral constant expression}} -static_assert(r_ss_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}} +namespace Test_mm256_cvtpd_ps { +namespace OK { +constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 }; +constexpr __m128 r = _mm256_cvtpd_ps(a); +static_assert(r[0] == 0.0f && r[1] == -1.0f, ""); +static_assert(r[2] == +2.0f && r[3] == +3.5f, ""); +} +namespace Inexact { +constexpr __m256d a = { 1.0000000000000002, 0.0, 0.0, 0.0 }; +constexpr __m128 r = _mm256_cvtpd_ps(a); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avxintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm256_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}} +} } -namespace MaskedSpecialCasesAllowed { -constexpr __m128 src128a = { 9.0f, 9.0f, 9.0f, 9.0f }; -constexpr __m128d d2_inexact = { -1.0, 1.0000000000000002 }; -constexpr __m128 ok128 = _mm_mask_cvtpd_ps(src128a, 0x1, d2_inexact); -static_assert(ok128[0] == -1.0f && ok128[1] == 9.0f, ""); +namespace Test_mm256_mask_cvtpd_ps { +namespace OK { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 }; +constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0xF, a); +static_assert(r[0] 
== 0.0f && r[1] == -1.0f && r[2] == +2.0f && r[3] == +3.5f, ""); +} +namespace MaskOffInf { +// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking +// So even masked-off Inf/NaN values cause errors (architectural limitation) +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 }; +constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x3, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}} +// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}} +} +namespace MaskOffNaN { +// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking +// So even masked-off Inf/NaN values cause errors (architectural limitation) +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") }; +constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x7, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}} +// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}} +} +} -constexpr __m128 ok128z = _mm_maskz_cvtpd_ps(0x1, d2_inexact); -static_assert(ok128z[0] == -1.0f && ok128z[1] == 0.0f, ""); +namespace Test_mm256_maskz_cvtpd_ps { +namespace OK { +constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 }; +constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x5, a); 
+static_assert(r[0] == 0.0f && r[1] == 0.0f && r[2] == +2.0f && r[3] == 0.0f, ""); +} +namespace MaskOffInf { +// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking +// So even masked-off Inf/NaN values cause errors (architectural limitation) +constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 }; +constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x3, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}} +// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}} +} +namespace MaskOffNaN { +// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking +// So even masked-off Inf/NaN values cause errors (architectural limitation) +constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") }; +constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x7, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}} +// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}} +} +} -constexpr __m256d d4_inexact = { 0.0, 1.0000000000000002, 2.0, 3.0 }; -constexpr __m128 src_m = { 9.0f, 9.0f, 9.0f, 9.0f }; -constexpr __m128 ok256m = _mm256_mask_cvtpd_ps(src_m, 0b0101, d4_inexact); -static_assert(ok256m[0] == 0.0f && ok256m[1] == 9.0f && ok256m[2] == 2.0f && ok256m[3] == 9.0f, ""); +namespace Test_mm512_cvtpd_ps { +namespace OK { +constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; +constexpr __m256 r = _mm512_cvtpd_ps(a); +static_assert(r[0] == 
-1.0f && r[7] == +128.0f, ""); +} +namespace Inexact { +constexpr __m512d a = { 1.0000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; +constexpr __m256 r = _mm512_cvtpd_ps(a); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm512_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}} +} +} -constexpr __m128 ok256z = _mm256_maskz_cvtpd_ps(0b0101, d4_inexact); -static_assert(ok256z[0] == 0.0f && ok256z[1] == 0.0f && ok256z[2] == 2.0f && ok256z[3] == 0.0f, ""); +namespace Test_mm512_mask_cvtpd_ps { +namespace OK { +constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; +constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x05, a); +static_assert(r[0] == -1.0f && r[2] == +4.0f, ""); +static_assert(r[1] == 9.0f && r[3] == 9.0f, ""); +} +namespace MaskOffInexact { +constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 }; +constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0b11011111, a_inexact); +static_assert(r[0] == -1.0f && r[5] == 9.0f && r[6] == 64.0f && r[7] == 128.0f, ""); +} +namespace MaskOffInf { +constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 }; +constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_inf); +static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, ""); +} +namespace MaskOffNaN { +constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 }; +constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 
0x1F, a_nan); +static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, ""); +} +namespace MaskOnInf { +constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 }; +constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +} +namespace MaskOnNaN { +constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 }; +constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +} +} -constexpr __m512d d8_inexact = { -1.0, 2.0, 4.0, 8.0, 16.0, 1.0000000000000002, 64.0, 128.0 }; -constexpr __m256 src256b = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m256 ok512m = _mm512_mask_cvtpd_ps(src256b, 0b110111, d8_inexact); -static_assert(ok512m[0] == -1.0f && ok512m[5] == 9.0f && ok512m[7] == 128.0f, ""); +namespace Test_mm512_maskz_cvtpd_ps { +namespace OK { +constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; +constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x81, a); 
+static_assert(r[0] == -1.0f && r[7] == +128.0f, ""); +static_assert(r[1] == 0.0f && r[6] == 0.0f, ""); +} +namespace MaskOffInexact { +constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 }; +constexpr __m256 r = _mm512_maskz_cvtpd_ps(0b11011111, a_inexact); +static_assert(r[0] == -1.0f && r[5] == 0.0f && r[6] == 64.0f && r[7] == 128.0f, ""); +} +namespace MaskOffInf { +constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 }; +constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_inf); +static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, ""); +} +namespace MaskOffNaN { +constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 }; +constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_nan); +static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, ""); +} +namespace MaskOnInf { +constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 }; +constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +} +namespace MaskOnNaN { +constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 }; +constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +} +} -constexpr __m256 ok512z = _mm512_maskz_cvtpd_ps(0b110111, d8_inexact); 
-static_assert(ok512z[5] == 0.0f && ok512z[0] == -1.0f && ok512z[7] == 128.0f, ""); +namespace Test_mm512_cvtpd_pslo { +namespace OK { +constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; +constexpr __m512 r = _mm512_cvtpd_pslo(a); +static_assert(r[0] == -1.0f && r[7] == +128.0f, ""); +static_assert(r[8] == 0.0f && r[15] == 0.0f, ""); +} +} -constexpr __m128 bad128 = _mm_mask_cvtpd_ps(src128a, 0x2, d2_inexact); // both-error {{not an integral constant expression}} -static_assert(bad128[1] == 9.0f, ""); // both-note {{subexpression not valid in a constant expression}} +namespace Test_mm512_mask_cvtpd_pslo { +namespace OK { +constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, + 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; +constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a); +static_assert(r[0] == -1.0f && r[1] == +2.0f, ""); +static_assert(r[2] == 9.0f && r[3] == 9.0f, ""); +} +namespace MaskOffInf { +constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, + 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 }; +constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a_inf); +static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 9.0f, ""); +} +namespace MaskOffNaN { +constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, + 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 }; +constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x7, a_nan); +static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 4.0f && r[3] == 9.0f, ""); +} +namespace MaskOnInf { +constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, + 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), 
+8.0, +16.0, +32.0, +64.0, +128.0 }; +constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +} +namespace MaskOnNaN { +constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, + 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_nan = { -1.0, +2.0, __builtin_nan(""), +8.0, +16.0, +32.0, +64.0, +128.0 }; +constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, 
{-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +} } From 4a2f59bee574efec48ac87e74dae356dc72fb2ae Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Sat, 29 Nov 2025 11:09:57 +0200 Subject: [PATCH 05/25] fully implemented features in ExprConstant visiting logic --- clang/lib/AST/ExprConstant.cpp | 143 +++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3b91678f7d400..065d5c2e33a9c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12165,7 +12165,37 @@ static bool evalShuffleGeneric( Out = APValue(ResultElements.data(), ResultElements.size()); return true; } +static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E, + APFloat OrigVal, APValue &Result) { + if (OrigVal.isInfinity()) { + Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0; + return false; + } + if (OrigVal.isNaN()) { + Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 1; + return false; + } + + APFloat Val = OrigVal; + bool LosesInfo = false; + APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(), + APFloat::rmNearestTiesToEven, + &LosesInfo); + + if(LosesInfo || Val.isDenormal()) { + Info.CCEDiag(E, diag::note_constexpr_float_arithmetic_strict); + return false; + } + + if(Status != APFloat::opOK) { + Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr); + return false; + } + + Result = APValue(Val); + return true; +} bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!IsConstantEvaluatedBuiltinCall(E)) return ExprEvaluatorBaseTy::VisitCallExpr(E); @@ -12878,6 +12908,119 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + + case X86::BI__builtin_ia32_cvtsd2ss: { + APValue VecA, VecB; + if (!EvaluateAsRValue(Info, E->getArg(0), VecA) || + !EvaluateAsRValue(Info,
E->getArg(1), VecB)) + return false; + + SmallVector Elements; + + APValue ResultVal; + if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal)) + return false; + + Elements.push_back(ResultVal); + + unsigned NumEltsA = VecA.getVectorLength(); + for (unsigned I = 1; I < NumEltsA; ++I) { + Elements.push_back(VecA.getVectorElt(I)); + } + + return Success(Elements, E); + } + case X86::BI__builtin_ia32_cvtsd2ss_round_mask: { + APValue VecA, VecB, VecSrc, MaskValue; + + if (!EvaluateAsRValue(Info, E->getArg(0), VecA) || + !EvaluateAsRValue(Info, E->getArg(1), VecB) || + !EvaluateAsRValue(Info, E->getArg(2), VecSrc) || + !EvaluateAsRValue(Info, E->getArg(3), MaskValue)) + return false; + + unsigned Mask = MaskValue.getInt().getZExtValue(); + SmallVector Elements; + + + if (Mask & 1) { + APValue ResultVal; + if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal)) + return false; + Elements.push_back(ResultVal); + } else { + Elements.push_back(VecSrc.getVectorElt(0)); + } + + unsigned NumEltsA = VecA.getVectorLength(); + for (unsigned I = 1; I < NumEltsA; ++I) { + Elements.push_back(VecA.getVectorElt(I)); + } + + return Success(Elements, E); + } + case X86::BI__builtin_ia32_cvtpd2ps: + case X86::BI__builtin_ia32_cvtpd2ps256: + case X86::BI__builtin_ia32_cvtpd2ps_mask: + case X86::BI__builtin_ia32_cvtpd2ps512_mask: { + + + const auto BuiltinID = E->getBuiltinCallee(); + bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask || + BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask); + + APValue InputValue; + if (!EvaluateAsRValue(Info, E->getArg(0), InputValue)) + return false; + + APValue MergeValue; + unsigned Mask = 0xFFFFFFFF; + bool NeedsMerge = false; + if (IsMasked) { + APValue MaskValue; + if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue)) + return false; + Mask = MaskValue.getInt().getZExtValue(); + auto NumEltsResult = E->getType()->getAs()->getNumElements(); + for (unsigned I = 0; I < 
NumEltsResult; ++I) { + if (!((Mask >> I) & 1)) { + NeedsMerge = true; + break; + } + } + if (NeedsMerge) { + if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue)) + return false; + } + } + + unsigned NumEltsResult = E->getType()->getAs()->getNumElements(); + unsigned NumEltsInput = InputValue.getVectorLength(); + SmallVector Elements; + for (unsigned I = 0; I < NumEltsResult; ++I) { + if (IsMasked && !((Mask >> I) & 1)) { + if (!NeedsMerge) { + return false; + } + Elements.push_back(MergeValue.getVectorElt(I)); + continue; + } + + if (I >= NumEltsInput) { + Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle()))); + continue; + } + + APValue ResultVal; + if (!ConvertDoubleToFloatStrict(Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal)) + return false; + + Elements.push_back(ResultVal); + } + return Success(Elements, E); + } + + case X86::BI__builtin_ia32_shufps: case X86::BI__builtin_ia32_shufps256: case X86::BI__builtin_ia32_shufps512: { From 0fb3292fe860e30de61d2df3a90912f27f04f143 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Sat, 29 Nov 2025 11:21:17 +0200 Subject: [PATCH 06/25] Ran the git clang-format command --- clang/lib/AST/ExprConstant.cpp | 192 ++++++++++++++++----------------- 1 file changed, 96 insertions(+), 96 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 065d5c2e33a9c..6f512dd538e7d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12169,7 +12169,7 @@ static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E, APFloat OrigVal, APValue &Result) { if (OrigVal.isInfinity()) { - Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0; + Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0; return false; } if (OrigVal.isNaN()) { @@ -12177,18 +12177,17 @@ static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E, return false; } - APFloat Val = OrigVal; + APFloat Val = OrigVal; bool LosesInfo = false; - 
APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(), - APFloat::rmNearestTiesToEven, - &LosesInfo); + APFloat::opStatus Status = Val.convert( + APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo); - if(LosesInfo || Val.isDenormal()) { + if (LosesInfo || Val.isDenormal()) { Info.CCEDiag(E, diag::note_constexpr_float_arithmetic_strict); return false; } - if(Status != APFloat::opOK) { + if (Status != APFloat::opOK) { Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } @@ -12909,118 +12908,119 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } - case X86::BI__builtin_ia32_cvtsd2ss: { - APValue VecA, VecB; - if (!EvaluateAsRValue(Info, E->getArg(0), VecA) || - !EvaluateAsRValue(Info, E->getArg(1), VecB)) - return false; + case X86::BI__builtin_ia32_cvtsd2ss: { + APValue VecA, VecB; + if (!EvaluateAsRValue(Info, E->getArg(0), VecA) || + !EvaluateAsRValue(Info, E->getArg(1), VecB)) + return false; - SmallVector Elements; + SmallVector Elements; - APValue ResultVal; - if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal)) - return false; - - Elements.push_back(ResultVal); + APValue ResultVal; + if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), + ResultVal)) + return false; - unsigned NumEltsA = VecA.getVectorLength(); - for (unsigned I = 1; I < NumEltsA; ++I) { - Elements.push_back(VecA.getVectorElt(I)); - } + Elements.push_back(ResultVal); - return Success(Elements, E); + unsigned NumEltsA = VecA.getVectorLength(); + for (unsigned I = 1; I < NumEltsA; ++I) { + Elements.push_back(VecA.getVectorElt(I)); } - case X86::BI__builtin_ia32_cvtsd2ss_round_mask: { - APValue VecA, VecB, VecSrc, MaskValue; - - if (!EvaluateAsRValue(Info, E->getArg(0), VecA) || - !EvaluateAsRValue(Info, E->getArg(1), VecB) || - !EvaluateAsRValue(Info, E->getArg(2), VecSrc) || - !EvaluateAsRValue(Info, E->getArg(3), 
MaskValue)) - return false; - unsigned Mask = MaskValue.getInt().getZExtValue(); - SmallVector Elements; + return Success(Elements, E); + } + case X86::BI__builtin_ia32_cvtsd2ss_round_mask: { + APValue VecA, VecB, VecSrc, MaskValue; + if (!EvaluateAsRValue(Info, E->getArg(0), VecA) || + !EvaluateAsRValue(Info, E->getArg(1), VecB) || + !EvaluateAsRValue(Info, E->getArg(2), VecSrc) || + !EvaluateAsRValue(Info, E->getArg(3), MaskValue)) + return false; - if (Mask & 1) { - APValue ResultVal; - if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal)) - return false; - Elements.push_back(ResultVal); - } else { - Elements.push_back(VecSrc.getVectorElt(0)); - } + unsigned Mask = MaskValue.getInt().getZExtValue(); + SmallVector Elements; - unsigned NumEltsA = VecA.getVectorLength(); - for (unsigned I = 1; I < NumEltsA; ++I) { - Elements.push_back(VecA.getVectorElt(I)); - } + if (Mask & 1) { + APValue ResultVal; + if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), + ResultVal)) + return false; + Elements.push_back(ResultVal); + } else { + Elements.push_back(VecSrc.getVectorElt(0)); + } - return Success(Elements, E); + unsigned NumEltsA = VecA.getVectorLength(); + for (unsigned I = 1; I < NumEltsA; ++I) { + Elements.push_back(VecA.getVectorElt(I)); } - case X86::BI__builtin_ia32_cvtpd2ps: - case X86::BI__builtin_ia32_cvtpd2ps256: - case X86::BI__builtin_ia32_cvtpd2ps_mask: - case X86::BI__builtin_ia32_cvtpd2ps512_mask: { + return Success(Elements, E); + } + case X86::BI__builtin_ia32_cvtpd2ps: + case X86::BI__builtin_ia32_cvtpd2ps256: + case X86::BI__builtin_ia32_cvtpd2ps_mask: + case X86::BI__builtin_ia32_cvtpd2ps512_mask: { - const auto BuiltinID = E->getBuiltinCallee(); - bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask || - BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask); + const auto BuiltinID = E->getBuiltinCallee(); + bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask || + 
BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask); - APValue InputValue; - if (!EvaluateAsRValue(Info, E->getArg(0), InputValue)) - return false; - - APValue MergeValue; - unsigned Mask = 0xFFFFFFFF; - bool NeedsMerge = false; - if (IsMasked) { - APValue MaskValue; - if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue)) - return false; - Mask = MaskValue.getInt().getZExtValue(); - auto NumEltsResult = E->getType()->getAs()->getNumElements(); - for (unsigned I = 0; I < NumEltsResult; ++I) { - if (!((Mask >> I) & 1)) { - NeedsMerge = true; - break; - } - } - if (NeedsMerge) { - if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue)) - return false; - } - } + APValue InputValue; + if (!EvaluateAsRValue(Info, E->getArg(0), InputValue)) + return false; - unsigned NumEltsResult = E->getType()->getAs()->getNumElements(); - unsigned NumEltsInput = InputValue.getVectorLength(); - SmallVector Elements; + APValue MergeValue; + unsigned Mask = 0xFFFFFFFF; + bool NeedsMerge = false; + if (IsMasked) { + APValue MaskValue; + if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue)) + return false; + Mask = MaskValue.getInt().getZExtValue(); + auto NumEltsResult = E->getType()->getAs()->getNumElements(); for (unsigned I = 0; I < NumEltsResult; ++I) { - if (IsMasked && !((Mask >> I) & 1)) { - if (!NeedsMerge) { - return false; - } - Elements.push_back(MergeValue.getVectorElt(I)); - continue; + if (!((Mask >> I) & 1)) { + NeedsMerge = true; + break; } + } + if (NeedsMerge) { + if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue)) + return false; + } + } - if (I >= NumEltsInput) { - Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle()))); - continue; + unsigned NumEltsResult = + E->getType()->getAs()->getNumElements(); + unsigned NumEltsInput = InputValue.getVectorLength(); + SmallVector Elements; + for (unsigned I = 0; I < NumEltsResult; ++I) { + if (IsMasked && !((Mask >> I) & 1)) { + if (!NeedsMerge) { + return false; } + Elements.push_back(MergeValue.getVectorElt(I)); 
+ continue; + } - APValue ResultVal; - if (!ConvertDoubleToFloatStrict(Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal)) - return false; - - Elements.push_back(ResultVal); + if (I >= NumEltsInput) { + Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle()))); + continue; } - return Success(Elements, E); + + APValue ResultVal; + if (!ConvertDoubleToFloatStrict( + Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal)) + return false; + + Elements.push_back(ResultVal); } + return Success(Elements, E); + } - case X86::BI__builtin_ia32_shufps: case X86::BI__builtin_ia32_shufps256: case X86::BI__builtin_ia32_shufps512: { From 75c76719bfe4116e79140388fd52fa47df8da96b Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Mon, 1 Dec 2025 06:44:04 +0200 Subject: [PATCH 07/25] removed constexpr from _mm512_undefined_ps --- clang/lib/Headers/avx512fintrin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 63031c2fcfd82..85d54bc8eff8c 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -207,7 +207,7 @@ _mm512_undefined(void) return (__m512)__builtin_ia32_undef512(); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void) { return (__m512)__builtin_ia32_undef512(); } From 04dabc03228514825a07fa3648e3d8a646cdc33c Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Mon, 1 Dec 2025 11:07:51 +0200 Subject: [PATCH 08/25] added constexpr to __builtin_ia32_undef, and updated BuiltinsX86.td --- clang/include/clang/Basic/BuiltinsX86.td | 37 ++++++++++++++++-------- clang/lib/AST/ExprConstant.cpp | 24 +++++++++++++++ clang/lib/Headers/avx512fintrin.h | 3 +- clang/lib/Headers/xmmintrin.h | 2 +- 4 files changed, 51 insertions(+), 15 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index
df6ec01959bd4..097e980989941 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -21,15 +21,15 @@ def rdtscp : X86Builtin<"unsigned long long int(unsigned int*)">; // Undefined Values def undef128 : X86Builtin<"_Vector<2, double>()"> { - let Attributes = [Const, NoThrow, RequiredVectorWidth<128>]; + let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<128>]; } -def undef256 : X86Builtin<"_Vector<4, double>()" > { - let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<256>]; +def undef256 : X86Builtin<"_Vector<4, double>()"> { + let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>]; } def undef512 : X86Builtin<"_Vector<8, double>()"> { - let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<512>]; + let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<512>]; } // FLAGS @@ -167,13 +167,19 @@ let Features = "sse2", Attributes = [NoThrow] in { } let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">; + def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">; +} +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">; +} + +let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">; def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">; - def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">; def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">; def cvtsd2si : X86Builtin<"int(_Vector<2, double>)">; def cvttsd2si : X86Builtin<"int(_Vector<2, double>)">; - def cvtsd2ss : 
X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">; def cvtps2dq : X86Builtin<"_Vector<4, int>(_Vector<4, float>)">; def cvttps2dq : X86Builtin<"_Vector<4, int>(_Vector<4, float>)">; } @@ -463,10 +469,13 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid } let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">; +} + +let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">; def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">; def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">; - def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">; def cvtps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">; def cvttpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">; def cvtpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">; @@ -474,7 +483,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vperm2f128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">; def vperm2f128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">; def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; - foreach Op = ["max", "min"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; @@ -1005,6 +1013,10 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128 } let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def 
cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">; +} + +let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def rndscaleps_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">; def rndscalepd_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">; def cvtps2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">; @@ -1017,7 +1029,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto def maxpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">; def cvtdq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">; def cvtudq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">; - def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">; def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">; def vcvtph2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, short>, _Vector<16, float>, unsigned short, _Constant int)">; } @@ -1453,8 +1464,11 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in { } let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { - def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">; def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned char)">; +} + +let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { + def 
cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">; def cvtpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">; } @@ -3287,8 +3301,7 @@ let Features = "avx512bw,avx512vl", def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { - def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">; +let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def cvtsi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, int, _Constant int)">; def cvtss2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<4, float>, _Vector<2, double>, unsigned char, _Constant int)">; def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index ee82398d7ac2a..0868237d52404 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12954,6 +12954,30 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_undef128: + case X86::BI__builtin_ia32_undef256: + case X86::BI__builtin_ia32_undef512: { + // Undefined builtins return zero-initialized vectors in constexpr contexts + const auto *VTy = E->getType()->castAs(); + unsigned NumElts = VTy->getNumElements(); + QualType EltTy = VTy->getElementType(); + + SmallVector Elements; + Elements.reserve(NumElts); + + if (EltTy->isIntegerType()) { + APSInt Zero(Info.Ctx.getTypeSize(EltTy), EltTy->isUnsignedIntegerType()); + for (unsigned I = 0; I < NumElts; ++I) + Elements.push_back(APValue(Zero)); + } else { + APFloat 
Zero(Info.Ctx.getFloatTypeSemantics(EltTy)); + for (unsigned I = 0; I < NumElts; ++I) + Elements.push_back(APValue(Zero)); + } + + return Success(APValue(Elements.data(), Elements.size()), E); + } + case X86::BI__builtin_ia32_cvtsd2ss: { APValue VecA, VecB; if (!EvaluateAsRValue(Info, E->getArg(0), VecA) || diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 85d54bc8eff8c..9dcc4bea24a37 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -207,8 +207,7 @@ _mm512_undefined(void) return (__m512)__builtin_ia32_undef512(); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_undefined_ps(void) { +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_undefined_ps(void) { return (__m512)__builtin_ia32_undef512(); } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 72a643948bed6..b6487bed9facb 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -1892,7 +1892,7 @@ _mm_loadr_ps(const float *__p) /// This intrinsic has no corresponding instruction. /// /// \returns A 128-bit vector of [4 x float] containing undefined values. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_undefined_ps(void) { return (__m128)__builtin_ia32_undef128(); From 880b06029a68e75722326ebd62afb153b5724664 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Mon, 1 Dec 2025 12:22:01 +0200 Subject: [PATCH 09/25] Removed undef as constexpr and used _mm256_setzero_ps instead --- clang/include/clang/Basic/BuiltinsX86.td | 6 +++--- clang/lib/AST/ExprConstant.cpp | 23 ----------------------- clang/lib/Headers/avx512fintrin.h | 4 ++-- clang/lib/Headers/avxintrin.h | 2 +- clang/lib/Headers/xmmintrin.h | 2 +- 5 files changed, 7 insertions(+), 30 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 097e980989941..9754f839fc803 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -21,15 +21,15 @@ def rdtscp : X86Builtin<"unsigned long long int(unsigned int*)">; // Undefined Values def undef128 : X86Builtin<"_Vector<2, double>()"> { - let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<128>]; + let Attributes = [Const, NoThrow, RequiredVectorWidth<128>]; } def undef256 : X86Builtin<"_Vector<4, double>()"> { - let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>]; + let Attributes = [Const, NoThrow, RequiredVectorWidth<256>]; } def undef512 : X86Builtin<"_Vector<8, double>()"> { - let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<512>]; + let Attributes = [Const, NoThrow, RequiredVectorWidth<512>]; } // FLAGS diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 0868237d52404..c0a719e578332 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12954,29 +12954,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } - case X86::BI__builtin_ia32_undef128: - case 
X86::BI__builtin_ia32_undef256: - case X86::BI__builtin_ia32_undef512: { - // Undefined builtins return zero-initialized vectors in constexpr contexts - const auto *VTy = E->getType()->castAs(); - unsigned NumElts = VTy->getNumElements(); - QualType EltTy = VTy->getElementType(); - - SmallVector Elements; - Elements.reserve(NumElts); - - if (EltTy->isIntegerType()) { - APSInt Zero(Info.Ctx.getTypeSize(EltTy), EltTy->isUnsignedIntegerType()); - for (unsigned I = 0; I < NumElts; ++I) - Elements.push_back(APValue(Zero)); - } else { - APFloat Zero(Info.Ctx.getFloatTypeSemantics(EltTy)); - for (unsigned I = 0; I < NumElts; ++I) - Elements.push_back(APValue(Zero)); - } - - return Success(APValue(Elements.data(), Elements.size()), E); - } case X86::BI__builtin_ia32_cvtsd2ss: { APValue VecA, VecB; diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 9dcc4bea24a37..edcbdba908522 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -207,7 +207,7 @@ _mm512_undefined(void) return (__m512)__builtin_ia32_undef512(); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_undefined_ps(void) { +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void) { return (__m512)__builtin_ia32_undef512(); } @@ -3490,7 +3490,7 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) { static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, - (__v8sf) _mm256_undefined_ps (), + (__v8sf) _mm256_setzero_ps (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 605e70307cfc9..126ba30bcca7e 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -3605,7 +3605,7 @@ _mm256_undefined_pd(void) /// This intrinsic has no corresponding instruction. 
/// /// \returns A 256-bit vector of [8 x float] containing undefined values. -static __inline__ __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void) { return (__m256)__builtin_ia32_undef256(); } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index b6487bed9facb..72a643948bed6 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -1892,7 +1892,7 @@ _mm_loadr_ps(const float *__p) /// This intrinsic has no corresponding instruction. /// /// \returns A 128-bit vector of [4 x float] containing undefined values. -static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void) { return (__m128)__builtin_ia32_undef128(); From 4efe60af5e75b7c51320b66a3bc764a34b757df3 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Mon, 1 Dec 2025 12:41:57 +0200 Subject: [PATCH 10/25] Implemented InterpBuiltin cpp implementations --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 176 +++++++++++++++++++++++ 1 file changed, 176 insertions(+) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 8496b58105c7a..ad49eb14b911f 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -167,6 +167,37 @@ static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) { return Result; } +// Strict double -> float conversion used for X86 PD2PS/cvtsd2ss intrinsics. +// Reject NaN/Inf/Subnormal inputs and any lossy/inexact conversions.
+static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst, InterpState &S, + const Expr *DiagExpr) { + if (Src.isInfinity()) { + if (S.diagnosing()) + S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 0; + return false; + } + if (Src.isNaN()) { + if (S.diagnosing()) + S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 1; + return false; + } + APFloat Val = Src; + bool LosesInfo = false; + APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo); + if (LosesInfo || Val.isDenormal()) { + if (S.diagnosing()) + S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic_strict); + return false; + } + if (Status != APFloat::opOK) { + if (S.diagnosing()) + S.CCEDiag(DiagExpr, diag::note_invalid_subexpr_in_const_expr); + return false; + } + Dst.copy(Val); + return true; +} + static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call) { @@ -3359,6 +3390,140 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC, pushInteger(S, RetMask, Call->getType()); return true; } +static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 2); + + const Pointer &B = S.Stk.pop(); + const Pointer &A = S.Stk.pop(); + if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B)) + return false; + + const auto *DstVTy = Call->getType()->castAs(); + unsigned NumElems = DstVTy->getNumElements(); + const Pointer &Dst = S.Stk.peek(); + + // Copy all elements from A to Dst + for (unsigned I = 0; I != NumElems; ++I) + Dst.elem(I) = A.elem(I); + + // Convert element 0 from double to float + Floating Conv = S.allocFloat( + S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy)); + APFloat SrcD = B.elem(0).getAPFloat(); + if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call)) + return false; + Dst.elem(0) = Conv; + + Dst.initializeAllElements(); + return true; +} 
+ +static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S, + CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 5); + + // Pop in reverse order: rounding, mask, src, b, a + APSInt Rounding = popToAPSInt(S, Call->getArg(4)->getType()); + APSInt MaskInt = popToAPSInt(S, Call->getArg(3)->getType()); + const Pointer &Src = S.Stk.pop(); + const Pointer &B = S.Stk.pop(); + const Pointer &A = S.Stk.pop(); + if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) || !CheckLoad(S, OpPC, Src)) + return false; + + const auto *DstVTy = Call->getType()->castAs(); + unsigned NumElems = DstVTy->getNumElements(); + const Pointer &Dst = S.Stk.peek(); + + // Copy all elements from A to Dst + for (unsigned I = 0; I != NumElems; ++I) + Dst.elem(I) = A.elem(I); + + // If mask bit 0 is set, convert element 0 from double to float; otherwise use Src + if (MaskInt.getZExtValue() & 0x1) { + Floating Conv = S.allocFloat( + S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy)); + APFloat SrcD = B.elem(0).getAPFloat(); + if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call)) + return false; + Dst.elem(0) = Conv; + } else { + Dst.elem(0) = Src.elem(0); + } + + Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, + const CallExpr *Call, + unsigned BuiltinID) { + bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask || + BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask); + bool HasRounding = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask); + + APSInt MaskVal(1, false); + Pointer PassThrough; + Pointer SrcPd; + APSInt Rounding; + + if (IsMasked) { + // Pop in reverse order + if (HasRounding) { + // For 512: rounding, mask, passthrough, source + Rounding = popToAPSInt(S, Call->getArg(3)->getType()); + MaskVal = popToAPSInt(S, Call->getArg(2)->getType()); + PassThrough = S.Stk.pop(); + SrcPd = S.Stk.pop(); + } else { + // For VL: mask, passthrough, source + MaskVal = 
popToAPSInt(S, Call->getArg(2)->getType()); + PassThrough = S.Stk.pop(); + SrcPd = S.Stk.pop(); + } + + if (!CheckLoad(S, OpPC, PassThrough)) + return false; + } else { + // Pop source only + SrcPd = S.Stk.pop(); + } + + if (!CheckLoad(S, OpPC, SrcPd)) + return false; + + const auto *RetVTy = Call->getType()->castAs(); + unsigned RetElems = RetVTy->getNumElements(); + unsigned SrcElems = SrcPd.getNumElems(); + const Pointer &Dst = S.Stk.peek(); + + // Initialize destination with passthrough or zeros + for (unsigned I = 0; I != RetElems; ++I) { + if (IsMasked) { + Dst.elem(I) = PassThrough.elem(I); + } else { + Dst.elem(I) = Floating(APFloat(0.0f)); + } + } + + // Convert double to float for enabled elements (only process source elements that exist) + for (unsigned I = 0; I != SrcElems; ++I) { + if (IsMasked && (((MaskVal.getZExtValue() >> I) & 0x1) == 0)) + continue; + + APFloat SrcD = SrcPd.elem(I).getAPFloat(); + Floating Conv = S.allocFloat( + S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy)); + if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call)) + return false; + Dst.elem(I) = Conv; + } + + Dst.initializeAllElements(); + return true; +} static bool interp__builtin_ia32_shuffle_generic( InterpState &S, CodePtr OpPC, const CallExpr *Call, @@ -5169,6 +5334,17 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_cvtq2mask512: return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID); + case X86::BI__builtin_ia32_cvtsd2ss: + return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call); + + case X86::BI__builtin_ia32_cvtsd2ss_round_mask: + return interp__builtin_ia32_cvtsd2ss_round_mask(S, OpPC, Call); + case X86::BI__builtin_ia32_cvtpd2ps: + case X86::BI__builtin_ia32_cvtpd2ps256: + case X86::BI__builtin_ia32_cvtpd2ps_mask: + case X86::BI__builtin_ia32_cvtpd2ps512_mask: + return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, BuiltinID); + case X86::BI__builtin_ia32_cmpb128_mask: case 
X86::BI__builtin_ia32_cmpw128_mask: case X86::BI__builtin_ia32_cmpd128_mask: From d5084f7beedd37a6cec81558b2c00224dbc5d8d6 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Mon, 1 Dec 2025 13:14:03 +0200 Subject: [PATCH 11/25] styled The tests --- clang/test/CodeGen/X86/avx-builtins.c | 2 + clang/test/CodeGen/X86/avx512f-builtins.c | 12 + clang/test/CodeGen/X86/avx512vl-builtins.c | 8 + clang/test/CodeGen/X86/sse2-builtins.c | 4 + .../SemaCXX/constexpr-x86-avx-builtins.cpp | 18 + .../constexpr-x86-avx512f-builtins.cpp | 230 +++++++++ .../constexpr-x86-avx512vl-builtins.cpp | 120 +++++ .../constexpr-x86-intrinsics-pd2ps.cpp | 479 ------------------ .../SemaCXX/constexpr-x86-sse2-builtins.cpp | 79 +++ 9 files changed, 473 insertions(+), 479 deletions(-) create mode 100644 clang/test/SemaCXX/constexpr-x86-avx-builtins.cpp create mode 100644 clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp create mode 100644 clang/test/SemaCXX/constexpr-x86-avx512vl-builtins.cpp delete mode 100644 clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp create mode 100644 clang/test/SemaCXX/constexpr-x86-sse2-builtins.cpp diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 00bcf9cc1da58..13da4292c5b92 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -968,6 +968,8 @@ __m128 test_mm256_cvtpd_ps(__m256d A) { return _mm256_cvtpd_ps(A); } +TEST_CONSTEXPR(match_m128(_mm256_cvtpd_ps((__m256d){ 0.0, -1.0, +2.0, +3.5 }), 0.0f, -1.0f, +2.0f, +3.5f)); + __m256i test_mm256_cvtps_epi32(__m256 A) { // CHECK-LABEL: test_mm256_cvtps_epi32 // CHECK: call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %{{.*}}) diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 6401a0e55a83b..499cbd9dee30a 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -10615,6 +10615,8 @@ __m256 test_mm512_cvtpd_ps (__m512d 
__A) return _mm512_cvtpd_ps (__A); } +TEST_CONSTEXPR(match_m256(_mm512_cvtpd_ps((__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f)); + __m256 test_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_mask_cvtpd_ps @@ -10622,6 +10624,8 @@ __m256 test_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) return _mm512_mask_cvtpd_ps (__W,__U,__A); } +TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps((__m256){ 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f }, 0x05, (__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, 9.0f, +4.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f)); + __m512 test_mm512_cvtpd_pslo(__m512d __A) { // CHECK-LABEL: test_mm512_cvtpd_pslo @@ -10631,6 +10635,8 @@ __m512 test_mm512_cvtpd_pslo(__m512d __A) return _mm512_cvtpd_pslo(__A); } +TEST_CONSTEXPR(match_m512(_mm512_cvtpd_pslo((__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); + __m512 test_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_mask_cvtpd_pslo // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 @@ -10639,6 +10645,8 @@ __m512 test_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) { return _mm512_mask_cvtpd_pslo(__W, __U, __A); } +TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpd_pslo((__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }, 0x3, (__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, +2.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); + __m256 test_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_maskz_cvtpd_ps @@ -11860,12 +11868,16 @@ __m128 test_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) return _mm_mask_cvtsd_ss(__W, __U, __A, __B); } 
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtsd_ss((__m128){ 9.0f, 5.0f, 6.0f, 7.0f }, 0x1, (__m128){ 1.0f, 2.0f, 3.0f, 4.0f }, (__m128d){ -1.0, 42.0 }), -1.0f, 2.0f, 3.0f, 4.0f)); + __m128 test_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) { // CHECK-LABEL: test_mm_maskz_cvtsd_ss // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round return _mm_maskz_cvtsd_ss(__U, __A, __B); } +TEST_CONSTEXPR(match_m128(_mm_maskz_cvtsd_ss(0x1, (__m128){ 1.0f, 2.0f, 3.0f, 4.0f }, (__m128d){ -1.0, 42.0 }), -1.0f, 2.0f, 3.0f, 4.0f)); + __m512i test_mm512_setzero_epi32(void) { // CHECK-LABEL: test_mm512_setzero_epi32 diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 5f6d8360888f5..013c19ba7a929 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -3999,23 +3999,31 @@ __m128 test_mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A) { // CHECK: @llvm.x86.avx512.mask.cvtpd2ps return _mm_mask_cvtpd_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_mask_cvtpd_ps((__m128){ 9.0f, 9.0f, 9.0f, 9.0f }, 0x3, (__m128d){ -1.0, +2.0 }), -1.0f, +2.0f, 9.0f, 9.0f)); __m128 test_mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A) { // CHECK-LABEL: test_mm_maskz_cvtpd_ps // CHECK: @llvm.x86.avx512.mask.cvtpd2ps return _mm_maskz_cvtpd_ps(__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_maskz_cvtpd_ps(0x1, (__m128d){ -1.0, +2.0 }), -1.0f, 0.0f, 0.0f, 0.0f)); __m128 test_mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A) { // CHECK-LABEL: test_mm256_mask_cvtpd_ps // CHECK: @llvm.x86.avx.cvt.pd2.ps.256 // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} return _mm256_mask_cvtpd_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm256_mask_cvtpd_ps((__m128){ 9.0f, 9.0f, 9.0f, 9.0f }, 0xF, (__m256d){ 0.0, -1.0, +2.0, +3.5 }), 0.0f, -1.0f, +2.0f, +3.5f)); __m128 test_mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A) { // CHECK-LABEL: test_mm256_maskz_cvtpd_ps // CHECK: 
@llvm.x86.avx.cvt.pd2.ps.256 // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} return _mm256_maskz_cvtpd_ps(__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm256_maskz_cvtpd_ps(0x5, (__m256d){ 0.0, -1.0, +2.0, +3.5 }), 0.0f, 0.0f, +2.0f, 0.0f)); __m128i test_mm_cvtpd_epu32(__m128d __A) { // CHECK-LABEL: test_mm_cvtpd_epu32 // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.128 diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index ed1ac84b8c4a3..c4975b456ba22 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -573,6 +573,8 @@ __m128 test_mm_cvtpd_ps(__m128d A) { return _mm_cvtpd_ps(A); } +TEST_CONSTEXPR(match_m128(_mm_cvtpd_ps((__m128d){ -1.0, +2.0 }), -1.0f, +2.0f, 0.0f, 0.0f)); + __m128i test_mm_cvtps_epi32(__m128 A) { // CHECK-LABEL: test_mm_cvtps_epi32 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}}) @@ -614,6 +616,8 @@ __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) { return _mm_cvtsd_ss(A, B); } +TEST_CONSTEXPR(match_m128(_mm_cvtsd_ss((__m128){ 9.0f, 5.0f, 6.0f, 7.0f }, (__m128d){ -1.0, 42.0 }), -1.0f, 5.0f, 6.0f, 7.0f)); + int test_mm_cvtsi128_si32(__m128i A) { // CHECK-LABEL: test_mm_cvtsi128_si32 // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 diff --git a/clang/test/SemaCXX/constexpr-x86-avx-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-avx-builtins.cpp new file mode 100644 index 0000000000000..724aff3011ded --- /dev/null +++ b/clang/test/SemaCXX/constexpr-x86-avx-builtins.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +avx -verify %s + +#include +#include "../CodeGen/X86/builtin_test_helpers.h" + +namespace Test_mm256_cvtpd_ps { +namespace OK { +constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 }; +TEST_CONSTEXPR(match_m128(_mm256_cvtpd_ps(a), 0.0f, -1.0f, +2.0f, +3.5f)); +} +namespace Inexact { +constexpr __m256d a = { 
1.0000000000000002, 0.0, 0.0, 0.0 }; +constexpr __m128 r = _mm256_cvtpd_ps(a); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avxintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm256_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}} +} +} diff --git a/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp new file mode 100644 index 0000000000000..0d2a82cbbb83c --- /dev/null +++ b/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp @@ -0,0 +1,230 @@ +// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +avx512f -verify %s + +#include +#include "../CodeGen/X86/builtin_test_helpers.h" + +namespace Test_mm_mask_cvtsd_ss { +namespace OK { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b = { -1.0, 42.0 }; +TEST_CONSTEXPR(match_m128(_mm_mask_cvtsd_ss(src, 0x1, a, b), -1.0f, 2.0f, 3.0f, 4.0f)); +} +namespace MaskOff { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b = { -1.0, 42.0 }; +TEST_CONSTEXPR(match_m128(_mm_mask_cvtsd_ss(src, 0x0, a, b), 9.0f, 2.0f, 3.0f, 4.0f)); +} +namespace MaskOffInexact { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 }; +constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b_inexact); +TEST_CONSTEXPR(match_m128(r, 9.0f, 2.0f, 3.0f, 4.0f)); +} +namespace MaskOnInexact { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 }; +constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inexact); +// expected-error@-1 
{{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e+00, 0.000000e+00})'}} +} +namespace MaskOnInf { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 }; +constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}} +} +namespace MaskOnNaN { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_nan = { __builtin_nan(""), 0.0 }; +constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}} +} +namespace MaskOnSubnormal { +constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_sub = { 1e-310, 0.0 }; +constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_sub); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// 
expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e-310, 0.000000e+00})'}} +} +} + +namespace Test_mm_maskz_cvtsd_ss { +namespace OK { +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b = { -1.0, 42.0 }; +TEST_CONSTEXPR(match_m128(_mm_maskz_cvtsd_ss(0x1, a, b), -1.0f, 2.0f, 3.0f, 4.0f)); +} +namespace MaskOff { +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b = { -1.0, 42.0 }; +TEST_CONSTEXPR(match_m128(_mm_maskz_cvtsd_ss(0x0, a, b), 0.0f, 2.0f, 3.0f, 4.0f)); +} +namespace MaskOffInexact { +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 }; +TEST_CONSTEXPR(match_m128(_mm_maskz_cvtsd_ss(0x0, a, b_inexact), 0.0f, 2.0f, 3.0f, 4.0f)); +} +namespace MaskOnInf { +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 }; +constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}} +} +namespace MaskOnNaN { +constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; +constexpr __m128d b_nan = { __builtin_nan(""), 0.0 }; +constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}} +} +} + +namespace Test_mm512_cvtpd_ps { +namespace OK { +constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; 
+TEST_CONSTEXPR(match_m256(_mm512_cvtpd_ps(a), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f)); +} +namespace Inexact { +constexpr __m512d a = { 1.0000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; +constexpr __m256 r = _mm512_cvtpd_ps(a); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm512_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}} +} +} + +namespace Test_mm512_mask_cvtpd_ps { +namespace OK { +constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; +TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps(src, 0x05, a), -1.0f, 9.0f, +4.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f)); +} +namespace MaskOffInexact { +constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 }; +TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps(src, 0b11011111, a_inexact), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 9.0f, +64.0f, +128.0f)); +} +namespace MaskOffInf { +constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 }; +TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps(src, 0x1F, a_inf), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 9.0f, 9.0f, 9.0f)); +} +namespace MaskOffNaN { +constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 }; +TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps(src, 0x1F, a_nan), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 9.0f, 9.0f, 9.0f)); +} +namespace MaskOnInf { +constexpr __m256 src = { 
9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 }; +constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +} +namespace MaskOnNaN { +constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 }; +constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +} +} + +namespace Test_mm512_maskz_cvtpd_ps { +namespace OK { +constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; +TEST_CONSTEXPR(match_m256(_mm512_maskz_cvtpd_ps(0x81, a), -1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, +128.0f)); +} +namespace MaskOffInexact { +constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 }; +TEST_CONSTEXPR(match_m256(_mm512_maskz_cvtpd_ps(0b11011111, a_inexact), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 0.0f, +64.0f, +128.0f)); +} +namespace MaskOffInf { +constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 }; 
+TEST_CONSTEXPR(match_m256(_mm512_maskz_cvtpd_ps(0x1F, a_inf), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 0.0f, 0.0f, 0.0f)); +} +namespace MaskOffNaN { +constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 }; +TEST_CONSTEXPR(match_m256(_mm512_maskz_cvtpd_ps(0x1F, a_nan), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 0.0f, 0.0f, 0.0f)); +} +namespace MaskOnInf { +constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 }; +constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +} +namespace MaskOnNaN { +constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 }; +constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +} +} + +namespace Test_mm512_cvtpd_pslo { +namespace OK { +constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; +TEST_CONSTEXPR(match_m512(_mm512_cvtpd_pslo(a), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); +} +} + +namespace Test_mm512_mask_cvtpd_pslo { +namespace OK { +constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, + 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; +TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpd_pslo(src, 0x3, 
a), -1.0f, +2.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); +} +namespace MaskOffInf { +constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, + 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 }; +TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpd_pslo(src, 0x3, a_inf), -1.0f, +2.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); +} +namespace MaskOffNaN { +constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, + 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 }; +TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpd_pslo(src, 0x7, a_nan), -1.0f, +2.0f, +4.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); +} +namespace MaskOnInf { +constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, + 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 }; +constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 
2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +} +namespace MaskOnNaN { +constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, + 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; +constexpr __m512d a_nan = { -1.0, +2.0, __builtin_nan(""), +8.0, +16.0, +32.0, +64.0, +128.0 }; +constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} +} +} diff --git a/clang/test/SemaCXX/constexpr-x86-avx512vl-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-avx512vl-builtins.cpp new file mode 100644 index 0000000000000..bdce60a357f13 --- /dev/null +++ b/clang/test/SemaCXX/constexpr-x86-avx512vl-builtins.cpp @@ -0,0 +1,120 @@ +// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +avx512f -target-feature +avx512vl -verify %s + +#include +#include "../CodeGen/X86/builtin_test_helpers.h" + +namespace Test_mm_mask_cvtpd_ps { +namespace OK { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128d a = { -1.0, +2.0 }; +TEST_CONSTEXPR(match_m128(_mm_mask_cvtpd_ps(src, 0x3, a), -1.0f, +2.0f, 9.0f, 9.0f)); +} 
+namespace Partial { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128d a = { -1.0, +2.0 }; +TEST_CONSTEXPR(match_m128(_mm_mask_cvtpd_ps(src, 0x1, a), -1.0f, 9.0f, 9.0f, 9.0f)); +} +namespace MaskOffInexact { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 }; +TEST_CONSTEXPR(match_m128(_mm_mask_cvtpd_ps(src, 0x1, a_inexact), -1.0f, 9.0f, 9.0f, 9.0f)); +} +namespace MaskOnInexact { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 }; +constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inexact); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512vlintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, 1.000000e+00})'}} +} +namespace MaskOnInf { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128d a_inf = { -1.0, __builtin_huge_val() }; +constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, INF})'}} +} +namespace MaskOnNaN { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m128d a_nan = { -1.0, __builtin_nan("") }; +constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, nan})'}} +} +} + +namespace Test_mm_maskz_cvtpd_ps { 
+namespace OK { +constexpr __m128d a = { -1.0, +2.0 }; +TEST_CONSTEXPR(match_m128(_mm_maskz_cvtpd_ps(0x1, a), -1.0f, 0.0f, 0.0f, 0.0f)); +} +namespace MaskOffInexact { +constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 }; +TEST_CONSTEXPR(match_m128(_mm_maskz_cvtpd_ps(0x1, a_inexact), -1.0f, 0.0f, 0.0f, 0.0f)); +} +namespace MaskOnInf { +constexpr __m128d a_inf = { -1.0, __builtin_huge_val() }; +constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, INF})'}} +} +namespace MaskOnNaN { +constexpr __m128d a_nan = { -1.0, __builtin_nan("") }; +constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, nan})'}} +} +} + +namespace Test_mm256_mask_cvtpd_ps { +namespace OK { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 }; +TEST_CONSTEXPR(match_m128(_mm256_mask_cvtpd_ps(src, 0xF, a), 0.0f, -1.0f, +2.0f, +3.5f)); +} +namespace MaskOffInf { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 }; +constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x3, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}} +// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}} +} +namespace 
MaskOffNaN { +constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; +constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") }; +constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x7, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}} +// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}} +} +} + +namespace Test_mm256_maskz_cvtpd_ps { +namespace OK { +constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 }; +TEST_CONSTEXPR(match_m128(_mm256_maskz_cvtpd_ps(0x5, a), 0.0f, 0.0f, +2.0f, 0.0f)); +} +namespace MaskOffInf { +constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 }; +constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x3, a_inf); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}} +// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}} +} +namespace MaskOffNaN { +constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") }; +constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x7, a_nan); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}} +// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}} +} +} diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp 
b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp deleted file mode 100644 index 4a1e9a9c5ae2c..0000000000000 --- a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp +++ /dev/null @@ -1,479 +0,0 @@ -// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512vl -verify %s - -#define __MM_MALLOC_H -#include - -namespace Test_mm_cvtsd_ss { -namespace OK { -constexpr __m128 a = { 9.0f, 5.0f, 6.0f, 7.0f }; -constexpr __m128d b = { -1.0, 42.0 }; -constexpr __m128 r = _mm_cvtsd_ss(a, b); -static_assert(r[0] == -1.0f && r[1] == 5.0f && r[2] == 6.0f && r[3] == 7.0f, ""); -} -namespace Inexact { -constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f }; -constexpr __m128d b = { 1.0000000000000002, 0.0 }; -constexpr __m128 r = _mm_cvtsd_ss(a, b); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} -// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e+00, 0.000000e+00})'}} -} -namespace Inf { -constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f }; -constexpr __m128d b = { __builtin_huge_val(), 0.0 }; -constexpr __m128 r = _mm_cvtsd_ss(a, b); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}} -// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {INF, 0.000000e+00})'}} -} -namespace NaN { -constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f }; -constexpr __m128d b = { __builtin_nan(""), 0.0 }; -constexpr __m128 r = _mm_cvtsd_ss(a, b); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}} -// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 
3.000000e+00}, {nan, 0.000000e+00})'}} -} -namespace Subnormal { -constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f }; -constexpr __m128d b = { 1e-310, 0.0 }; -constexpr __m128 r = _mm_cvtsd_ss(a, b); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} -// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e-310, 0.000000e+00})'}} -} -} - -namespace Test_mm_mask_cvtsd_ss { -namespace OK { -constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; -constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; -constexpr __m128d b = { -1.0, 42.0 }; -constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b); -static_assert(r[0] == -1.0f && r[1] == 2.0f && r[2] == 3.0f && r[3] == 4.0f, ""); -} -namespace MaskOff { -constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; -constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; -constexpr __m128d b = { -1.0, 42.0 }; -constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b); -static_assert(r[0] == 9.0f && r[1] == 2.0f, ""); -} -namespace MaskOffInexact { -constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; -constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; -constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 }; -constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b_inexact); -static_assert(r[0] == 9.0f, ""); -} -namespace MaskOnInexact { -constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; -constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; -constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 }; -constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inexact); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} -// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 
2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e+00, 0.000000e+00})'}} -} -namespace MaskOnInf { -constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; -constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; -constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 }; -constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inf); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} -// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}} -} -namespace MaskOnNaN { -constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; -constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; -constexpr __m128d b_nan = { __builtin_nan(""), 0.0 }; -constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_nan); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} -// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}} -} -namespace MaskOnSubnormal { -constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f }; -constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; -constexpr __m128d b_sub = { 1e-310, 0.0 }; -constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_sub); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} -// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e-310, 0.000000e+00})'}} -} -} - -namespace Test_mm_maskz_cvtsd_ss { -namespace OK { -constexpr __m128 a = { 1.0f, 2.0f, 
3.0f, 4.0f }; -constexpr __m128d b = { -1.0, 42.0 }; -constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b); -static_assert(r[0] == -1.0f && r[1] == 2.0f, ""); -} -namespace MaskOff { -constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; -constexpr __m128d b = { -1.0, 42.0 }; -constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b); -static_assert(r[0] == 0.0f && r[1] == 2.0f, ""); -} -namespace MaskOffInexact { -constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; -constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 }; -constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b_inexact); -static_assert(r[0] == 0.0f, ""); -} -namespace MaskOnInf { -constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; -constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 }; -constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_inf); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} -// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}} -} -namespace MaskOnNaN { -constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f }; -constexpr __m128d b_nan = { __builtin_nan(""), 0.0 }; -constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_nan); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} -// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}} -} -} - -namespace Test_mm_cvtpd_ps { -namespace OK { -constexpr __m128d a = { -1.0, +2.0 }; -constexpr __m128 r = _mm_cvtpd_ps(a); -static_assert(r[0] == -1.0f && r[1] == +2.0f, ""); -static_assert(r[2] == 0.0f && r[3] == 0.0f, ""); -} -namespace Inexact { -constexpr __m128d a = { 1.0000000000000002, 0.0 }; -constexpr __m128 r = _mm_cvtpd_ps(a); -// expected-error@-1 {{must be initialized by a constant expression}} -// 
expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} -// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e+00, 0.000000e+00})'}} -} -namespace Inf { -constexpr __m128d a = { __builtin_huge_val(), 0.0 }; -constexpr __m128 r = _mm_cvtpd_ps(a); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}} -// expected-note@-3 {{in call to '_mm_cvtpd_ps({INF, 0.000000e+00})'}} -} -namespace NaN { -constexpr __m128d a = { __builtin_nan(""), 0.0 }; -constexpr __m128 r = _mm_cvtpd_ps(a); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}} -// expected-note@-3 {{in call to '_mm_cvtpd_ps({nan, 0.000000e+00})'}} -} -namespace Subnormal { -constexpr __m128d a = { 1e-310, 0.0 }; -constexpr __m128 r = _mm_cvtpd_ps(a); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} -// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e-310, 0.000000e+00})'}} -} -} - -namespace Test_mm_mask_cvtpd_ps { -namespace OK { -constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; -constexpr __m128d a = { -1.0, +2.0 }; -constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x3, a); -static_assert(r[0] == -1.0f && r[1] == +2.0f, ""); -static_assert(r[2] == 9.0f && r[3] == 9.0f, ""); -} -namespace Partial { -constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; -constexpr __m128d a = { -1.0, +2.0 }; -constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a); -static_assert(r[0] == -1.0f && r[1] == 9.0f, ""); -} -namespace MaskOffInexact { -constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; -constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 }; -constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a_inexact); -static_assert(r[0] == -1.0f && r[1] 
== 9.0f, ""); -} -namespace MaskOnInexact { -constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; -constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 }; -constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inexact); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512vlintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} -// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, 1.000000e+00})'}} -} -namespace MaskOnInf { -constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; -constexpr __m128d a_inf = { -1.0, __builtin_huge_val() }; -constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inf); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}} -// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, INF})'}} -} -namespace MaskOnNaN { -constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; -constexpr __m128d a_nan = { -1.0, __builtin_nan("") }; -constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_nan); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}} -// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, nan})'}} -} -} - -namespace Test_mm_maskz_cvtpd_ps { -namespace OK { -constexpr __m128d a = { -1.0, +2.0 }; -constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a); -static_assert(r[0] == -1.0f && r[1] == 0.0f, ""); -static_assert(r[2] == 0.0f && r[3] == 0.0f, ""); -} -namespace MaskOffInexact { -constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 }; -constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a_inexact); -static_assert(r[0] == -1.0f && r[1] == 0.0f, ""); -} -namespace 
MaskOnInf { -constexpr __m128d a_inf = { -1.0, __builtin_huge_val() }; -constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_inf); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}} -// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, INF})'}} -} -namespace MaskOnNaN { -constexpr __m128d a_nan = { -1.0, __builtin_nan("") }; -constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_nan); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}} -// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, nan})'}} -} -} - -namespace Test_mm256_cvtpd_ps { -namespace OK { -constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 }; -constexpr __m128 r = _mm256_cvtpd_ps(a); -static_assert(r[0] == 0.0f && r[1] == -1.0f, ""); -static_assert(r[2] == +2.0f && r[3] == +3.5f, ""); -} -namespace Inexact { -constexpr __m256d a = { 1.0000000000000002, 0.0, 0.0, 0.0 }; -constexpr __m128 r = _mm256_cvtpd_ps(a); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avxintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} -// expected-note@-3 {{in call to '_mm256_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}} -} -} - -namespace Test_mm256_mask_cvtpd_ps { -namespace OK { -constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; -constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 }; -constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0xF, a); -static_assert(r[0] == 0.0f && r[1] == -1.0f && r[2] == +2.0f && r[3] == +3.5f, ""); -} -namespace MaskOffInf { -// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking -// So even masked-off Inf/NaN values cause errors (architectural limitation) -constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; -constexpr __m256d a_inf = { 
-1.0, +2.0, __builtin_huge_val(), +8.0 }; -constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x3, a_inf); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}} -// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}} -// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}} -} -namespace MaskOffNaN { -// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking -// So even masked-off Inf/NaN values cause errors (architectural limitation) -constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f }; -constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") }; -constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x7, a_nan); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}} -// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}} -// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}} -} -} - -namespace Test_mm256_maskz_cvtpd_ps { -namespace OK { -constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 }; -constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x5, a); -static_assert(r[0] == 0.0f && r[1] == 0.0f && r[2] == +2.0f && r[3] == 0.0f, ""); -} -namespace MaskOffInf { -// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking -// So even masked-off Inf/NaN values cause errors (architectural limitation) -constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 }; -constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x3, a_inf); -// expected-error@-1 {{must be initialized by a constant 
expression}} -// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}} -// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}} -// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}} -} -namespace MaskOffNaN { -// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking -// So even masked-off Inf/NaN values cause errors (architectural limitation) -constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") }; -constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x7, a_nan); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}} -// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}} -// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}} -} -} - -namespace Test_mm512_cvtpd_ps { -namespace OK { -constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; -constexpr __m256 r = _mm512_cvtpd_ps(a); -static_assert(r[0] == -1.0f && r[7] == +128.0f, ""); -} -namespace Inexact { -constexpr __m512d a = { 1.0000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; -constexpr __m256 r = _mm512_cvtpd_ps(a); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} -// expected-note@-3 {{in call to '_mm512_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}} -} -} - -namespace Test_mm512_mask_cvtpd_ps { -namespace OK { -constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; -constexpr __m256 r 
= _mm512_mask_cvtpd_ps(src, 0x05, a); -static_assert(r[0] == -1.0f && r[2] == +4.0f, ""); -static_assert(r[1] == 9.0f && r[3] == 9.0f, ""); -} -namespace MaskOffInexact { -constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 }; -constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0b11011111, a_inexact); -static_assert(r[0] == -1.0f && r[5] == 9.0f && r[6] == 64.0f && r[7] == 128.0f, ""); -} -namespace MaskOffInf { -constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 }; -constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_inf); -static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, ""); -} -namespace MaskOffNaN { -constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 }; -constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_nan); -static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, ""); -} -namespace MaskOnInf { -constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 }; -constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_inf); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} -// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} -} -namespace MaskOnNaN { -constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m512d a_nan = { -1.0, +2.0, +4.0, 
__builtin_nan(""), +16.0, +32.0, +64.0, +128.0 }; -constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_nan); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} -// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} -} -} - -namespace Test_mm512_maskz_cvtpd_ps { -namespace OK { -constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; -constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x81, a); -static_assert(r[0] == -1.0f && r[7] == +128.0f, ""); -static_assert(r[1] == 0.0f && r[6] == 0.0f, ""); -} -namespace MaskOffInexact { -constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 }; -constexpr __m256 r = _mm512_maskz_cvtpd_ps(0b11011111, a_inexact); -static_assert(r[0] == -1.0f && r[5] == 0.0f && r[6] == 64.0f && r[7] == 128.0f, ""); -} -namespace MaskOffInf { -constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 }; -constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_inf); -static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, ""); -} -namespace MaskOffNaN { -constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 }; -constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_nan); -static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, ""); -} -namespace MaskOnInf { -constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 }; -constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_inf); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} -// expected-note@-3 
{{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} -} -namespace MaskOnNaN { -constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 }; -constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_nan); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} -// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} -} -} - -namespace Test_mm512_cvtpd_pslo { -namespace OK { -constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; -constexpr __m512 r = _mm512_cvtpd_pslo(a); -static_assert(r[0] == -1.0f && r[7] == +128.0f, ""); -static_assert(r[8] == 0.0f && r[15] == 0.0f, ""); -} -} - -namespace Test_mm512_mask_cvtpd_pslo { -namespace OK { -constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, - 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }; -constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a); -static_assert(r[0] == -1.0f && r[1] == +2.0f, ""); -static_assert(r[2] == 9.0f && r[3] == 9.0f, ""); -} -namespace MaskOffInf { -constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, - 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 }; -constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a_inf); -static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 9.0f, ""); -} -namespace MaskOffNaN { -constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, - 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 }; -constexpr 
__m512 r = _mm512_mask_cvtpd_pslo(src, 0x7, a_nan); -static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 4.0f && r[3] == 9.0f, ""); -} -namespace MaskOnInf { -constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, - 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 }; -constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_inf); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}} -// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} -// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} -} -namespace MaskOnNaN { -constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, - 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }; -constexpr __m512d a_nan = { -1.0, +2.0, __builtin_nan(""), +8.0, +16.0, +32.0, +64.0, +128.0 }; -constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_nan); -// expected-error@-1 {{must be initialized by a constant expression}} -// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}} -// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 
1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} -// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}} -} -} diff --git a/clang/test/SemaCXX/constexpr-x86-sse2-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-sse2-builtins.cpp new file mode 100644 index 0000000000000..319a3b02a94f9 --- /dev/null +++ b/clang/test/SemaCXX/constexpr-x86-sse2-builtins.cpp @@ -0,0 +1,79 @@ +// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +sse2 -verify %s + +#include +#include "../CodeGen/X86/builtin_test_helpers.h" + +namespace Test_mm_cvtsd_ss { +namespace OK { +constexpr __m128 a = { 9.0f, 5.0f, 6.0f, 7.0f }; +constexpr __m128d b = { -1.0, 42.0 }; +TEST_CONSTEXPR(match_m128(_mm_cvtsd_ss(a, b), -1.0f, 5.0f, 6.0f, 7.0f)); +} +namespace Inexact { +constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f }; +constexpr __m128d b = { 1.0000000000000002, 0.0 }; +constexpr __m128 r = _mm_cvtsd_ss(a, b); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e+00, 0.000000e+00})'}} +} +namespace Inf { +constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f }; +constexpr __m128d b = { __builtin_huge_val(), 0.0 }; +constexpr __m128 r = _mm_cvtsd_ss(a, b); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to 
'_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {INF, 0.000000e+00})'}} +} +namespace NaN { +constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f }; +constexpr __m128d b = { __builtin_nan(""), 0.0 }; +constexpr __m128 r = _mm_cvtsd_ss(a, b); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {nan, 0.000000e+00})'}} +} +namespace Subnormal { +constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f }; +constexpr __m128d b = { 1e-310, 0.0 }; +constexpr __m128 r = _mm_cvtsd_ss(a, b); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e-310, 0.000000e+00})'}} +} +} + +namespace Test_mm_cvtpd_ps { +namespace OK { +constexpr __m128d a = { -1.0, +2.0 }; +TEST_CONSTEXPR(match_m128(_mm_cvtpd_ps(a), -1.0f, +2.0f, 0.0f, 0.0f)); +} +namespace Inexact { +constexpr __m128d a = { 1.0000000000000002, 0.0 }; +constexpr __m128 r = _mm_cvtpd_ps(a); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e+00, 0.000000e+00})'}} +} +namespace Inf { +constexpr __m128d a = { __builtin_huge_val(), 0.0 }; +constexpr __m128 r = _mm_cvtpd_ps(a); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}} +// expected-note@-3 {{in call to '_mm_cvtpd_ps({INF, 0.000000e+00})'}} +} +namespace NaN { +constexpr __m128d a = { __builtin_nan(""), 0.0 }; +constexpr __m128 r = 
_mm_cvtpd_ps(a); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}} +// expected-note@-3 {{in call to '_mm_cvtpd_ps({nan, 0.000000e+00})'}} +} +namespace Subnormal { +constexpr __m128d a = { 1e-310, 0.0 }; +constexpr __m128 r = _mm_cvtpd_ps(a); +// expected-error@-1 {{must be initialized by a constant expression}} +// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}} +// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e-310, 0.000000e+00})'}} +} +} From 51d213d48fcb7d8bf72d835aeda051e94653dc86 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Mon, 1 Dec 2025 13:46:38 +0200 Subject: [PATCH 12/25] ran the format commands --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 30 ++++++++++++++---------- clang/lib/AST/ExprConstant.cpp | 1 - clang/lib/Headers/avx512fintrin.h | 7 +++--- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index ad49eb14b911f..5e8b8e0e31bb6 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -169,8 +169,8 @@ static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) { // Strict double -> float conversion used for X86 PD2PS/cvtsd2ss intrinsics. // Reject NaN/Inf/Subnormal inputs and any lossy/inexact conversions. 
-static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst, InterpState &S, - const Expr *DiagExpr) { +static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst, + InterpState &S, const Expr *DiagExpr) { if (Src.isInfinity()) { if (S.diagnosing()) S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 0; @@ -183,7 +183,8 @@ static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst, InterpState & } APFloat Val = Src; bool LosesInfo = false; - APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo); + APFloat::opStatus Status = Val.convert( + APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo); if (LosesInfo || Val.isDenormal()) { if (S.diagnosing()) S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic_strict); @@ -3391,7 +3392,7 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC, return true; } static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC, - const CallExpr *Call) { + const CallExpr *Call) { assert(Call->getNumArgs() == 2); const Pointer &B = S.Stk.pop(); @@ -3420,8 +3421,8 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC, } static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S, - CodePtr OpPC, - const CallExpr *Call) { + CodePtr OpPC, + const CallExpr *Call) { assert(Call->getNumArgs() == 5); // Pop in reverse order: rounding, mask, src, b, a @@ -3430,7 +3431,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S, const Pointer &Src = S.Stk.pop(); const Pointer &B = S.Stk.pop(); const Pointer &A = S.Stk.pop(); - if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) || !CheckLoad(S, OpPC, Src)) + if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) || + !CheckLoad(S, OpPC, Src)) return false; const auto *DstVTy = Call->getType()->castAs(); @@ -3441,7 +3443,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S, for (unsigned I = 0; I != NumElems; 
++I) Dst.elem(I) = A.elem(I); - // If mask bit 0 is set, convert element 0 from double to float; otherwise use Src + // If mask bit 0 is set, convert element 0 from double to float; otherwise use + // Src if (MaskInt.getZExtValue() & 0x1) { Floating Conv = S.allocFloat( S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy)); @@ -3463,12 +3466,12 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask || BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask); bool HasRounding = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask); - + APSInt MaskVal(1, false); Pointer PassThrough; Pointer SrcPd; APSInt Rounding; - + if (IsMasked) { // Pop in reverse order if (HasRounding) { @@ -3483,14 +3486,14 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, PassThrough = S.Stk.pop(); SrcPd = S.Stk.pop(); } - + if (!CheckLoad(S, OpPC, PassThrough)) return false; } else { // Pop source only SrcPd = S.Stk.pop(); } - + if (!CheckLoad(S, OpPC, SrcPd)) return false; @@ -3508,7 +3511,8 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, } } - // Convert double to float for enabled elements (only process source elements that exist) + // Convert double to float for enabled elements (only process source elements + // that exist) for (unsigned I = 0; I != SrcElems; ++I) { if (IsMasked && (((MaskVal.getZExtValue() >> I) & 0x1) == 0)) continue; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3778b0af80b5a..0a9776cb9ee87 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12951,7 +12951,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } - case X86::BI__builtin_ia32_cvtsd2ss: { APValue VecA, VecB; if (!EvaluateAsRValue(Info, E->getArg(0), VecA) || diff --git a/clang/lib/Headers/avx512fintrin.h 
b/clang/lib/Headers/avx512fintrin.h index edcbdba908522..f019fb45e2eea 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -3489,10 +3489,9 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) { static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A) { - return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, - (__v8sf) _mm256_setzero_ps (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m256)__builtin_ia32_cvtpd2ps512_mask( + (__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR From 154eea17cd5f9db2de7a6e366cbda917e867d716 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Mon, 1 Dec 2025 14:39:09 +0200 Subject: [PATCH 13/25] Fixed The Formatting! --- clang/lib/Headers/avx512fintrin.h | 8 ++++---- clang/lib/Headers/avxintrin.h | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index f019fb45e2eea..88dfb2fa29878 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -3487,8 +3487,8 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) { (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_cvtpd_ps(__m512d __A) { +static __inline__ __m256 +__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A) { return (__m256)__builtin_ia32_cvtpd2ps512_mask( (__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); @@ -5374,8 +5374,8 @@ _mm512_kmov (__mmask16 __A) ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))) #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sll_epi32(__m512i __A, __m128i __B) { +static __inline__ __m512i +__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B) { return
(__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B); } diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 126ba30bcca7e..9b45bc3e56bdb 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -3605,8 +3605,7 @@ _mm256_undefined_pd(void) /// This intrinsic has no corresponding instruction. /// /// \returns A 256-bit vector of [8 x float] containing undefined values. -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_undefined_ps(void) { +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void) { return (__m256)__builtin_ia32_undef256(); } From c5ecb01aa231e7fefbb09a5a6d90dff60963426a Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Mon, 1 Dec 2025 14:48:02 +0200 Subject: [PATCH 14/25] Formatted avx512f header using the projects current clang-format --- clang/lib/Headers/avx512fintrin.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 88dfb2fa29878..7dbf137d8cac8 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -3487,8 +3487,8 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) { (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m256 -__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A) { +static __inline__ __m256 + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A) { return (__m256)__builtin_ia32_cvtpd2ps512_mask( (__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); @@ -5374,8 +5374,8 @@ _mm512_kmov (__mmask16 __A) ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))) #endif -static __inline__ __m512i -__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B) { +static __inline__ __m512i + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_pslld512((__v16si) __A, 
(__v4si)__B); } From 28a823a970255496b48629ebad12cd313cfc71a7 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain <53662962+HamzaHassanain@users.noreply.github.com> Date: Mon, 1 Dec 2025 17:55:38 +0200 Subject: [PATCH 15/25] Update clang/lib/AST/ByteCode/InterpBuiltin.cpp Co-authored-by: Timm Baeder --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 5e8b8e0e31bb6..819c3f27239b3 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3404,7 +3404,7 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC, unsigned NumElems = DstVTy->getNumElements(); const Pointer &Dst = S.Stk.peek(); - // Copy all elements from A to Dst + // Copy all elements from A to Dst. for (unsigned I = 0; I != NumElems; ++I) Dst.elem(I) = A.elem(I); From b9b71bd174b4628d4c025c67fc6ca1c5a030ae73 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Mon, 1 Dec 2025 22:45:01 +0200 Subject: [PATCH 16/25] Did the Requested Changes --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 70 +++++++++++------------- 1 file changed, 32 insertions(+), 38 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 5e8b8e0e31bb6..32238c877aad9 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3404,13 +3404,13 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC, unsigned NumElems = DstVTy->getNumElements(); const Pointer &Dst = S.Stk.peek(); - // Copy all elements from A to Dst - for (unsigned I = 0; I != NumElems; ++I) + // Copy all elements except lane 0 (overwritten below) from A to Dst. + for (unsigned I = 1; I < NumElems; ++I) Dst.elem(I) = A.elem(I); - // Convert element 0 from double to float + // Convert element 0 from double to float.
Floating Conv = S.allocFloat( - S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy)); + S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType())); APFloat SrcD = B.elem(0).getAPFloat(); if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call)) return false; @@ -3425,9 +3425,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S, const CallExpr *Call) { assert(Call->getNumArgs() == 5); - // Pop in reverse order: rounding, mask, src, b, a - APSInt Rounding = popToAPSInt(S, Call->getArg(4)->getType()); - APSInt MaskInt = popToAPSInt(S, Call->getArg(3)->getType()); + APSInt Rounding = popToAPSInt(S, Call->getArg(4)); + APSInt MaskInt = popToAPSInt(S, Call->getArg(3)); const Pointer &Src = S.Stk.pop(); const Pointer &B = S.Stk.pop(); const Pointer &A = S.Stk.pop(); @@ -3439,17 +3438,17 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S, unsigned NumElems = DstVTy->getNumElements(); const Pointer &Dst = S.Stk.peek(); - // Copy all elements from A to Dst - for (unsigned I = 0; I != NumElems; ++I) + // Copy all elements except lane 0 (overwritten below) from A to Dst. + for (unsigned I = 1; I < NumElems; ++I) Dst.elem(I) = A.elem(I); // If mask bit 0 is set, convert element 0 from double to float; otherwise use - // Src + // Src. 
if (MaskInt.getZExtValue() & 0x1) { Floating Conv = S.allocFloat( - S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy)); - APFloat SrcD = B.elem(0).getAPFloat(); - if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call)) + S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType())); + APFloat Src = B.elem(0).getAPFloat(); + if (!convertDoubleToFloatStrict(Src, Conv, S, Call)) return false; Dst.elem(0) = Conv; } else { @@ -3467,60 +3466,55 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask); bool HasRounding = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask); - APSInt MaskVal(1, false); + APSInt MaskVal; Pointer PassThrough; - Pointer SrcPd; + Pointer Src; APSInt Rounding; if (IsMasked) { - // Pop in reverse order + // Pop in reverse order. if (HasRounding) { - // For 512: rounding, mask, passthrough, source - Rounding = popToAPSInt(S, Call->getArg(3)->getType()); - MaskVal = popToAPSInt(S, Call->getArg(2)->getType()); + Rounding = popToAPSInt(S, Call->getArg(3)); + MaskVal = popToAPSInt(S, Call->getArg(2)); PassThrough = S.Stk.pop(); - SrcPd = S.Stk.pop(); + Src = S.Stk.pop(); } else { - // For VL: mask, passthrough, source MaskVal = popToAPSInt(S, Call->getArg(2)->getType()); PassThrough = S.Stk.pop(); - SrcPd = S.Stk.pop(); + Src = S.Stk.pop(); } if (!CheckLoad(S, OpPC, PassThrough)) return false; } else { - // Pop source only - SrcPd = S.Stk.pop(); + // Pop source only. + Src = S.Stk.pop(); } - if (!CheckLoad(S, OpPC, SrcPd)) + if (!CheckLoad(S, OpPC, Src)) return false; const auto *RetVTy = Call->getType()->castAs(); unsigned RetElems = RetVTy->getNumElements(); - unsigned SrcElems = SrcPd.getNumElems(); + unsigned SrcElems = Src.getNumElems(); const Pointer &Dst = S.Stk.peek(); - // Initialize destination with passthrough or zeros - for (unsigned I = 0; I != RetElems; ++I) { - if (IsMasked) { + // Initialize destination with passthrough or zeros. 
+ for (unsigned I = 0; I != RetElems; ++I) + if (IsMasked) Dst.elem(I) = PassThrough.elem(I); - } else { + else Dst.elem(I) = Floating(APFloat(0.0f)); - } - } - // Convert double to float for enabled elements (only process source elements - // that exist) + // Convert double to float for enabled elements (only process source elements that exist). for (unsigned I = 0; I != SrcElems; ++I) { - if (IsMasked && (((MaskVal.getZExtValue() >> I) & 0x1) == 0)) + if (IsMasked && !MaskVal[I]) continue; - APFloat SrcD = SrcPd.elem(I).getAPFloat(); + APFloat Src = Src.elem(I).getAPFloat(); Floating Conv = S.allocFloat( - S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy)); - if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call)) + S.getASTContext().getFloatTypeSemantics(RetVTy->getElementType())); + if (!convertDoubleToFloatStrict(Src, Conv, S, Call)) return false; Dst.elem(I) = Conv; } From 21ab33c3ece7a2daba30e7adc4f6e52672bf2e6a Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Mon, 1 Dec 2025 22:45:37 +0200 Subject: [PATCH 17/25] Formatted The InterpBuiltin --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 32238c877aad9..89eae9369eb80 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3506,7 +3506,8 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, else Dst.elem(I) = Floating(APFloat(0.0f)); - // Convert double to float for enabled elements (only process source elements that exist). + // Convert double to float for enabled elements (only process source elements + // that exist).
for (unsigned I = 0; I != SrcElems; ++I) { if (IsMasked && !MaskVal[I]) continue; From 4957b30793f4965f3afd24fd96bb0adc4663bb88 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Tue, 2 Dec 2025 11:13:19 +0200 Subject: [PATCH 18/25] fixed a naming conflict --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 89eae9369eb80..aabf0b8fc4f03 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3411,8 +3411,8 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC, // Convert element 0 from double to float. Floating Conv = S.allocFloat( S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType())); - APFloat SrcD = B.elem(0).getAPFloat(); - if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call)) + APFloat SrcVal = B.elem(0).getAPFloat(); + if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call)) return false; Dst.elem(0) = Conv; @@ -3447,8 +3447,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S, if (MaskInt.getZExtValue() & 0x1) { Floating Conv = S.allocFloat( S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType())); - APFloat Src = B.elem(0).getAPFloat(); - if (!convertDoubleToFloatStrict(Src, Conv, S, Call)) + APFloat SrcVal = B.elem(0).getAPFloat(); + if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call)) return false; Dst.elem(0) = Conv; } else { @@ -3512,10 +3512,10 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, if (IsMasked && !MaskVal[I]) continue; - APFloat Src = Src.elem(I).getAPFloat(); + APFloat SrcVal = Src.elem(I).getAPFloat(); Floating Conv = S.allocFloat( S.getASTContext().getFloatTypeSemantics(RetVTy->getElementType())); - if (!convertDoubleToFloatStrict(Src, Conv, S, Call)) + if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call)) return false; Dst.elem(I) =
Conv; } From 8b786f02cb2f512969f9b6965f918447f2dd5f8a Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Tue, 2 Dec 2025 11:25:07 +0200 Subject: [PATCH 19/25] added assertion on getElementType() and getASTContext().FloatTy --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index aabf0b8fc4f03..b92454d49bfa8 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3445,6 +3445,10 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S, // If mask bit 0 is set, convert element 0 from double to float; otherwise use // Src. if (MaskInt.getZExtValue() & 0x1) { + + assert(S.getASTContext().FloatTy == DstVTy->getElementType() && + "cvtsd2ss requires float element type in destination vector"); + Floating Conv = S.allocFloat( S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType())); APFloat SrcVal = B.elem(0).getAPFloat(); @@ -3506,6 +3510,9 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, else Dst.elem(I) = Floating(APFloat(0.0f)); + assert(S.getASTContext().FloatTy == RetVTy->getElementType() && + "cvtpd2ps requires float element type in return vector"); + // Convert double to float for enabled elements (only process source elements // that exist). 
for (unsigned I = 0; I != SrcElems; ++I) { @@ -3513,6 +3520,7 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, continue; APFloat SrcVal = Src.elem(I).getAPFloat(); + Floating Conv = S.allocFloat( S.getASTContext().getFloatTypeSemantics(RetVTy->getElementType())); if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call)) From 2bab71ecc6aa5a22c129d678dde6e93ef9ec9e41 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Tue, 2 Dec 2025 11:39:33 +0200 Subject: [PATCH 20/25] Ran The formatter Again --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b92454d49bfa8..9a2fe83e98a1e 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3511,8 +3511,8 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, Dst.elem(I) = Floating(APFloat(0.0f)); assert(S.getASTContext().FloatTy == RetVTy->getElementType() && - "cvtpd2ps requires float element type in return vector"); - + "cvtpd2ps requires float element type in return vector"); + // Convert double to float for enabled elements (only process source elements // that exist). 
for (unsigned I = 0; I != SrcElems; ++I) { From cc1dadad7570c092019ddbc7d8eb3b308e5cb42e Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Tue, 2 Dec 2025 12:33:52 +0200 Subject: [PATCH 21/25] Did the requested changes --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 9a2fe83e98a1e..44c5a66ad3431 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3439,7 +3439,7 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S, const Pointer &Dst = S.Stk.peek(); // Copy all elements except lane 0 (overwritten below) from A to Dst. - for (unsigned I = 1; I < NumElems; ++I) + for (unsigned I = 1; I != NumElems; ++I) Dst.elem(I) = A.elem(I); // If mask bit 0 is set, convert element 0 from double to float; otherwise use @@ -3483,7 +3483,7 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, PassThrough = S.Stk.pop(); Src = S.Stk.pop(); } else { - MaskVal = popToAPSInt(S, Call->getArg(2)->getType()); + MaskVal = popToAPSInt(S, Call->getArg(2)); PassThrough = S.Stk.pop(); Src = S.Stk.pop(); } @@ -5346,6 +5346,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_cvtsd2ss_round_mask: return interp__builtin_ia32_cvtsd2ss_round_mask(S, OpPC, Call); + case X86::BI__builtin_ia32_cvtpd2ps: case X86::BI__builtin_ia32_cvtpd2ps256: case X86::BI__builtin_ia32_cvtpd2ps_mask: From b2b68d9166fdcb0af6e2cc7a3cea82e320036ca3 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain <53662962+HamzaHassanain@users.noreply.github.com> Date: Tue, 2 Dec 2025 12:45:36 +0200 Subject: [PATCH 22/25] Fix loop condition in element copy operation --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 44c5a66ad3431..4e85ba020fe07 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3405,7 +3405,7 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC, const Pointer &Dst = S.Stk.peek(); // Copy all elements except lane 0 (overwritten below) from A to Dst. - for (unsigned I = 1; I < NumElems; ++I) + for (unsigned I = 1; I != NumElems; ++I) Dst.elem(I) = A.elem(I); // Convert element 0 from double to float. From c430491e46b9477d2fa6bfb9bc0c99b1b211f652 Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Tue, 2 Dec 2025 16:13:22 +0200 Subject: [PATCH 23/25] Fixed The Whitespace --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 44c5a66ad3431..314b5258f1a4d 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -5346,7 +5346,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_cvtsd2ss_round_mask: return interp__builtin_ia32_cvtsd2ss_round_mask(S, OpPC, Call); - + case X86::BI__builtin_ia32_cvtpd2ps: case X86::BI__builtin_ia32_cvtpd2ps256: case X86::BI__builtin_ia32_cvtpd2ps_mask: From 88488aa890ea5e37b06cf186de4701a46b43764e Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Tue, 2 Dec 2025 19:27:58 +0200 Subject: [PATCH 24/25] Did the required changes --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 84 +++++++++--------------- 1 file changed, 31 insertions(+), 53 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 6104e331468a0..438c64e919b82 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3392,47 +3392,28 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr 
OpPC, return true; } static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC, - const CallExpr *Call) { - assert(Call->getNumArgs() == 2); - - const Pointer &B = S.Stk.pop(); - const Pointer &A = S.Stk.pop(); - if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B)) - return false; - - const auto *DstVTy = Call->getType()->castAs(); - unsigned NumElems = DstVTy->getNumElements(); - const Pointer &Dst = S.Stk.peek(); - - // Copy all elements except lane 0 (overwritten below) from A to Dst. - for (unsigned I = 1; I != NumElems; ++I) - Dst.elem(I) = A.elem(I); - - // Convert element 0 from double to float. - Floating Conv = S.allocFloat( - S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType())); - APFloat SrcVal = B.elem(0).getAPFloat(); - if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call)) - return false; - Dst.elem(0) = Conv; - - Dst.initializeAllElements(); - return true; -} - -static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S, - CodePtr OpPC, - const CallExpr *Call) { - assert(Call->getNumArgs() == 5); - - APSInt Rounding = popToAPSInt(S, Call->getArg(4)); - APSInt MaskInt = popToAPSInt(S, Call->getArg(3)); - const Pointer &Src = S.Stk.pop(); - const Pointer &B = S.Stk.pop(); - const Pointer &A = S.Stk.pop(); - if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) || - !CheckLoad(S, OpPC, Src)) - return false; + const CallExpr *Call, bool HasMask, + bool HasRounding) { + APSInt Rounding, MaskInt; + Pointer Src, B, A; + + if (HasMask) { + assert(Call->getNumArgs() == 5); + Rounding = popToAPSInt(S, Call->getArg(4)); + MaskInt = popToAPSInt(S, Call->getArg(3)); + Src = S.Stk.pop(); + B = S.Stk.pop(); + A = S.Stk.pop(); + if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) || + !CheckLoad(S, OpPC, Src)) + return false; + } else { + assert(Call->getNumArgs() == 2); + B = S.Stk.pop(); + A = S.Stk.pop(); + if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B)) + return false; + } const auto *DstVTy = 
Call->getType()->castAs(); unsigned NumElems = DstVTy->getNumElements(); @@ -3442,10 +3423,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S, for (unsigned I = 1; I != NumElems; ++I) Dst.elem(I) = A.elem(I); - // If mask bit 0 is set, convert element 0 from double to float; otherwise use - // Src. - if (MaskInt.getZExtValue() & 0x1) { - + // Convert element 0 from double to float, or use Src if masked off. + if (!HasMask || (MaskInt.getZExtValue() & 0x1)) { assert(S.getASTContext().FloatTy == DstVTy->getElementType() && "cvtsd2ss requires float element type in destination vector"); @@ -3464,11 +3443,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S, } static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, - const CallExpr *Call, - unsigned BuiltinID) { - bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask || - BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask); - bool HasRounding = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask); + const CallExpr *Call, bool IsMasked, + bool HasRounding) { APSInt MaskVal; Pointer PassThrough; @@ -5342,16 +5318,18 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID); case X86::BI__builtin_ia32_cvtsd2ss: - return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call); + return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, false, false); case X86::BI__builtin_ia32_cvtsd2ss_round_mask: - return interp__builtin_ia32_cvtsd2ss_round_mask(S, OpPC, Call); + return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, true, true); case X86::BI__builtin_ia32_cvtpd2ps: case X86::BI__builtin_ia32_cvtpd2ps256: + return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, false, false); case X86::BI__builtin_ia32_cvtpd2ps_mask: + return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, false); case X86::BI__builtin_ia32_cvtpd2ps512_mask: - return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, 
BuiltinID); + return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, true); case X86::BI__builtin_ia32_cmpb128_mask: case X86::BI__builtin_ia32_cmpw128_mask: From a08303def439deae76e8ff336f2e00b4fcd6bbbb Mon Sep 17 00:00:00 2001 From: Hamza Hassanain Date: Tue, 2 Dec 2025 22:04:24 +0200 Subject: [PATCH 25/25] Did the HasRoundingMask change --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 438c64e919b82..9a301ffcf28ed 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3392,12 +3392,12 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC, return true; } static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC, - const CallExpr *Call, bool HasMask, - bool HasRounding) { + const CallExpr *Call, + bool HasRoundingMask) { APSInt Rounding, MaskInt; Pointer Src, B, A; - if (HasMask) { + if (HasRoundingMask) { assert(Call->getNumArgs() == 5); Rounding = popToAPSInt(S, Call->getArg(4)); MaskInt = popToAPSInt(S, Call->getArg(3)); @@ -3424,7 +3424,7 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC, Dst.elem(I) = A.elem(I); // Convert element 0 from double to float, or use Src if masked off. 
- if (!HasMask || (MaskInt.getZExtValue() & 0x1)) { + if (!HasRoundingMask || (MaskInt.getZExtValue() & 0x1)) { assert(S.getASTContext().FloatTy == DstVTy->getElementType() && "cvtsd2ss requires float element type in destination vector"); @@ -5318,10 +5318,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID); case X86::BI__builtin_ia32_cvtsd2ss: - return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, false, false); + return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, false); case X86::BI__builtin_ia32_cvtsd2ss_round_mask: - return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, true, true); + return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, true); case X86::BI__builtin_ia32_cvtpd2ps: case X86::BI__builtin_ia32_cvtpd2ps256: