From 29e2794651c50ccf60a28c2e08639913a68cd71c Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Wed, 26 Nov 2025 17:05:45 +0200
Subject: [PATCH 01/25] Add tests that should pass:
 clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp

---
 .../constexpr-x86-intrinsics-pd2ps.cpp        | 120 ++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp

diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
new file mode 100644
index 0000000000000..a082b23bfae03
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
@@ -0,0 +1,120 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512vl -verify %s
+
+// HACK: Prevent immintrin.h from pulling in standard library headers
+// that don't exist in this test environment.
+#define __MM_MALLOC_H
+
+#include <immintrin.h>
+
+namespace ExactFinite {
+constexpr __m128d d2 = { -1.0, +2.0 };
+constexpr __m128 r128 = _mm_cvtpd_ps(d2);
+static_assert(r128[0] == -1.0f && r128[1] == +2.0f, "");
+static_assert(r128[2] == 0.0f && r128[3] == 0.0f, "");
+
+constexpr __m128 src128 = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128 m128_full = _mm_mask_cvtpd_ps(src128, 0x3, d2);
+static_assert(m128_full[0] == -1.0f && m128_full[1] == +2.0f, "");
+static_assert(m128_full[2] == 9.0f && m128_full[3] == 9.0f, "");
+
+constexpr __m128 m128_partial = _mm_mask_cvtpd_ps(src128, 0x1, d2);
+static_assert(m128_partial[0] == -1.0f && m128_partial[1] == 9.0f, "");
+
+constexpr __m128 m128_zero = _mm_maskz_cvtpd_ps(0x1, d2);
+static_assert(m128_zero[0] == -1.0f && m128_zero[1] == 0.0f, "");
+static_assert(m128_zero[2] == 0.0f && m128_zero[3] == 0.0f, "");
+
+constexpr __m256d d4 = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r256 = _mm256_cvtpd_ps(d4);
+static_assert(r256[0] == 0.0f && r256[1] == -1.0f, "");
+static_assert(r256[2] == +2.0f && r256[3] == +3.5f, "");
+
+constexpr __m512d d8 = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r512 = _mm512_cvtpd_ps(d8);
+static_assert(r512[0] == -1.0f && r512[7] == +128.0f, "");
+
+constexpr __m256 src256 = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m256 r512_mask = _mm512_mask_cvtpd_ps(src256, 0x05, d8);
+static_assert(r512_mask[0] == -1.0f && r512_mask[2] == +4.0f, "");
+static_assert(r512_mask[1] == 9.0f && r512_mask[3] == 9.0f, "");
+
+constexpr __m256 r512_maskz = _mm512_maskz_cvtpd_ps(0x81, d8);
+static_assert(r512_maskz[0] == -1.0f && r512_maskz[7] == +128.0f, "");
+static_assert(r512_maskz[1] == 0.0f && r512_maskz[6] == 0.0f, "");
+
+constexpr __m512 r512lo = _mm512_cvtpd_pslo(d8);
+static_assert(r512lo[0] == -1.0f && r512lo[7] == +128.0f, "");
+static_assert(r512lo[8] == 0.0f && r512lo[15] == 0.0f, "");
+
+constexpr __m512 ws = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512 r512lo_mask = _mm512_mask_cvtpd_pslo(ws, 0x3, d8);
+static_assert(r512lo_mask[0] == -1.0f, "");
+static_assert(r512lo_mask[1] == +2.0f, "");
+static_assert(r512lo_mask[2] == 9.0f && r512lo_mask[3] == 9.0f, "");
+
+constexpr __m128 src_ss = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128d b_ss = { -1.0, 42.0 };
+constexpr __m128 r_ss = _mm_cvtsd_ss(src_ss, b_ss);
+static_assert(r_ss[0] == -1.0f, "");
+static_assert(r_ss[1] == 5.0f && r_ss[3] == 7.0f, "");
+
+constexpr __m128 r_ss_mask_on = _mm_mask_cvtsd_ss(src_ss, 0x1, src_ss, b_ss);
+static_assert(r_ss_mask_on[0] == -1.0f && r_ss_mask_on[1] == 5.0f, "");
+constexpr __m128 r_ss_mask_off = _mm_mask_cvtsd_ss(src_ss, 0x0, src_ss, b_ss);
+static_assert(r_ss_mask_off[0] == 9.0f, "");
+constexpr __m128 r_ss_maskz_off = _mm_maskz_cvtsd_ss(0x0, src_ss, b_ss);
+static_assert(r_ss_maskz_off[0] == 0.0f && r_ss_maskz_off[1] == 0.0f, "");
+}
+
+namespace InexactOrSpecialReject {
+constexpr __m128d inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r_inexact = _mm_cvtpd_ps(inexact); // both-error {{not an integral constant expression}}
+static_assert(r_inexact[0] == 1.0f, "");           // both-note {{subexpression not valid in a constant expression}}
+
+constexpr __m128d dinf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r_inf = _mm_cvtpd_ps(dinf); // both-error {{not an integral constant expression}}
+static_assert(r_inf[0] == __builtin_inff(), ""); // both-note {{subexpression not valid in a constant expression}}
+
+constexpr __m128d dnan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r_nan = _mm_cvtpd_ps(dnan); // both-error {{not an integral constant expression}}
+static_assert(r_nan[0] != r_nan[0], "");  // both-note {{subexpression not valid in a constant expression}}
+
+constexpr __m128d dsub = { 1e-310, 0.0 };
+constexpr __m128 r_sub = _mm_cvtpd_ps(dsub); // both-error {{not an integral constant expression}}
+static_assert(r_sub[0] == 0.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+
+constexpr __m128 src_ss2 = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d inexact_sd = { 1.0000000000000002, 0.0 };
+constexpr __m128 r_ss_inexact = _mm_cvtsd_ss(src_ss2, inexact_sd); // both-error {{not an integral constant expression}}
+static_assert(r_ss_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+}
+
+namespace MaskedSpecialCasesAllowed {
+constexpr __m128 src128a = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d d2_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 ok128 = _mm_mask_cvtpd_ps(src128a, 0x1, d2_inexact);
+static_assert(ok128[0] == -1.0f && ok128[1] == 9.0f, "");
+
+constexpr __m128 ok128z = _mm_maskz_cvtpd_ps(0x1, d2_inexact);
+static_assert(ok128z[0] == -1.0f && ok128z[1] == 0.0f, "");
+
+constexpr __m256d d4_inexact = { 0.0, 1.0000000000000002, 2.0, 3.0 };
+constexpr __m128 src_m = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128 ok256m = _mm256_mask_cvtpd_ps(src_m, 0b0101, d4_inexact);
+static_assert(ok256m[0] == 0.0f && ok256m[1] == 9.0f && ok256m[2] == 2.0f && ok256m[3] == 9.0f, "");
+
+constexpr __m128 ok256z = _mm256_maskz_cvtpd_ps(0b0101, d4_inexact);
+static_assert(ok256z[0] == 0.0f && ok256z[1] == 0.0f && ok256z[2] == 2.0f && ok256z[3] == 0.0f, "");
+
+constexpr __m512d d8_inexact = { -1.0, 2.0, 4.0, 8.0, 16.0, 1.0000000000000002, 64.0, 128.0 };
+constexpr __m256 src256b = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m256 ok512m = _mm512_mask_cvtpd_ps(src256b, 0b110111, d8_inexact);
+static_assert(ok512m[0] == -1.0f && ok512m[5] == 9.0f && ok512m[7] == 128.0f, "");
+
+constexpr __m256 ok512z = _mm512_maskz_cvtpd_ps(0b110111, d8_inexact);
+static_assert(ok512z[5] == 0.0f && ok512z[0] == -1.0f && ok512z[7] == 128.0f, "");
+
+constexpr __m128 bad128 = _mm_mask_cvtpd_ps(src128a, 0x2, d2_inexact); // both-error {{not an integral constant expression}}
+static_assert(bad128[1] == 9.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+}

From 30c0dc75714191e31625bb074e6e62d54aeece7f Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Wed, 26 Nov 2025 22:20:48 +0200
Subject: [PATCH 02/25] Add __DEFAULT_FN_ATTRS_CONSTEXPR to headers

---
 clang/lib/Headers/avx512fintrin.h  | 16 ++++++++--------
 clang/lib/Headers/avx512vlintrin.h |  8 ++++----
 clang/lib/Headers/avxintrin.h      |  4 ++--
 clang/lib/Headers/emmintrin.h      |  4 ++--
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e1de56069870b..b9f1d1eecc09f 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -207,7 +207,7 @@ _mm512_undefined(void)
   return (__m512)__builtin_ia32_undef512();
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_undefined_ps(void)
 {
   return (__m512)__builtin_ia32_undef512();
@@ -3489,7 +3489,7 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_cvtpd_ps (__m512d __A)
 {
   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3498,7 +3498,7 @@ _mm512_cvtpd_ps (__m512d __A)
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
 {
   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3507,7 +3507,7 @@ _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
 {
   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3516,7 +3516,7 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_cvtpd_pslo (__m512d __A)
 {
   return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
@@ -3524,7 +3524,7 @@ _mm512_cvtpd_pslo (__m512d __A)
                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
 {
   return (__m512) __builtin_shufflevector (
@@ -8672,7 +8672,7 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
 {
   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
@@ -8681,7 +8681,7 @@ _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
                                              (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
 {
   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 99c057030a4cc..82a06edd28ba2 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -1791,14 +1791,14 @@ _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
             (__v4sf) __W,
             (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
             (__v4sf)
@@ -1806,14 +1806,14 @@ _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
             (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm256_cvtpd_ps(__A),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm256_cvtpd_ps(__A),
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 44ef88db5cbce..f3f444083edbf 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -2190,7 +2190,7 @@ _mm256_cvtepi32_ps(__m256i __a) {
 /// \param __a
 ///    A 256-bit vector of [4 x double].
 /// \returns A 128-bit vector of [4 x float] containing the converted values.
-static __inline __m128 __DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_cvtpd_ps(__m256d __a)
 {
   return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
@@ -3610,7 +3610,7 @@ _mm256_undefined_pd(void)
 /// This intrinsic has no corresponding instruction.
 ///
 /// \returns A 256-bit vector of [8 x float] containing undefined values.
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_undefined_ps(void)
 {
   return (__m256)__builtin_ia32_undef256();
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index dbe5ca0379cf5..1701effedc5ce 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -1279,7 +1279,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double].
 /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
 ///    converted values. The upper 64 bits are set to zero.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtpd_ps(__m128d __a) {
   return __builtin_ia32_cvtpd2ps((__v2df)__a);
 }
 
@@ -1384,7 +1384,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) {
 /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the
 ///    converted value from the second parameter. The upper 96 bits are copied
 ///    from the upper 96 bits of the first parameter.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a,
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtsd_ss(__m128 __a,
                                                          __m128d __b) {
   return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
 }

From 9f1020ecf3a706df9537b38464b61748aa0278f0 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Wed, 26 Nov 2025 22:24:54 +0200
Subject: [PATCH 03/25] Add Constexpr to necessary builtins

---
 clang/include/clang/Basic/BuiltinsX86.td | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 4aa3d51931980..283a0a3e6ae0c 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -24,12 +24,12 @@ def undef128 : X86Builtin<"_Vector<2, double>()"> {
   let Attributes = [Const, NoThrow, RequiredVectorWidth<128>];
 }
 
-def undef256 : X86Builtin<"_Vector<4, double>()"> {
-  let Attributes = [Const, NoThrow, RequiredVectorWidth<256>];
+def undef256 : X86Builtin<"_Vector<4, double>()"  > {
+  let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<256>];
 }
 
 def undef512 : X86Builtin<"_Vector<8, double>()"> {
-  let Attributes = [Const, NoThrow, RequiredVectorWidth<512>];
+  let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<512>];
 }
 
 // FLAGS
@@ -168,7 +168,7 @@ let Features = "sse2", Attributes = [NoThrow] in {
   def movnti : X86Builtin<"void(int *, int)">;
 }
 
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
   def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
   def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
@@ -468,7 +468,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
   def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
 }
 
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
   def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
   def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
@@ -1009,7 +1009,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128
   def cmppd128_mask : X86Builtin<"unsigned char(_Vector<2, double>, _Vector<2, double>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def rndscaleps_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
   def rndscalepd_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
   def cvtps2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
@@ -1457,7 +1457,7 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
   def compressstoresi256_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, int>, unsigned char)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
   def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned char)">;
   def cvtpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
@@ -3301,7 +3301,7 @@ let Features = "avx512bw,avx512vl",
   def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
 }
 
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
   def cvtsi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, int, _Constant int)">;
   def cvtss2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<4, float>, _Vector<2, double>, unsigned char, _Constant int)">;

From d28d6d8c7cc6e816f772a78dd0d177f0248d3178 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Sat, 29 Nov 2025 11:08:42 +0200
Subject: [PATCH 04/25] Add full tests for pd2ps constexpr

---
 .../constexpr-x86-intrinsics-pd2ps.cpp        | 559 ++++++++++++++----
 1 file changed, 459 insertions(+), 100 deletions(-)

diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
index a082b23bfae03..4a1e9a9c5ae2c 100644
--- a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
+++ b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
@@ -1,120 +1,479 @@
 // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512vl -verify %s
 
-// HACK: Prevent immintrin.h from pulling in standard library headers
-// that don't exist in this test environment.
-#define __MM_MALLOC_H
-
+#define __MM_MALLOC_H 
 #include <immintrin.h>
 
-namespace ExactFinite {
-constexpr __m128d d2 = { -1.0, +2.0 };
-constexpr __m128 r128 = _mm_cvtpd_ps(d2);
-static_assert(r128[0] == -1.0f && r128[1] == +2.0f, "");
-static_assert(r128[2] == 0.0f && r128[3] == 0.0f, "");
-
-constexpr __m128 src128 = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128 m128_full = _mm_mask_cvtpd_ps(src128, 0x3, d2);
-static_assert(m128_full[0] == -1.0f && m128_full[1] == +2.0f, "");
-static_assert(m128_full[2] == 9.0f && m128_full[3] == 9.0f, "");
-
-constexpr __m128 m128_partial = _mm_mask_cvtpd_ps(src128, 0x1, d2);
-static_assert(m128_partial[0] == -1.0f && m128_partial[1] == 9.0f, "");
-
-constexpr __m128 m128_zero = _mm_maskz_cvtpd_ps(0x1, d2);
-static_assert(m128_zero[0] == -1.0f && m128_zero[1] == 0.0f, "");
-static_assert(m128_zero[2] == 0.0f && m128_zero[3] == 0.0f, "");
-
-constexpr __m256d d4 = { 0.0, -1.0, +2.0, +3.5 };
-constexpr __m128 r256 = _mm256_cvtpd_ps(d4);
-static_assert(r256[0] == 0.0f && r256[1] == -1.0f, "");
-static_assert(r256[2] == +2.0f && r256[3] == +3.5f, "");
-
-constexpr __m512d d8 = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r512 = _mm512_cvtpd_ps(d8);
-static_assert(r512[0] == -1.0f && r512[7] == +128.0f, "");
-
-constexpr __m256 src256 = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m256 r512_mask = _mm512_mask_cvtpd_ps(src256, 0x05, d8);
-static_assert(r512_mask[0] == -1.0f && r512_mask[2] == +4.0f, "");
-static_assert(r512_mask[1] == 9.0f && r512_mask[3] == 9.0f, "");
-
-constexpr __m256 r512_maskz = _mm512_maskz_cvtpd_ps(0x81, d8);
-static_assert(r512_maskz[0] == -1.0f && r512_maskz[7] == +128.0f, "");
-static_assert(r512_maskz[1] == 0.0f && r512_maskz[6] == 0.0f, "");
-
-constexpr __m512 r512lo = _mm512_cvtpd_pslo(d8);
-static_assert(r512lo[0] == -1.0f && r512lo[7] == +128.0f, "");
-static_assert(r512lo[8] == 0.0f && r512lo[15] == 0.0f, "");
-
-constexpr __m512 ws = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
-                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512 r512lo_mask = _mm512_mask_cvtpd_pslo(ws, 0x3, d8);
-static_assert(r512lo_mask[0] == -1.0f, "");
-static_assert(r512lo_mask[1] == +2.0f, "");
-static_assert(r512lo_mask[2] == 9.0f && r512lo_mask[3] == 9.0f, "");
-
-constexpr __m128 src_ss = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128d b_ss = { -1.0, 42.0 };
-constexpr __m128 r_ss = _mm_cvtsd_ss(src_ss, b_ss);
-static_assert(r_ss[0] == -1.0f, "");
-static_assert(r_ss[1] == 5.0f && r_ss[3] == 7.0f, "");
+namespace Test_mm_cvtsd_ss {
+namespace OK {
+constexpr __m128 a = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+static_assert(r[0] == -1.0f && r[1] == 5.0f && r[2] == 6.0f && r[3] == 7.0f, "");
+}
+namespace Inexact {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
+}
+namespace Inf {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace NaN {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {nan, 0.000000e+00})'}}
+}
+namespace Subnormal {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
+}
+}
 
-constexpr __m128 r_ss_mask_on = _mm_mask_cvtsd_ss(src_ss, 0x1, src_ss, b_ss);
-static_assert(r_ss_mask_on[0] == -1.0f && r_ss_mask_on[1] == 5.0f, "");
-constexpr __m128 r_ss_mask_off = _mm_mask_cvtsd_ss(src_ss, 0x0, src_ss, b_ss);
-static_assert(r_ss_mask_off[0] == 9.0f, "");
-constexpr __m128 r_ss_maskz_off = _mm_maskz_cvtsd_ss(0x0, src_ss, b_ss);
-static_assert(r_ss_maskz_off[0] == 0.0f && r_ss_maskz_off[1] == 0.0f, "");
+namespace Test_mm_mask_cvtsd_ss {
+namespace OK {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b);
+static_assert(r[0] == -1.0f && r[1] == 2.0f && r[2] == 3.0f && r[3] == 4.0f, "");
+}
+namespace MaskOff {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b);
+static_assert(r[0] == 9.0f && r[1] == 2.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b_inexact);
+static_assert(r[0] == 9.0f, "");
+}
+namespace MaskOnInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inexact);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
+}
+namespace MaskOnInf {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
+}
+namespace MaskOnSubnormal {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_sub = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_sub);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
+}
 }
 
-namespace InexactOrSpecialReject {
-constexpr __m128d inexact = { 1.0000000000000002, 0.0 };
-constexpr __m128 r_inexact = _mm_cvtpd_ps(inexact); // both-error {{not an integral constant expression}}
-static_assert(r_inexact[0] == 1.0f, "");           // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm_maskz_cvtsd_ss {
+namespace OK {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b);
+static_assert(r[0] == -1.0f && r[1] == 2.0f, "");
+}
+namespace MaskOff {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b);
+static_assert(r[0] == 0.0f && r[1] == 2.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b_inexact);
+static_assert(r[0] == 0.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
+}
+}
 
-constexpr __m128d dinf = { __builtin_huge_val(), 0.0 };
-constexpr __m128 r_inf = _mm_cvtpd_ps(dinf); // both-error {{not an integral constant expression}}
-static_assert(r_inf[0] == __builtin_inff(), ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm_cvtpd_ps {
+namespace OK {
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
+static_assert(r[2] == 0.0f && r[3] == 0.0f, "");
+}
+namespace Inexact {
+constexpr __m128d a = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e+00, 0.000000e+00})'}}
+}
+namespace Inf {
+constexpr __m128d a = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({INF, 0.000000e+00})'}}
+}
+namespace NaN {
+constexpr __m128d a = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({nan, 0.000000e+00})'}}
+}
+namespace Subnormal {
+constexpr __m128d a = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e-310, 0.000000e+00})'}}
+}
+}
 
-constexpr __m128d dnan = { __builtin_nan(""), 0.0 };
-constexpr __m128 r_nan = _mm_cvtpd_ps(dnan); // both-error {{not an integral constant expression}}
-static_assert(r_nan[0] != r_nan[0], "");  // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm_mask_cvtpd_ps {
+namespace OK {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x3, a);
+static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
+static_assert(r[2] == 9.0f && r[3] == 9.0f, "");
+}
+namespace Partial {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a);
+static_assert(r[0] == -1.0f && r[1] == 9.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a_inexact);
+static_assert(r[0] == -1.0f && r[1] == 9.0f, "");
+}
+namespace MaskOnInexact {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inexact);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, 1.000000e+00})'}}
+}
+namespace MaskOnInf {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, INF})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, nan})'}}
+}
+}
 
-constexpr __m128d dsub = { 1e-310, 0.0 };
-constexpr __m128 r_sub = _mm_cvtpd_ps(dsub); // both-error {{not an integral constant expression}}
-static_assert(r_sub[0] == 0.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a);
+static_assert(r[0] == -1.0f && r[1] == 0.0f, "");
+static_assert(r[2] == 0.0f && r[3] == 0.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a_inexact);
+static_assert(r[0] == -1.0f && r[1] == 0.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, INF})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, nan})'}}
+}
+}
 
-constexpr __m128 src_ss2 = { 0.0f, 1.0f, 2.0f, 3.0f };
-constexpr __m128d inexact_sd = { 1.0000000000000002, 0.0 };
-constexpr __m128 r_ss_inexact = _mm_cvtsd_ss(src_ss2, inexact_sd); // both-error {{not an integral constant expression}}
-static_assert(r_ss_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm256_cvtpd_ps {
+namespace OK {
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r = _mm256_cvtpd_ps(a);
+static_assert(r[0] == 0.0f && r[1] == -1.0f, "");
+static_assert(r[2] == +2.0f && r[3] == +3.5f, "");
+}
+namespace Inexact {
+constexpr __m256d a = { 1.0000000000000002, 0.0, 0.0, 0.0 };
+constexpr __m128 r = _mm256_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm256_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
+}
 }
 
-namespace MaskedSpecialCasesAllowed {
-constexpr __m128 src128a = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d d2_inexact = { -1.0, 1.0000000000000002 };
-constexpr __m128 ok128 = _mm_mask_cvtpd_ps(src128a, 0x1, d2_inexact);
-static_assert(ok128[0] == -1.0f && ok128[1] == 9.0f, "");
+namespace Test_mm256_mask_cvtpd_ps {
+namespace OK {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0xF, a);
+static_assert(r[0] == 0.0f && r[1] == -1.0f && r[2] == +2.0f && r[3] == +3.5f, "");
+}
+namespace MaskOffInf {
+// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
+// So even masked-off Inf/NaN values cause errors (architectural limitation)
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x3, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+}
+namespace MaskOffNaN {
+// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
+// So even masked-off Inf/NaN values cause errors (architectural limitation)
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x7, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+}
+}
 
-constexpr __m128 ok128z = _mm_maskz_cvtpd_ps(0x1, d2_inexact);
-static_assert(ok128z[0] == -1.0f && ok128z[1] == 0.0f, "");
+namespace Test_mm256_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x5, a);
+static_assert(r[0] == 0.0f && r[1] == 0.0f && r[2] == +2.0f && r[3] == 0.0f, "");
+}
+namespace MaskOffInf {
+// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
+// So even masked-off Inf/NaN values cause errors (architectural limitation)
+constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x3, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+}
+namespace MaskOffNaN {
+// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
+// So even masked-off Inf/NaN values cause errors (architectural limitation)
+constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x7, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+}
+}
 
-constexpr __m256d d4_inexact = { 0.0, 1.0000000000000002, 2.0, 3.0 };
-constexpr __m128 src_m = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128 ok256m = _mm256_mask_cvtpd_ps(src_m, 0b0101, d4_inexact);
-static_assert(ok256m[0] == 0.0f && ok256m[1] == 9.0f && ok256m[2] == 2.0f && ok256m[3] == 9.0f, "");
+namespace Test_mm512_cvtpd_ps {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_cvtpd_ps(a);
+static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
+}
+namespace Inexact {
+constexpr __m512d a = { 1.0000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+constexpr __m256 r = _mm512_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm512_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
+}
+}
 
-constexpr __m128 ok256z = _mm256_maskz_cvtpd_ps(0b0101, d4_inexact);
-static_assert(ok256z[0] == 0.0f && ok256z[1] == 0.0f && ok256z[2] == 2.0f && ok256z[3] == 0.0f, "");
+namespace Test_mm512_mask_cvtpd_ps {
+namespace OK {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x05, a);
+static_assert(r[0] == -1.0f && r[2] == +4.0f, "");
+static_assert(r[1] == 9.0f && r[3] == 9.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0b11011111, a_inexact);
+static_assert(r[0] == -1.0f && r[5] == 9.0f && r[6] == 64.0f && r[7] == 128.0f, "");
+}
+namespace MaskOffInf {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_inf);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, "");
+}
+namespace MaskOffNaN {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_nan);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+}
 
-constexpr __m512d d8_inexact = { -1.0, 2.0, 4.0, 8.0, 16.0, 1.0000000000000002, 64.0, 128.0 };
-constexpr __m256 src256b = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m256 ok512m = _mm512_mask_cvtpd_ps(src256b, 0b110111, d8_inexact);
-static_assert(ok512m[0] == -1.0f && ok512m[5] == 9.0f && ok512m[7] == 128.0f, "");
+namespace Test_mm512_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x81, a);
+static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
+static_assert(r[1] == 0.0f && r[6] == 0.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0b11011111, a_inexact);
+static_assert(r[0] == -1.0f && r[5] == 0.0f && r[6] == 64.0f && r[7] == 128.0f, "");
+}
+namespace MaskOffInf {
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_inf);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, "");
+}
+namespace MaskOffNaN {
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_nan);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+}
 
-constexpr __m256 ok512z = _mm512_maskz_cvtpd_ps(0b110111, d8_inexact);
-static_assert(ok512z[5] == 0.0f && ok512z[0] == -1.0f && ok512z[7] == 128.0f, "");
+namespace Test_mm512_cvtpd_pslo {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_cvtpd_pslo(a);
+static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
+static_assert(r[8] == 0.0f && r[15] == 0.0f, "");
+}
+}
 
-constexpr __m128 bad128 = _mm_mask_cvtpd_ps(src128a, 0x2, d2_inexact); // both-error {{not an integral constant expression}}
-static_assert(bad128[1] == 9.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm512_mask_cvtpd_pslo {
+namespace OK {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a);
+static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
+static_assert(r[2] == 9.0f && r[3] == 9.0f, "");
+}
+namespace MaskOffInf {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a_inf);
+static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 9.0f, "");
+}
+namespace MaskOffNaN {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x7, a_nan);
+static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 4.0f && r[3] == 9.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, __builtin_nan(""), +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
 }

From 4a2f59bee574efec48ac87e74dae356dc72fb2ae Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Sat, 29 Nov 2025 11:09:57 +0200
Subject: [PATCH 05/25] fully implemented features in ExprConstant visiting
 logic

---
 clang/lib/AST/ExprConstant.cpp | 143 +++++++++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3b91678f7d400..065d5c2e33a9c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12165,7 +12165,37 @@ static bool evalShuffleGeneric(
   Out = APValue(ResultElements.data(), ResultElements.size());
   return true;
 }
+static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E,
+                                       APFloat OrigVal, APValue &Result) {
 
+  if (OrigVal.isInfinity()) {
+    Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0; 
+    return false;
+  }
+  if (OrigVal.isNaN()) {
+    Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 1;
+    return false;
+  }
+
+  APFloat Val = OrigVal; 
+  bool LosesInfo = false;
+  APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(),
+                                         APFloat::rmNearestTiesToEven,
+                                         &LosesInfo);
+
+  if(LosesInfo || Val.isDenormal()) {
+    Info.CCEDiag(E, diag::note_constexpr_float_arithmetic_strict);
+    return false;
+  }
+
+  if(Status != APFloat::opOK) {
+    Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
+    return false;
+  }
+
+  Result = APValue(Val);
+  return true;
+}
 bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   if (!IsConstantEvaluatedBuiltinCall(E))
     return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -12878,6 +12908,119 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
 
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
+
+    case X86::BI__builtin_ia32_cvtsd2ss: {
+      APValue VecA, VecB;
+      if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+          !EvaluateAsRValue(Info, E->getArg(1), VecB))
+        return false;
+
+      SmallVector<APValue, 4> Elements;
+
+      APValue ResultVal;
+      if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
+        return false;
+    
+      Elements.push_back(ResultVal);
+
+      unsigned NumEltsA = VecA.getVectorLength();
+      for (unsigned I = 1; I < NumEltsA; ++I) {
+          Elements.push_back(VecA.getVectorElt(I));
+      }
+
+      return Success(Elements, E);
+    }
+    case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
+      APValue VecA, VecB, VecSrc, MaskValue;
+
+      if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+          !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
+          !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
+          !EvaluateAsRValue(Info, E->getArg(3), MaskValue))
+        return false;
+
+      unsigned Mask = MaskValue.getInt().getZExtValue();
+      SmallVector<APValue, 4> Elements;
+
+
+      if (Mask & 1) {
+        APValue ResultVal;
+        if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
+           return false;
+        Elements.push_back(ResultVal);
+      } else {
+        Elements.push_back(VecSrc.getVectorElt(0));
+      }
+
+      unsigned NumEltsA = VecA.getVectorLength();
+      for (unsigned I = 1; I < NumEltsA; ++I) {
+          Elements.push_back(VecA.getVectorElt(I));
+      }
+
+      return Success(Elements, E);
+    }
+    case X86::BI__builtin_ia32_cvtpd2ps:
+    case X86::BI__builtin_ia32_cvtpd2ps256: 
+    case X86::BI__builtin_ia32_cvtpd2ps_mask:     
+    case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
+
+
+      const auto BuiltinID = E->getBuiltinCallee();
+      bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask || 
+                       BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
+
+      APValue InputValue;
+      if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
+        return false;
+
+      APValue MergeValue;
+      unsigned Mask = 0xFFFFFFFF;
+      bool NeedsMerge = false;
+      if (IsMasked) {
+          APValue MaskValue;
+          if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
+              return false;
+          Mask = MaskValue.getInt().getZExtValue();
+          auto NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
+          for (unsigned I = 0; I < NumEltsResult; ++I) {
+            if (!((Mask >> I) & 1)) {
+              NeedsMerge = true;
+              break;
+            }
+          }
+          if (NeedsMerge) {
+            if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue))
+              return false;
+          }
+      }
+
+      unsigned NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
+      unsigned NumEltsInput = InputValue.getVectorLength();
+      SmallVector<APValue, 8> Elements;
+      for (unsigned I = 0; I < NumEltsResult; ++I) {
+        if (IsMasked && !((Mask >> I) & 1)) {
+            if (!NeedsMerge) {
+              return false;
+            }
+            Elements.push_back(MergeValue.getVectorElt(I));
+            continue; 
+        }
+
+        if (I >= NumEltsInput) {
+           Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
+           continue;
+        }
+
+        APValue ResultVal;
+        if (!ConvertDoubleToFloatStrict(Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal))
+           return false;
+        
+        Elements.push_back(ResultVal);
+      }
+      return Success(Elements, E);
+    }
+
+  
   case X86::BI__builtin_ia32_shufps:
   case X86::BI__builtin_ia32_shufps256:
   case X86::BI__builtin_ia32_shufps512: {

From 0fb3292fe860e30de61d2df3a90912f27f04f143 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Sat, 29 Nov 2025 11:21:17 +0200
Subject: [PATCH 06/25] Ran the git clang-format command

---
 clang/lib/AST/ExprConstant.cpp | 192 ++++++++++++++++-----------------
 1 file changed, 96 insertions(+), 96 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 065d5c2e33a9c..6f512dd538e7d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12169,7 +12169,7 @@ static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E,
                                        APFloat OrigVal, APValue &Result) {
 
   if (OrigVal.isInfinity()) {
-    Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0; 
+    Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0;
     return false;
   }
   if (OrigVal.isNaN()) {
@@ -12177,18 +12177,17 @@ static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E,
     return false;
   }
 
-  APFloat Val = OrigVal; 
+  APFloat Val = OrigVal;
   bool LosesInfo = false;
-  APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(),
-                                         APFloat::rmNearestTiesToEven,
-                                         &LosesInfo);
+  APFloat::opStatus Status = Val.convert(
+      APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
 
-  if(LosesInfo || Val.isDenormal()) {
+  if (LosesInfo || Val.isDenormal()) {
     Info.CCEDiag(E, diag::note_constexpr_float_arithmetic_strict);
     return false;
   }
 
-  if(Status != APFloat::opOK) {
+  if (Status != APFloat::opOK) {
     Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
     return false;
   }
@@ -12909,118 +12908,119 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
 
-    case X86::BI__builtin_ia32_cvtsd2ss: {
-      APValue VecA, VecB;
-      if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
-          !EvaluateAsRValue(Info, E->getArg(1), VecB))
-        return false;
+  case X86::BI__builtin_ia32_cvtsd2ss: {
+    APValue VecA, VecB;
+    if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+        !EvaluateAsRValue(Info, E->getArg(1), VecB))
+      return false;
 
-      SmallVector<APValue, 4> Elements;
+    SmallVector<APValue, 4> Elements;
 
-      APValue ResultVal;
-      if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
-        return false;
-    
-      Elements.push_back(ResultVal);
+    APValue ResultVal;
+    if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(),
+                                    ResultVal))
+      return false;
 
-      unsigned NumEltsA = VecA.getVectorLength();
-      for (unsigned I = 1; I < NumEltsA; ++I) {
-          Elements.push_back(VecA.getVectorElt(I));
-      }
+    Elements.push_back(ResultVal);
 
-      return Success(Elements, E);
+    unsigned NumEltsA = VecA.getVectorLength();
+    for (unsigned I = 1; I < NumEltsA; ++I) {
+      Elements.push_back(VecA.getVectorElt(I));
     }
-    case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
-      APValue VecA, VecB, VecSrc, MaskValue;
-
-      if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
-          !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
-          !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
-          !EvaluateAsRValue(Info, E->getArg(3), MaskValue))
-        return false;
 
-      unsigned Mask = MaskValue.getInt().getZExtValue();
-      SmallVector<APValue, 4> Elements;
+    return Success(Elements, E);
+  }
+  case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
+    APValue VecA, VecB, VecSrc, MaskValue;
 
+    if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+        !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
+        !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
+        !EvaluateAsRValue(Info, E->getArg(3), MaskValue))
+      return false;
 
-      if (Mask & 1) {
-        APValue ResultVal;
-        if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
-           return false;
-        Elements.push_back(ResultVal);
-      } else {
-        Elements.push_back(VecSrc.getVectorElt(0));
-      }
+    unsigned Mask = MaskValue.getInt().getZExtValue();
+    SmallVector<APValue, 4> Elements;
 
-      unsigned NumEltsA = VecA.getVectorLength();
-      for (unsigned I = 1; I < NumEltsA; ++I) {
-          Elements.push_back(VecA.getVectorElt(I));
-      }
+    if (Mask & 1) {
+      APValue ResultVal;
+      if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(),
+                                      ResultVal))
+        return false;
+      Elements.push_back(ResultVal);
+    } else {
+      Elements.push_back(VecSrc.getVectorElt(0));
+    }
 
-      return Success(Elements, E);
+    unsigned NumEltsA = VecA.getVectorLength();
+    for (unsigned I = 1; I < NumEltsA; ++I) {
+      Elements.push_back(VecA.getVectorElt(I));
     }
-    case X86::BI__builtin_ia32_cvtpd2ps:
-    case X86::BI__builtin_ia32_cvtpd2ps256: 
-    case X86::BI__builtin_ia32_cvtpd2ps_mask:     
-    case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
 
+    return Success(Elements, E);
+  }
+  case X86::BI__builtin_ia32_cvtpd2ps:
+  case X86::BI__builtin_ia32_cvtpd2ps256:
+  case X86::BI__builtin_ia32_cvtpd2ps_mask:
+  case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
 
-      const auto BuiltinID = E->getBuiltinCallee();
-      bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask || 
-                       BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
+    const auto BuiltinID = E->getBuiltinCallee();
+    bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
+                     BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
 
-      APValue InputValue;
-      if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
-        return false;
-
-      APValue MergeValue;
-      unsigned Mask = 0xFFFFFFFF;
-      bool NeedsMerge = false;
-      if (IsMasked) {
-          APValue MaskValue;
-          if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
-              return false;
-          Mask = MaskValue.getInt().getZExtValue();
-          auto NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
-          for (unsigned I = 0; I < NumEltsResult; ++I) {
-            if (!((Mask >> I) & 1)) {
-              NeedsMerge = true;
-              break;
-            }
-          }
-          if (NeedsMerge) {
-            if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue))
-              return false;
-          }
-      }
+    APValue InputValue;
+    if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
+      return false;
 
-      unsigned NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
-      unsigned NumEltsInput = InputValue.getVectorLength();
-      SmallVector<APValue, 8> Elements;
+    APValue MergeValue;
+    unsigned Mask = 0xFFFFFFFF;
+    bool NeedsMerge = false;
+    if (IsMasked) {
+      APValue MaskValue;
+      if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
+        return false;
+      Mask = MaskValue.getInt().getZExtValue();
+      auto NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
       for (unsigned I = 0; I < NumEltsResult; ++I) {
-        if (IsMasked && !((Mask >> I) & 1)) {
-            if (!NeedsMerge) {
-              return false;
-            }
-            Elements.push_back(MergeValue.getVectorElt(I));
-            continue; 
+        if (!((Mask >> I) & 1)) {
+          NeedsMerge = true;
+          break;
         }
+      }
+      if (NeedsMerge) {
+        if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue))
+          return false;
+      }
+    }
 
-        if (I >= NumEltsInput) {
-           Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
-           continue;
+    unsigned NumEltsResult =
+        E->getType()->getAs<VectorType>()->getNumElements();
+    unsigned NumEltsInput = InputValue.getVectorLength();
+    SmallVector<APValue, 8> Elements;
+    for (unsigned I = 0; I < NumEltsResult; ++I) {
+      if (IsMasked && !((Mask >> I) & 1)) {
+        if (!NeedsMerge) {
+          return false;
         }
+        Elements.push_back(MergeValue.getVectorElt(I));
+        continue;
+      }
 
-        APValue ResultVal;
-        if (!ConvertDoubleToFloatStrict(Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal))
-           return false;
-        
-        Elements.push_back(ResultVal);
+      if (I >= NumEltsInput) {
+        Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
+        continue;
       }
-      return Success(Elements, E);
+
+      APValue ResultVal;
+      if (!ConvertDoubleToFloatStrict(
+              Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal))
+        return false;
+
+      Elements.push_back(ResultVal);
     }
+    return Success(Elements, E);
+  }
 
-  
   case X86::BI__builtin_ia32_shufps:
   case X86::BI__builtin_ia32_shufps256:
   case X86::BI__builtin_ia32_shufps512: {

From 75c76719bfe4116e79140388fd52fa47df8da96b Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Mon, 1 Dec 2025 06:44:04 +0200
Subject: [PATCH 07/25] removed constexpr from _mm512_undefined_ps

---
 clang/lib/Headers/avx512fintrin.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 63031c2fcfd82..85d54bc8eff8c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -207,7 +207,7 @@ _mm512_undefined(void)
   return (__m512)__builtin_ia32_undef512();
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_undefined_ps(void) {
   return (__m512)__builtin_ia32_undef512();
 }

From 04dabc03228514825a07fa3648e3d8a646cdc33c Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Mon, 1 Dec 2025 11:07:51 +0200
Subject: [PATCH 08/25] added constexpr to __builtin_ia32_undef, and updated
 BuiltinsX86.td

---
 clang/include/clang/Basic/BuiltinsX86.td | 37 ++++++++++++++++--------
 clang/lib/AST/ExprConstant.cpp           | 24 +++++++++++++++
 clang/lib/Headers/avx512fintrin.h        |  3 +-
 clang/lib/Headers/xmmintrin.h            |  2 +-
 4 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index df6ec01959bd4..097e980989941 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -21,15 +21,15 @@ def rdtscp : X86Builtin<"unsigned long long int(unsigned int*)">;
 
 // Undefined Values
 def undef128 : X86Builtin<"_Vector<2, double>()"> {
-  let Attributes = [Const, NoThrow, RequiredVectorWidth<128>];
+  let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<128>];
 }
 
-def undef256 : X86Builtin<"_Vector<4, double>()"  > {
-  let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<256>];
+def undef256 : X86Builtin<"_Vector<4, double>()"> {
+  let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>];
 }
 
 def undef512 : X86Builtin<"_Vector<8, double>()"> {
-  let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<512>];
+  let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<512>];
 }
 
 // FLAGS
@@ -167,13 +167,19 @@ let Features = "sse2", Attributes = [NoThrow] in {
 }
 
 let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+  def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
+  def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">;
+}
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+  def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
   def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
   def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">;
-  def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
   def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">;
   def cvtsd2si : X86Builtin<"int(_Vector<2, double>)">;
   def cvttsd2si : X86Builtin<"int(_Vector<2, double>)">;
-  def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">;
   def cvtps2dq : X86Builtin<"_Vector<4, int>(_Vector<4, float>)">;
   def cvttps2dq : X86Builtin<"_Vector<4, int>(_Vector<4, float>)">;
 }
@@ -463,10 +469,13 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
 }
 
 let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
   def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
   def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
   def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
-  def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">;
   def cvtps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">;
   def cvttpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
   def cvtpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
@@ -474,7 +483,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
   def vperm2f128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
   def vperm2f128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
   def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
-
   foreach Op = ["max", "min"] in {
     def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
     def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
@@ -1005,6 +1013,10 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128
 }
 
 let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+  def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
   def rndscaleps_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
   def rndscalepd_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
   def cvtps2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
@@ -1017,7 +1029,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto
   def maxpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
   def cvtdq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">;
   def cvtudq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">;
-  def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
   def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">;
   def vcvtph2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, short>, _Vector<16, float>, unsigned short, _Constant int)">;
 }
@@ -1453,8 +1464,11 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
 }
 
 let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
-  def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
   def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
   def cvtpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
 }
 
@@ -3287,8 +3301,7 @@ let Features = "avx512bw,avx512vl",
   def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
 }
 
-let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
-  def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
   def cvtsi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, int, _Constant int)">;
   def cvtss2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<4, float>, _Vector<2, double>, unsigned char, _Constant int)">;
   def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ee82398d7ac2a..0868237d52404 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12954,6 +12954,30 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
 
+  case X86::BI__builtin_ia32_undef128:
+  case X86::BI__builtin_ia32_undef256:
+  case X86::BI__builtin_ia32_undef512: {
+    // Undefined builtins return zero-initialized vectors in constexpr contexts
+    const auto *VTy = E->getType()->castAs<VectorType>();
+    unsigned NumElts = VTy->getNumElements();
+    QualType EltTy = VTy->getElementType();
+    
+    SmallVector<APValue, 16> Elements;
+    Elements.reserve(NumElts);
+    
+    if (EltTy->isIntegerType()) {
+      APSInt Zero(Info.Ctx.getTypeSize(EltTy), EltTy->isUnsignedIntegerType());
+      for (unsigned I = 0; I < NumElts; ++I)
+        Elements.push_back(APValue(Zero));
+    } else {
+      APFloat Zero(Info.Ctx.getFloatTypeSemantics(EltTy));
+      for (unsigned I = 0; I < NumElts; ++I)
+        Elements.push_back(APValue(Zero));
+    }
+    
+    return Success(APValue(Elements.data(), Elements.size()), E);
+  }
+
   case X86::BI__builtin_ia32_cvtsd2ss: {
     APValue VecA, VecB;
     if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 85d54bc8eff8c..9dcc4bea24a37 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -207,8 +207,7 @@ _mm512_undefined(void)
   return (__m512)__builtin_ia32_undef512();
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_undefined_ps(void) {
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_undefined_ps(void) {
   return (__m512)__builtin_ia32_undef512();
 }
 
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 72a643948bed6..b6487bed9facb 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -1892,7 +1892,7 @@ _mm_loadr_ps(const float *__p)
 /// This intrinsic has no corresponding instruction.
 ///
 /// \returns A 128-bit vector of [4 x float] containing undefined values.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_undefined_ps(void)
 {
   return (__m128)__builtin_ia32_undef128();

From 880b06029a68e75722326ebd62afb153b5724664 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Mon, 1 Dec 2025 12:22:01 +0200
Subject: [PATCH 09/25] Removed undef as constexpr and used _mm256_setzero_ps
 instead

---
 clang/include/clang/Basic/BuiltinsX86.td |  6 +++---
 clang/lib/AST/ExprConstant.cpp           | 23 -----------------------
 clang/lib/Headers/avx512fintrin.h        |  4 ++--
 clang/lib/Headers/avxintrin.h            |  2 +-
 clang/lib/Headers/xmmintrin.h            |  2 +-
 5 files changed, 7 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 097e980989941..9754f839fc803 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -21,15 +21,15 @@ def rdtscp : X86Builtin<"unsigned long long int(unsigned int*)">;
 
 // Undefined Values
 def undef128 : X86Builtin<"_Vector<2, double>()"> {
-  let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<128>];
+  let Attributes = [Const, NoThrow, RequiredVectorWidth<128>];
 }
 
 def undef256 : X86Builtin<"_Vector<4, double>()"> {
-  let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>];
+  let Attributes = [Const, NoThrow, RequiredVectorWidth<256>];
 }
 
 def undef512 : X86Builtin<"_Vector<8, double>()"> {
-  let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<512>];
+  let Attributes = [Const, NoThrow, RequiredVectorWidth<512>];
 }
 
 // FLAGS
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 0868237d52404..c0a719e578332 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12954,29 +12954,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
 
-  case X86::BI__builtin_ia32_undef128:
-  case X86::BI__builtin_ia32_undef256:
-  case X86::BI__builtin_ia32_undef512: {
-    // Undefined builtins return zero-initialized vectors in constexpr contexts
-    const auto *VTy = E->getType()->castAs<VectorType>();
-    unsigned NumElts = VTy->getNumElements();
-    QualType EltTy = VTy->getElementType();
-    
-    SmallVector<APValue, 16> Elements;
-    Elements.reserve(NumElts);
-    
-    if (EltTy->isIntegerType()) {
-      APSInt Zero(Info.Ctx.getTypeSize(EltTy), EltTy->isUnsignedIntegerType());
-      for (unsigned I = 0; I < NumElts; ++I)
-        Elements.push_back(APValue(Zero));
-    } else {
-      APFloat Zero(Info.Ctx.getFloatTypeSemantics(EltTy));
-      for (unsigned I = 0; I < NumElts; ++I)
-        Elements.push_back(APValue(Zero));
-    }
-    
-    return Success(APValue(Elements.data(), Elements.size()), E);
-  }
 
   case X86::BI__builtin_ia32_cvtsd2ss: {
     APValue VecA, VecB;
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9dcc4bea24a37..edcbdba908522 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -207,7 +207,7 @@ _mm512_undefined(void)
   return (__m512)__builtin_ia32_undef512();
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_undefined_ps(void) {
+static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void) {
   return (__m512)__builtin_ia32_undef512();
 }
 
@@ -3490,7 +3490,7 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
 static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_cvtpd_ps(__m512d __A) {
   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
-                (__v8sf) _mm256_undefined_ps (),
+                (__v8sf) _mm256_setzero_ps (),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
 }
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 605e70307cfc9..126ba30bcca7e 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -3605,7 +3605,7 @@ _mm256_undefined_pd(void)
 /// This intrinsic has no corresponding instruction.
 ///
 /// \returns A 256-bit vector of [8 x float] containing undefined values.
-static __inline__ __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_undefined_ps(void) {
   return (__m256)__builtin_ia32_undef256();
 }
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index b6487bed9facb..72a643948bed6 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -1892,7 +1892,7 @@ _mm_loadr_ps(const float *__p)
 /// This intrinsic has no corresponding instruction.
 ///
 /// \returns A 128-bit vector of [4 x float] containing undefined values.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_undefined_ps(void)
 {
   return (__m128)__builtin_ia32_undef128();

From 4efe60af5e75b7c51320b66a3bc764a34b757df3 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Mon, 1 Dec 2025 12:41:57 +0200
Subject: [PATCH 10/25] Implemented InterpBuiltin.cpp implementations

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 176 +++++++++++++++++++++++
 1 file changed, 176 insertions(+)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 8496b58105c7a..ad49eb14b911f 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -167,6 +167,37 @@ static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
   return Result;
 }
 
+// Strict double -> float conversion used for X86 PD2PS/cvtsd2ss intrinsics.
+// Reject NaN/Inf/Subnormal inputs and any lossy/inexact conversions.
+static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst, InterpState &S,
+                                const Expr *DiagExpr) {
+  if (Src.isInfinity()) {
+    if (S.diagnosing())
+      S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 0;
+    return false;
+  }
+  if (Src.isNaN()) {
+    if (S.diagnosing())
+      S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 1;
+    return false;
+  }
+  APFloat Val = Src;
+  bool LosesInfo = false;
+  APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
+  if (LosesInfo || Val.isDenormal()) {
+    if (S.diagnosing())
+      S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic_strict);
+    return false;
+  }
+  if (Status != APFloat::opOK) {
+    if (S.diagnosing())
+      S.CCEDiag(DiagExpr, diag::note_invalid_subexpr_in_const_expr);
+    return false;
+  }
+  Dst.copy(Val);
+  return true;
+}
+
 static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
                                                   const InterpFrame *Frame,
                                                   const CallExpr *Call) {
@@ -3359,6 +3390,140 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
   pushInteger(S, RetMask, Call->getType());
   return true;
 }
+static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
+                                         const CallExpr *Call) {
+  assert(Call->getNumArgs() == 2);
+
+  const Pointer &B = S.Stk.pop<Pointer>();
+  const Pointer &A = S.Stk.pop<Pointer>();
+  if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B))
+    return false;
+
+  const auto *DstVTy = Call->getType()->castAs<VectorType>();
+  unsigned NumElems = DstVTy->getNumElements();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  // Copy all elements from A to Dst
+  for (unsigned I = 0; I != NumElems; ++I)
+    Dst.elem<Floating>(I) = A.elem<Floating>(I);
+
+  // Convert element 0 from double to float
+  Floating Conv = S.allocFloat(
+      S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy));
+  APFloat SrcD = B.elem<Floating>(0).getAPFloat();
+  if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call))
+    return false;
+  Dst.elem<Floating>(0) = Conv;
+
+  Dst.initializeAllElements();
+  return true;
+}
+
+static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
+                                                      CodePtr OpPC,
+                                                      const CallExpr *Call) {
+  assert(Call->getNumArgs() == 5);
+
+  // Pop in reverse order: rounding, mask, src, b, a
+  APSInt Rounding = popToAPSInt(S, Call->getArg(4)->getType());
+  APSInt MaskInt = popToAPSInt(S, Call->getArg(3)->getType());
+  const Pointer &Src = S.Stk.pop<Pointer>();
+  const Pointer &B = S.Stk.pop<Pointer>();
+  const Pointer &A = S.Stk.pop<Pointer>();
+  if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) || !CheckLoad(S, OpPC, Src))
+    return false;
+
+  const auto *DstVTy = Call->getType()->castAs<VectorType>();
+  unsigned NumElems = DstVTy->getNumElements();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  // Copy all elements from A to Dst
+  for (unsigned I = 0; I != NumElems; ++I)
+    Dst.elem<Floating>(I) = A.elem<Floating>(I);
+
+  // If mask bit 0 is set, convert element 0 from double to float; otherwise use Src
+  if (MaskInt.getZExtValue() & 0x1) {
+    Floating Conv = S.allocFloat(
+        S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy));
+    APFloat SrcD = B.elem<Floating>(0).getAPFloat();
+    if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call))
+      return false;
+    Dst.elem<Floating>(0) = Conv;
+  } else {
+    Dst.elem<Floating>(0) = Src.elem<Floating>(0);
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
+static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
+                                          const CallExpr *Call,
+                                          unsigned BuiltinID) {
+  bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
+                   BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
+  bool HasRounding = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
+  
+  APSInt MaskVal(1, false);
+  Pointer PassThrough;
+  Pointer SrcPd;
+  APSInt Rounding;
+  
+  if (IsMasked) {
+    // Pop in reverse order
+    if (HasRounding) {
+      // For 512: rounding, mask, passthrough, source
+      Rounding = popToAPSInt(S, Call->getArg(3)->getType());
+      MaskVal = popToAPSInt(S, Call->getArg(2)->getType());
+      PassThrough = S.Stk.pop<Pointer>();
+      SrcPd = S.Stk.pop<Pointer>();
+    } else {
+      // For VL: mask, passthrough, source
+      MaskVal = popToAPSInt(S, Call->getArg(2)->getType());
+      PassThrough = S.Stk.pop<Pointer>();
+      SrcPd = S.Stk.pop<Pointer>();
+    }
+    
+    if (!CheckLoad(S, OpPC, PassThrough))
+      return false;
+  } else {
+    // Pop source only
+    SrcPd = S.Stk.pop<Pointer>();
+  }
+  
+  if (!CheckLoad(S, OpPC, SrcPd))
+    return false;
+
+  const auto *RetVTy = Call->getType()->castAs<VectorType>();
+  unsigned RetElems = RetVTy->getNumElements();
+  unsigned SrcElems = SrcPd.getNumElems();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  // Initialize destination with passthrough or zeros
+  for (unsigned I = 0; I != RetElems; ++I) {
+    if (IsMasked) {
+      Dst.elem<Floating>(I) = PassThrough.elem<Floating>(I);
+    } else {
+      Dst.elem<Floating>(I) = Floating(APFloat(0.0f));
+    }
+  }
+
+  // Convert double to float for enabled elements (only process source elements that exist)
+  for (unsigned I = 0; I != SrcElems; ++I) {
+    if (IsMasked && (((MaskVal.getZExtValue() >> I) & 0x1) == 0))
+      continue;
+
+    APFloat SrcD = SrcPd.elem<Floating>(I).getAPFloat();
+    Floating Conv = S.allocFloat(
+        S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy));
+    if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call))
+      return false;
+    Dst.elem<Floating>(I) = Conv;
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
 
 static bool interp__builtin_ia32_shuffle_generic(
     InterpState &S, CodePtr OpPC, const CallExpr *Call,
@@ -5169,6 +5334,17 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_cvtq2mask512:
     return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID);
 
+  case X86::BI__builtin_ia32_cvtsd2ss:
+    return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call);
+
+  case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
+    return interp__builtin_ia32_cvtsd2ss_round_mask(S, OpPC, Call);
+  case X86::BI__builtin_ia32_cvtpd2ps:
+  case X86::BI__builtin_ia32_cvtpd2ps256:
+  case X86::BI__builtin_ia32_cvtpd2ps_mask:
+  case X86::BI__builtin_ia32_cvtpd2ps512_mask:
+    return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, BuiltinID);
+
   case X86::BI__builtin_ia32_cmpb128_mask:
   case X86::BI__builtin_ia32_cmpw128_mask:
   case X86::BI__builtin_ia32_cmpd128_mask:

From d5084f7beedd37a6cec81558b2c00224dbc5d8d6 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Mon, 1 Dec 2025 13:14:03 +0200
Subject: [PATCH 11/25] Styled the tests

---
 clang/test/CodeGen/X86/avx-builtins.c         |   2 +
 clang/test/CodeGen/X86/avx512f-builtins.c     |  12 +
 clang/test/CodeGen/X86/avx512vl-builtins.c    |   8 +
 clang/test/CodeGen/X86/sse2-builtins.c        |   4 +
 .../SemaCXX/constexpr-x86-avx-builtins.cpp    |  18 +
 .../constexpr-x86-avx512f-builtins.cpp        | 230 +++++++++
 .../constexpr-x86-avx512vl-builtins.cpp       | 120 +++++
 .../constexpr-x86-intrinsics-pd2ps.cpp        | 479 ------------------
 .../SemaCXX/constexpr-x86-sse2-builtins.cpp   |  79 +++
 9 files changed, 473 insertions(+), 479 deletions(-)
 create mode 100644 clang/test/SemaCXX/constexpr-x86-avx-builtins.cpp
 create mode 100644 clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp
 create mode 100644 clang/test/SemaCXX/constexpr-x86-avx512vl-builtins.cpp
 delete mode 100644 clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
 create mode 100644 clang/test/SemaCXX/constexpr-x86-sse2-builtins.cpp

diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 00bcf9cc1da58..13da4292c5b92 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -968,6 +968,8 @@ __m128 test_mm256_cvtpd_ps(__m256d A) {
   return _mm256_cvtpd_ps(A);
 }
 
+TEST_CONSTEXPR(match_m128(_mm256_cvtpd_ps((__m256d){ 0.0, -1.0, +2.0, +3.5 }), 0.0f, -1.0f, +2.0f, +3.5f));
+
 __m256i test_mm256_cvtps_epi32(__m256 A) {
   // CHECK-LABEL: test_mm256_cvtps_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %{{.*}})
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 6401a0e55a83b..499cbd9dee30a 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10615,6 +10615,8 @@ __m256 test_mm512_cvtpd_ps (__m512d __A)
   return _mm512_cvtpd_ps (__A);
 }
 
+TEST_CONSTEXPR(match_m256(_mm512_cvtpd_ps((__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f));
+
 __m256 test_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
 {
   // CHECK-LABEL: test_mm512_mask_cvtpd_ps 
@@ -10622,6 +10624,8 @@ __m256 test_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
   return _mm512_mask_cvtpd_ps (__W,__U,__A);
 }
 
+TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps((__m256){ 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f }, 0x05, (__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, 9.0f, +4.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f));
+
 __m512 test_mm512_cvtpd_pslo(__m512d __A)
 {
   // CHECK-LABEL: test_mm512_cvtpd_pslo
@@ -10631,6 +10635,8 @@ __m512 test_mm512_cvtpd_pslo(__m512d __A)
   return _mm512_cvtpd_pslo(__A);
 }
 
+TEST_CONSTEXPR(match_m512(_mm512_cvtpd_pslo((__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+
 __m512 test_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) {
   // CHECK-LABEL: test_mm512_mask_cvtpd_pslo
   // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512
@@ -10639,6 +10645,8 @@ __m512 test_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) {
   return _mm512_mask_cvtpd_pslo(__W, __U, __A);
 }
 
+TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpd_pslo((__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }, 0x3, (__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, +2.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+
 __m256 test_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
 {
   // CHECK-LABEL: test_mm512_maskz_cvtpd_ps 
@@ -11860,12 +11868,16 @@ __m128 test_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
   return _mm_mask_cvtsd_ss(__W, __U, __A, __B); 
 }
 
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtsd_ss((__m128){ 9.0f, 5.0f, 6.0f, 7.0f }, 0x1, (__m128){ 1.0f, 2.0f, 3.0f, 4.0f }, (__m128d){ -1.0, 42.0 }), -1.0f, 2.0f, 3.0f, 4.0f));
+
 __m128 test_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) {
   // CHECK-LABEL: test_mm_maskz_cvtsd_ss
   // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round
   return _mm_maskz_cvtsd_ss(__U, __A, __B); 
 }
 
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtsd_ss(0x1, (__m128){ 1.0f, 2.0f, 3.0f, 4.0f }, (__m128d){ -1.0, 42.0 }), -1.0f, 2.0f, 3.0f, 4.0f));
+
 __m512i test_mm512_setzero_epi32(void)
 {
   // CHECK-LABEL: test_mm512_setzero_epi32
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 5f6d8360888f5..013c19ba7a929 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -3999,23 +3999,31 @@ __m128 test_mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A) {
   // CHECK: @llvm.x86.avx512.mask.cvtpd2ps
   return _mm_mask_cvtpd_ps(__W,__U,__A); 
 }
+
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtpd_ps((__m128){ 9.0f, 9.0f, 9.0f, 9.0f }, 0x3, (__m128d){ -1.0, +2.0 }), -1.0f, +2.0f, 9.0f, 9.0f));
 __m128 test_mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A) {
   // CHECK-LABEL: test_mm_maskz_cvtpd_ps
   // CHECK: @llvm.x86.avx512.mask.cvtpd2ps
   return _mm_maskz_cvtpd_ps(__U,__A); 
 }
+
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtpd_ps(0x1, (__m128d){ -1.0, +2.0 }), -1.0f, 0.0f, 0.0f, 0.0f));
 __m128 test_mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A) {
   // CHECK-LABEL: test_mm256_mask_cvtpd_ps
   // CHECK: @llvm.x86.avx.cvt.pd2.ps.256
   // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
   return _mm256_mask_cvtpd_ps(__W,__U,__A); 
 }
+
+TEST_CONSTEXPR(match_m128(_mm256_mask_cvtpd_ps((__m128){ 9.0f, 9.0f, 9.0f, 9.0f }, 0xF, (__m256d){ 0.0, -1.0, +2.0, +3.5 }), 0.0f, -1.0f, +2.0f, +3.5f));
 __m128 test_mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A) {
   // CHECK-LABEL: test_mm256_maskz_cvtpd_ps
   // CHECK: @llvm.x86.avx.cvt.pd2.ps.256
   // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
   return _mm256_maskz_cvtpd_ps(__U,__A); 
 }
+
+TEST_CONSTEXPR(match_m128(_mm256_maskz_cvtpd_ps(0x5, (__m256d){ 0.0, -1.0, +2.0, +3.5 }), 0.0f, 0.0f, +2.0f, 0.0f));
 __m128i test_mm_cvtpd_epu32(__m128d __A) {
   // CHECK-LABEL: test_mm_cvtpd_epu32
   // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.128
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index ed1ac84b8c4a3..c4975b456ba22 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -573,6 +573,8 @@ __m128 test_mm_cvtpd_ps(__m128d A) {
   return _mm_cvtpd_ps(A);
 }
 
+TEST_CONSTEXPR(match_m128(_mm_cvtpd_ps((__m128d){ -1.0, +2.0 }), -1.0f, +2.0f, 0.0f, 0.0f));
+
 __m128i test_mm_cvtps_epi32(__m128 A) {
   // CHECK-LABEL: test_mm_cvtps_epi32
   // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}})
@@ -614,6 +616,8 @@ __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
   return _mm_cvtsd_ss(A, B);
 }
 
+TEST_CONSTEXPR(match_m128(_mm_cvtsd_ss((__m128){ 9.0f, 5.0f, 6.0f, 7.0f }, (__m128d){ -1.0, 42.0 }), -1.0f, 5.0f, 6.0f, 7.0f));
+
 int test_mm_cvtsi128_si32(__m128i A) {
   // CHECK-LABEL: test_mm_cvtsi128_si32
   // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
diff --git a/clang/test/SemaCXX/constexpr-x86-avx-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-avx-builtins.cpp
new file mode 100644
index 0000000000000..724aff3011ded
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-avx-builtins.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +avx -verify %s
+
+#include <immintrin.h>
+#include "../CodeGen/X86/builtin_test_helpers.h"
+
+namespace Test_mm256_cvtpd_ps {
+namespace OK {
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+TEST_CONSTEXPR(match_m128(_mm256_cvtpd_ps(a), 0.0f, -1.0f, +2.0f, +3.5f));
+}
+namespace Inexact {
+constexpr __m256d a = { 1.0000000000000002, 0.0, 0.0, 0.0 };
+constexpr __m128 r = _mm256_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm256_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
+}
+}
diff --git a/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp
new file mode 100644
index 0000000000000..0d2a82cbbb83c
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp
@@ -0,0 +1,230 @@
+// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +avx512f -verify %s
+
+#include <immintrin.h>
+#include "../CodeGen/X86/builtin_test_helpers.h"
+
+namespace Test_mm_mask_cvtsd_ss {
+namespace OK {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtsd_ss(src, 0x1, a, b), -1.0f, 2.0f, 3.0f, 4.0f));
+}
+namespace MaskOff {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtsd_ss(src, 0x0, a, b), 9.0f, 2.0f, 3.0f, 4.0f));
+}
+namespace MaskOffInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b_inexact);
+TEST_CONSTEXPR(match_m128(r, 9.0f, 2.0f, 3.0f, 4.0f));
+}
+namespace MaskOnInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inexact);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
+}
+namespace MaskOnInf {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
+}
+namespace MaskOnSubnormal {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_sub = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_sub);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
+}
+}
+
+namespace Test_mm_maskz_cvtsd_ss {
+namespace OK {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtsd_ss(0x1, a, b), -1.0f, 2.0f, 3.0f, 4.0f));
+}
+namespace MaskOff {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtsd_ss(0x0, a, b), 0.0f, 2.0f, 3.0f, 4.0f));
+}
+namespace MaskOffInexact {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtsd_ss(0x0, a, b_inexact), 0.0f, 2.0f, 3.0f, 4.0f));
+}
+namespace MaskOnInf {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
+}
+}
+
+namespace Test_mm512_cvtpd_ps {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_cvtpd_ps(a), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f));
+}
+namespace Inexact {
+constexpr __m512d a = { 1.0000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+constexpr __m256 r = _mm512_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm512_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
+}
+}
+
+namespace Test_mm512_mask_cvtpd_ps {
+namespace OK {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps(src, 0x05, a), -1.0f, 9.0f, +4.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f));
+}
+namespace MaskOffInexact {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps(src, 0b11011111, a_inexact), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 9.0f, +64.0f, +128.0f));
+}
+namespace MaskOffInf {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps(src, 0x1F, a_inf), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 9.0f, 9.0f, 9.0f));
+}
+namespace MaskOffNaN {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps(src, 0x1F, a_nan), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 9.0f, 9.0f, 9.0f));
+}
+namespace MaskOnInf {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+}
+
+namespace Test_mm512_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_maskz_cvtpd_ps(0x81, a), -1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, +128.0f));
+}
+namespace MaskOffInexact {
+constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_maskz_cvtpd_ps(0b11011111, a_inexact), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 0.0f, +64.0f, +128.0f));
+}
+namespace MaskOffInf {
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_maskz_cvtpd_ps(0x1F, a_inf), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOffNaN {
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_maskz_cvtpd_ps(0x1F, a_nan), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOnInf {
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+}
+
+namespace Test_mm512_cvtpd_pslo {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m512(_mm512_cvtpd_pslo(a), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+}
+}
+
+namespace Test_mm512_mask_cvtpd_pslo {
+namespace OK {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpd_pslo(src, 0x3, a), -1.0f, +2.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOffInf {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpd_pslo(src, 0x3, a_inf), -1.0f, +2.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOffNaN {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpd_pslo(src, 0x7, a_nan), -1.0f, +2.0f, +4.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOnInf {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, __builtin_nan(""), +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+}
diff --git a/clang/test/SemaCXX/constexpr-x86-avx512vl-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-avx512vl-builtins.cpp
new file mode 100644
index 0000000000000..bdce60a357f13
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-avx512vl-builtins.cpp
@@ -0,0 +1,120 @@
+// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +avx512f -target-feature +avx512vl -verify %s
+
+#include <immintrin.h>
+#include "../CodeGen/X86/builtin_test_helpers.h"
+
+namespace Test_mm_mask_cvtpd_ps {
+namespace OK {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a = { -1.0, +2.0 };
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtpd_ps(src, 0x3, a), -1.0f, +2.0f, 9.0f, 9.0f));
+}
+namespace Partial {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a = { -1.0, +2.0 };
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtpd_ps(src, 0x1, a), -1.0f, 9.0f, 9.0f, 9.0f));
+}
+namespace MaskOffInexact {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtpd_ps(src, 0x1, a_inexact), -1.0f, 9.0f, 9.0f, 9.0f));
+}
+namespace MaskOnInexact {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inexact);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, 1.000000e+00})'}}
+}
+namespace MaskOnInf {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, INF})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, nan})'}}
+}
+}
+
+namespace Test_mm_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m128d a = { -1.0, +2.0 };
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtpd_ps(0x1, a), -1.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOffInexact {
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtpd_ps(0x1, a_inexact), -1.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOnInf {
+constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, INF})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, nan})'}}
+}
+}
+
+namespace Test_mm256_mask_cvtpd_ps {
+namespace OK {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+TEST_CONSTEXPR(match_m128(_mm256_mask_cvtpd_ps(src, 0xF, a), 0.0f, -1.0f, +2.0f, +3.5f));
+}
+namespace MaskOffInf {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x3, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+}
+namespace MaskOffNaN {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x7, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+}
+}
+
+namespace Test_mm256_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+TEST_CONSTEXPR(match_m128(_mm256_maskz_cvtpd_ps(0x5, a), 0.0f, 0.0f, +2.0f, 0.0f));
+}
+namespace MaskOffInf {
+constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x3, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+}
+namespace MaskOffNaN {
+constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x7, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+}
+}
diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
deleted file mode 100644
index 4a1e9a9c5ae2c..0000000000000
--- a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
+++ /dev/null
@@ -1,479 +0,0 @@
-// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512vl -verify %s
-
-#define __MM_MALLOC_H 
-#include <immintrin.h>
-
-namespace Test_mm_cvtsd_ss {
-namespace OK {
-constexpr __m128 a = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128d b = { -1.0, 42.0 };
-constexpr __m128 r = _mm_cvtsd_ss(a, b);
-static_assert(r[0] == -1.0f && r[1] == 5.0f && r[2] == 6.0f && r[3] == 7.0f, "");
-}
-namespace Inexact {
-constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
-constexpr __m128d b = { 1.0000000000000002, 0.0 };
-constexpr __m128 r = _mm_cvtsd_ss(a, b);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
-}
-namespace Inf {
-constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
-constexpr __m128d b = { __builtin_huge_val(), 0.0 };
-constexpr __m128 r = _mm_cvtsd_ss(a, b);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {INF, 0.000000e+00})'}}
-}
-namespace NaN {
-constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
-constexpr __m128d b = { __builtin_nan(""), 0.0 };
-constexpr __m128 r = _mm_cvtsd_ss(a, b);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {nan, 0.000000e+00})'}}
-}
-namespace Subnormal {
-constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
-constexpr __m128d b = { 1e-310, 0.0 };
-constexpr __m128 r = _mm_cvtsd_ss(a, b);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
-}
-}
-
-namespace Test_mm_mask_cvtsd_ss {
-namespace OK {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b = { -1.0, 42.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b);
-static_assert(r[0] == -1.0f && r[1] == 2.0f && r[2] == 3.0f && r[3] == 4.0f, "");
-}
-namespace MaskOff {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b = { -1.0, 42.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b);
-static_assert(r[0] == 9.0f && r[1] == 2.0f, "");
-}
-namespace MaskOffInexact {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b_inexact);
-static_assert(r[0] == 9.0f, "");
-}
-namespace MaskOnInexact {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inexact);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
-}
-namespace MaskOnInf {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
-}
-namespace MaskOnNaN {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
-}
-namespace MaskOnSubnormal {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_sub = { 1e-310, 0.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_sub);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
-}
-}
-
-namespace Test_mm_maskz_cvtsd_ss {
-namespace OK {
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b = { -1.0, 42.0 };
-constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b);
-static_assert(r[0] == -1.0f && r[1] == 2.0f, "");
-}
-namespace MaskOff {
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b = { -1.0, 42.0 };
-constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b);
-static_assert(r[0] == 0.0f && r[1] == 2.0f, "");
-}
-namespace MaskOffInexact {
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
-constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b_inexact);
-static_assert(r[0] == 0.0f, "");
-}
-namespace MaskOnInf {
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
-constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
-}
-namespace MaskOnNaN {
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
-constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
-}
-}
-
-namespace Test_mm_cvtpd_ps {
-namespace OK {
-constexpr __m128d a = { -1.0, +2.0 };
-constexpr __m128 r = _mm_cvtpd_ps(a);
-static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
-static_assert(r[2] == 0.0f && r[3] == 0.0f, "");
-}
-namespace Inexact {
-constexpr __m128d a = { 1.0000000000000002, 0.0 };
-constexpr __m128 r = _mm_cvtpd_ps(a);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e+00, 0.000000e+00})'}}
-}
-namespace Inf {
-constexpr __m128d a = { __builtin_huge_val(), 0.0 };
-constexpr __m128 r = _mm_cvtpd_ps(a);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm_cvtpd_ps({INF, 0.000000e+00})'}}
-}
-namespace NaN {
-constexpr __m128d a = { __builtin_nan(""), 0.0 };
-constexpr __m128 r = _mm_cvtpd_ps(a);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm_cvtpd_ps({nan, 0.000000e+00})'}}
-}
-namespace Subnormal {
-constexpr __m128d a = { 1e-310, 0.0 };
-constexpr __m128 r = _mm_cvtpd_ps(a);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e-310, 0.000000e+00})'}}
-}
-}
-
-namespace Test_mm_mask_cvtpd_ps {
-namespace OK {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d a = { -1.0, +2.0 };
-constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x3, a);
-static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
-static_assert(r[2] == 9.0f && r[3] == 9.0f, "");
-}
-namespace Partial {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d a = { -1.0, +2.0 };
-constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a);
-static_assert(r[0] == -1.0f && r[1] == 9.0f, "");
-}
-namespace MaskOffInexact {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
-constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a_inexact);
-static_assert(r[0] == -1.0f && r[1] == 9.0f, "");
-}
-namespace MaskOnInexact {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
-constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inexact);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512vlintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, 1.000000e+00})'}}
-}
-namespace MaskOnInf {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
-constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, INF})'}}
-}
-namespace MaskOnNaN {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
-constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, nan})'}}
-}
-}
-
-namespace Test_mm_maskz_cvtpd_ps {
-namespace OK {
-constexpr __m128d a = { -1.0, +2.0 };
-constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a);
-static_assert(r[0] == -1.0f && r[1] == 0.0f, "");
-static_assert(r[2] == 0.0f && r[3] == 0.0f, "");
-}
-namespace MaskOffInexact {
-constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
-constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a_inexact);
-static_assert(r[0] == -1.0f && r[1] == 0.0f, "");
-}
-namespace MaskOnInf {
-constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
-constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, INF})'}}
-}
-namespace MaskOnNaN {
-constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
-constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, nan})'}}
-}
-}
-
-namespace Test_mm256_cvtpd_ps {
-namespace OK {
-constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
-constexpr __m128 r = _mm256_cvtpd_ps(a);
-static_assert(r[0] == 0.0f && r[1] == -1.0f, "");
-static_assert(r[2] == +2.0f && r[3] == +3.5f, "");
-}
-namespace Inexact {
-constexpr __m256d a = { 1.0000000000000002, 0.0, 0.0, 0.0 };
-constexpr __m128 r = _mm256_cvtpd_ps(a);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avxintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm256_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
-}
-}
-
-namespace Test_mm256_mask_cvtpd_ps {
-namespace OK {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
-constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0xF, a);
-static_assert(r[0] == 0.0f && r[1] == -1.0f && r[2] == +2.0f && r[3] == +3.5f, "");
-}
-namespace MaskOffInf {
-// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
-// So even masked-off Inf/NaN values cause errors (architectural limitation)
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
-constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x3, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
-// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
-}
-namespace MaskOffNaN {
-// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
-// So even masked-off Inf/NaN values cause errors (architectural limitation)
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
-constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x7, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
-// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
-}
-}
-
-namespace Test_mm256_maskz_cvtpd_ps {
-namespace OK {
-constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
-constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x5, a);
-static_assert(r[0] == 0.0f && r[1] == 0.0f && r[2] == +2.0f && r[3] == 0.0f, "");
-}
-namespace MaskOffInf {
-// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
-// So even masked-off Inf/NaN values cause errors (architectural limitation)
-constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
-constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x3, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
-// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
-}
-namespace MaskOffNaN {
-// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
-// So even masked-off Inf/NaN values cause errors (architectural limitation)
-constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
-constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x7, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
-// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
-}
-}
-
-namespace Test_mm512_cvtpd_ps {
-namespace OK {
-constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_cvtpd_ps(a);
-static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
-}
-namespace Inexact {
-constexpr __m512d a = { 1.0000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
-constexpr __m256 r = _mm512_cvtpd_ps(a);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm512_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
-}
-}
-
-namespace Test_mm512_mask_cvtpd_ps {
-namespace OK {
-constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x05, a);
-static_assert(r[0] == -1.0f && r[2] == +4.0f, "");
-static_assert(r[1] == 9.0f && r[3] == 9.0f, "");
-}
-namespace MaskOffInexact {
-constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
-constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0b11011111, a_inexact);
-static_assert(r[0] == -1.0f && r[5] == 9.0f && r[6] == 64.0f && r[7] == 128.0f, "");
-}
-namespace MaskOffInf {
-constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
-constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_inf);
-static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, "");
-}
-namespace MaskOffNaN {
-constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
-constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_nan);
-static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, "");
-}
-namespace MaskOnInf {
-constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-}
-namespace MaskOnNaN {
-constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-}
-}
-
-namespace Test_mm512_maskz_cvtpd_ps {
-namespace OK {
-constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x81, a);
-static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
-static_assert(r[1] == 0.0f && r[6] == 0.0f, "");
-}
-namespace MaskOffInexact {
-constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
-constexpr __m256 r = _mm512_maskz_cvtpd_ps(0b11011111, a_inexact);
-static_assert(r[0] == -1.0f && r[5] == 0.0f && r[6] == 64.0f && r[7] == 128.0f, "");
-}
-namespace MaskOffInf {
-constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
-constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_inf);
-static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, "");
-}
-namespace MaskOffNaN {
-constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
-constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_nan);
-static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, "");
-}
-namespace MaskOnInf {
-constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-}
-namespace MaskOnNaN {
-constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-}
-}
-
-namespace Test_mm512_cvtpd_pslo {
-namespace OK {
-constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m512 r = _mm512_cvtpd_pslo(a);
-static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
-static_assert(r[8] == 0.0f && r[15] == 0.0f, "");
-}
-}
-
-namespace Test_mm512_mask_cvtpd_pslo {
-namespace OK {
-constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
-                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a);
-static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
-static_assert(r[2] == 9.0f && r[3] == 9.0f, "");
-}
-namespace MaskOffInf {
-constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
-                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a_inf);
-static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 9.0f, "");
-}
-namespace MaskOffNaN {
-constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
-                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
-constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x7, a_nan);
-static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 4.0f && r[3] == 9.0f, "");
-}
-namespace MaskOnInf {
-constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
-                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-}
-namespace MaskOnNaN {
-constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
-                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_nan = { -1.0, +2.0, __builtin_nan(""), +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-}
-}
diff --git a/clang/test/SemaCXX/constexpr-x86-sse2-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-sse2-builtins.cpp
new file mode 100644
index 0000000000000..319a3b02a94f9
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-sse2-builtins.cpp
@@ -0,0 +1,79 @@
+// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +sse2 -verify %s
+
+#include <immintrin.h>
+#include "../CodeGen/X86/builtin_test_helpers.h"
+
+namespace Test_mm_cvtsd_ss {
+namespace OK {
+constexpr __m128 a = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+TEST_CONSTEXPR(match_m128(_mm_cvtsd_ss(a, b), -1.0f, 5.0f, 6.0f, 7.0f));
+}
+namespace Inexact {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
+}
+namespace Inf {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace NaN {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {nan, 0.000000e+00})'}}
+}
+namespace Subnormal {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
+}
+}
+
+namespace Test_mm_cvtpd_ps {
+namespace OK {
+constexpr __m128d a = { -1.0, +2.0 };
+TEST_CONSTEXPR(match_m128(_mm_cvtpd_ps(a), -1.0f, +2.0f, 0.0f, 0.0f));
+}
+namespace Inexact {
+constexpr __m128d a = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e+00, 0.000000e+00})'}}
+}
+namespace Inf {
+constexpr __m128d a = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({INF, 0.000000e+00})'}}
+}
+namespace NaN {
+constexpr __m128d a = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({nan, 0.000000e+00})'}}
+}
+namespace Subnormal {
+constexpr __m128d a = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e-310, 0.000000e+00})'}}
+}
+}

From 51d213d48fcb7d8bf72d835aeda051e94653dc86 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Mon, 1 Dec 2025 13:46:38 +0200
Subject: [PATCH 12/25] Apply formatter to InterpBuiltin.cpp, ExprConstant.cpp, avx512fintrin.h

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 30 ++++++++++++++----------
 clang/lib/AST/ExprConstant.cpp           |  1 -
 clang/lib/Headers/avx512fintrin.h        |  7 +++---
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ad49eb14b911f..5e8b8e0e31bb6 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -169,8 +169,8 @@ static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
 
 // Strict double -> float conversion used for X86 PD2PS/cvtsd2ss intrinsics.
 // Reject NaN/Inf/Subnormal inputs and any lossy/inexact conversions.
-static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst, InterpState &S,
-                                const Expr *DiagExpr) {
+static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst,
+                                       InterpState &S, const Expr *DiagExpr) {
   if (Src.isInfinity()) {
     if (S.diagnosing())
       S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 0;
@@ -183,7 +183,8 @@ static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst, InterpState &
   }
   APFloat Val = Src;
   bool LosesInfo = false;
-  APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
+  APFloat::opStatus Status = Val.convert(
+      APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
   if (LosesInfo || Val.isDenormal()) {
     if (S.diagnosing())
       S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic_strict);
@@ -3391,7 +3392,7 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
   return true;
 }
 static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
-                                         const CallExpr *Call) {
+                                          const CallExpr *Call) {
   assert(Call->getNumArgs() == 2);
 
   const Pointer &B = S.Stk.pop<Pointer>();
@@ -3420,8 +3421,8 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
 }
 
 static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
-                                                      CodePtr OpPC,
-                                                      const CallExpr *Call) {
+                                                     CodePtr OpPC,
+                                                     const CallExpr *Call) {
   assert(Call->getNumArgs() == 5);
 
   // Pop in reverse order: rounding, mask, src, b, a
@@ -3430,7 +3431,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
   const Pointer &Src = S.Stk.pop<Pointer>();
   const Pointer &B = S.Stk.pop<Pointer>();
   const Pointer &A = S.Stk.pop<Pointer>();
-  if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) || !CheckLoad(S, OpPC, Src))
+  if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) ||
+      !CheckLoad(S, OpPC, Src))
     return false;
 
   const auto *DstVTy = Call->getType()->castAs<VectorType>();
@@ -3441,7 +3443,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
   for (unsigned I = 0; I != NumElems; ++I)
     Dst.elem<Floating>(I) = A.elem<Floating>(I);
 
-  // If mask bit 0 is set, convert element 0 from double to float; otherwise use Src
+  // If mask bit 0 is set, convert element 0 from double to float; otherwise use
+  // Src
   if (MaskInt.getZExtValue() & 0x1) {
     Floating Conv = S.allocFloat(
         S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy));
@@ -3463,12 +3466,12 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
   bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
                    BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
   bool HasRounding = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
-  
+
   APSInt MaskVal(1, false);
   Pointer PassThrough;
   Pointer SrcPd;
   APSInt Rounding;
-  
+
   if (IsMasked) {
     // Pop in reverse order
     if (HasRounding) {
@@ -3483,14 +3486,14 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
       PassThrough = S.Stk.pop<Pointer>();
       SrcPd = S.Stk.pop<Pointer>();
     }
-    
+
     if (!CheckLoad(S, OpPC, PassThrough))
       return false;
   } else {
     // Pop source only
     SrcPd = S.Stk.pop<Pointer>();
   }
-  
+
   if (!CheckLoad(S, OpPC, SrcPd))
     return false;
 
@@ -3508,7 +3511,8 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
     }
   }
 
-  // Convert double to float for enabled elements (only process source elements that exist)
+  // Convert double to float for enabled elements (only process source elements
+  // that exist)
   for (unsigned I = 0; I != SrcElems; ++I) {
     if (IsMasked && (((MaskVal.getZExtValue() >> I) & 0x1) == 0))
       continue;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3778b0af80b5a..0a9776cb9ee87 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12951,7 +12951,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
 
-
   case X86::BI__builtin_ia32_cvtsd2ss: {
     APValue VecA, VecB;
     if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index edcbdba908522..f019fb45e2eea 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3489,10 +3489,9 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_cvtpd_ps(__m512d __A) {
-  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
-                (__v8sf) _mm256_setzero_ps (),
-                (__mmask8) -1,
-                _MM_FROUND_CUR_DIRECTION);
+  return (__m256)__builtin_ia32_cvtpd2ps512_mask(
+      (__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR

From 154eea17cd5f9db2de7a6e366cbda917e867d716 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Mon, 1 Dec 2025 14:39:09 +0200
Subject: [PATCH 13/25] Fixed The Formatting!

---
 clang/lib/Headers/avx512fintrin.h | 8 ++++----
 clang/lib/Headers/avxintrin.h     | 3 +--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index f019fb45e2eea..88dfb2fa29878 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3487,8 +3487,8 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_cvtpd_ps(__m512d __A) {
+static __inline__ __m256 
+__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A) {
   return (__m256)__builtin_ia32_cvtpd2ps512_mask(
       (__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION);
@@ -5374,8 +5374,8 @@ _mm512_kmov (__mmask16 __A)
   ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
 #endif
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_sll_epi32(__m512i __A, __m128i __B) {
+static __inline__ __m512i 
+__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
 }
 
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 126ba30bcca7e..9b45bc3e56bdb 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -3605,8 +3605,7 @@ _mm256_undefined_pd(void)
 /// This intrinsic has no corresponding instruction.
 ///
 /// \returns A 256-bit vector of [8 x float] containing undefined values.
-static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_undefined_ps(void) {
+static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void) {
   return (__m256)__builtin_ia32_undef256();
 }
 

From c5ecb01aa231e7fefbb09a5a6d90dff60963426a Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Mon, 1 Dec 2025 14:48:02 +0200
Subject: [PATCH 14/25] Formatted avx512f header using the project's current
 clang-format

---
 clang/lib/Headers/avx512fintrin.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 88dfb2fa29878..7dbf137d8cac8 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3487,8 +3487,8 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
 
-static __inline__ __m256 
-__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A) {
+static __inline__ __m256
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A) {
   return (__m256)__builtin_ia32_cvtpd2ps512_mask(
       (__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION);
@@ -5374,8 +5374,8 @@ _mm512_kmov (__mmask16 __A)
   ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
 #endif
 
-static __inline__ __m512i 
-__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B) {
+static __inline__ __m512i
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
 }
 

From 28a823a970255496b48629ebad12cd313cfc71a7 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <53662962+HamzaHassanain@users.noreply.github.com>
Date: Mon, 1 Dec 2025 17:55:38 +0200
Subject: [PATCH 15/25] Update clang/lib/AST/ByteCode/InterpBuiltin.cpp

Co-authored-by: Timm Baeder <tbaeder@redhat.com>
---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 5e8b8e0e31bb6..819c3f27239b3 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3404,7 +3404,7 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
   unsigned NumElems = DstVTy->getNumElements();
   const Pointer &Dst = S.Stk.peek<Pointer>();
 
-  // Copy all elements from A to Dst
+  // Copy all elements from A to Dst.
   for (unsigned I = 0; I != NumElems; ++I)
     Dst.elem<Floating>(I) = A.elem<Floating>(I);
 

From b9b71bd174b4628d4c025c67fc6ca1c5a030ae73 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Mon, 1 Dec 2025 22:45:01 +0200
Subject: [PATCH 16/25] Did the Requested Changes

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 70 +++++++++++-------------
 1 file changed, 32 insertions(+), 38 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 5e8b8e0e31bb6..32238c877aad9 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3404,13 +3404,13 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
   unsigned NumElems = DstVTy->getNumElements();
   const Pointer &Dst = S.Stk.peek<Pointer>();
 
-  // Copy all elements from A to Dst
-  for (unsigned I = 0; I != NumElems; ++I)
+  // Copy all elements except lane 0 (overwritten below) from A to Dst.
+  for (unsigned I = 1; I < NumElems; ++I)
     Dst.elem<Floating>(I) = A.elem<Floating>(I);
 
-  // Convert element 0 from double to float
+  // Convert element 0 from double to float.
   Floating Conv = S.allocFloat(
-      S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy));
+      S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType()));
   APFloat SrcD = B.elem<Floating>(0).getAPFloat();
   if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call))
     return false;
@@ -3425,9 +3425,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
                                                      const CallExpr *Call) {
   assert(Call->getNumArgs() == 5);
 
-  // Pop in reverse order: rounding, mask, src, b, a
-  APSInt Rounding = popToAPSInt(S, Call->getArg(4)->getType());
-  APSInt MaskInt = popToAPSInt(S, Call->getArg(3)->getType());
+  APSInt Rounding = popToAPSInt(S, Call->getArg(4));
+  APSInt MaskInt = popToAPSInt(S, Call->getArg(3));
   const Pointer &Src = S.Stk.pop<Pointer>();
   const Pointer &B = S.Stk.pop<Pointer>();
   const Pointer &A = S.Stk.pop<Pointer>();
@@ -3439,17 +3438,17 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
   unsigned NumElems = DstVTy->getNumElements();
   const Pointer &Dst = S.Stk.peek<Pointer>();
 
-  // Copy all elements from A to Dst
-  for (unsigned I = 0; I != NumElems; ++I)
+  // Copy all elements except lane 0 (overwritten below) from A to Dst.
+  for (unsigned I = 1; I < NumElems; ++I)
     Dst.elem<Floating>(I) = A.elem<Floating>(I);
 
   // If mask bit 0 is set, convert element 0 from double to float; otherwise use
-  // Src
+  // Src.
   if (MaskInt.getZExtValue() & 0x1) {
     Floating Conv = S.allocFloat(
-        S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy));
-    APFloat SrcD = B.elem<Floating>(0).getAPFloat();
-    if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call))
+        S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType()));
+    APFloat Src = B.elem<Floating>(0).getAPFloat();
+    if (!convertDoubleToFloatStrict(Src, Conv, S, Call))
       return false;
     Dst.elem<Floating>(0) = Conv;
   } else {
@@ -3467,60 +3466,55 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
                    BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
   bool HasRounding = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
 
-  APSInt MaskVal(1, false);
+  APSInt MaskVal;
   Pointer PassThrough;
-  Pointer SrcPd;
+  Pointer Src;
   APSInt Rounding;
 
   if (IsMasked) {
-    // Pop in reverse order
+    // Pop in reverse order.
     if (HasRounding) {
-      // For 512: rounding, mask, passthrough, source
-      Rounding = popToAPSInt(S, Call->getArg(3)->getType());
-      MaskVal = popToAPSInt(S, Call->getArg(2)->getType());
+      Rounding = popToAPSInt(S, Call->getArg(3));
+      MaskVal = popToAPSInt(S, Call->getArg(2));
       PassThrough = S.Stk.pop<Pointer>();
-      SrcPd = S.Stk.pop<Pointer>();
+      Src = S.Stk.pop<Pointer>();
     } else {
-      // For VL: mask, passthrough, source
       MaskVal = popToAPSInt(S, Call->getArg(2)->getType());
       PassThrough = S.Stk.pop<Pointer>();
-      SrcPd = S.Stk.pop<Pointer>();
+      Src = S.Stk.pop<Pointer>();
     }
 
     if (!CheckLoad(S, OpPC, PassThrough))
       return false;
   } else {
-    // Pop source only
-    SrcPd = S.Stk.pop<Pointer>();
+    // Pop source only.
+    Src = S.Stk.pop<Pointer>();
   }
 
-  if (!CheckLoad(S, OpPC, SrcPd))
+  if (!CheckLoad(S, OpPC, Src))
     return false;
 
   const auto *RetVTy = Call->getType()->castAs<VectorType>();
   unsigned RetElems = RetVTy->getNumElements();
-  unsigned SrcElems = SrcPd.getNumElems();
+  unsigned SrcElems = Src.getNumElems();
   const Pointer &Dst = S.Stk.peek<Pointer>();
 
-  // Initialize destination with passthrough or zeros
-  for (unsigned I = 0; I != RetElems; ++I) {
-    if (IsMasked) {
+  // Initialize destination with passthrough or zeros.
+  for (unsigned I = 0; I != RetElems; ++I)
+    if (IsMasked)
       Dst.elem<Floating>(I) = PassThrough.elem<Floating>(I);
-    } else {
+    else
       Dst.elem<Floating>(I) = Floating(APFloat(0.0f));
-    }
-  }
 
-  // Convert double to float for enabled elements (only process source elements
-  // that exist)
+  // Convert double to float for enabled elements (only process source elements that exist).
   for (unsigned I = 0; I != SrcElems; ++I) {
-    if (IsMasked && (((MaskVal.getZExtValue() >> I) & 0x1) == 0))
+    if (IsMasked && !MaskVal[I])
       continue;
 
-    APFloat SrcD = SrcPd.elem<Floating>(I).getAPFloat();
+    APFloat Src = Src.elem<Floating>(I).getAPFloat();
     Floating Conv = S.allocFloat(
-        S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy));
-    if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call))
+        S.getASTContext().getFloatTypeSemantics(RetVTy->getElementType()));
+    if (!convertDoubleToFloatStrict(Src, Conv, S, Call))
       return false;
     Dst.elem<Floating>(I) = Conv;
   }

From 21ab33c3ece7a2daba30e7adc4f6e52672bf2e6a Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Mon, 1 Dec 2025 22:45:37 +0200
Subject: [PATCH 17/25] Formatted The InterpBuiltin

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 32238c877aad9..89eae9369eb80 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3506,7 +3506,8 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
     else
       Dst.elem<Floating>(I) = Floating(APFloat(0.0f));
 
-  // Convert double to float for enabled elements (only process source elements that exist).
+  // Convert double to float for enabled elements (only process source elements
+  // that exist).
   for (unsigned I = 0; I != SrcElems; ++I) {
     if (IsMasked && !MaskVal[I])
       continue;

From 4957b30793f4965f3afd24fd96bb0adc4663bb88 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Tue, 2 Dec 2025 11:13:19 +0200
Subject: [PATCH 18/25] fixed a naming conflict

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 89eae9369eb80..aabf0b8fc4f03 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3411,8 +3411,8 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
   // Convert element 0 from double to float.
   Floating Conv = S.allocFloat(
       S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType()));
-  APFloat SrcD = B.elem<Floating>(0).getAPFloat();
-  if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call))
+  APFloat SrcVal = B.elem<Floating>(0).getAPFloat();
+  if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call))
     return false;
   Dst.elem<Floating>(0) = Conv;
 
@@ -3447,8 +3447,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
   if (MaskInt.getZExtValue() & 0x1) {
     Floating Conv = S.allocFloat(
         S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType()));
-    APFloat Src = B.elem<Floating>(0).getAPFloat();
-    if (!convertDoubleToFloatStrict(Src, Conv, S, Call))
+    APFloat SrcVal = B.elem<Floating>(0).getAPFloat();
+    if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call))
       return false;
     Dst.elem<Floating>(0) = Conv;
   } else {
@@ -3512,10 +3512,10 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
     if (IsMasked && !MaskVal[I])
       continue;
 
-    APFloat Src = Src.elem<Floating>(I).getAPFloat();
+    APFloat SrcVal = Src.elem<Floating>(I).getAPFloat();
     Floating Conv = S.allocFloat(
         S.getASTContext().getFloatTypeSemantics(RetVTy->getElementType()));
-    if (!convertDoubleToFloatStrict(Src, Conv, S, Call))
+    if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call))
       return false;
     Dst.elem<Floating>(I) = Conv;
   }

From 8b786f02cb2f512969f9b6965f918447f2dd5f8a Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Tue, 2 Dec 2025 11:25:07 +0200
Subject: [PATCH 19/25] added assertion on getElementType() and
 getASTContext().FloatTy

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index aabf0b8fc4f03..b92454d49bfa8 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3445,6 +3445,10 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
   // If mask bit 0 is set, convert element 0 from double to float; otherwise use
   // Src.
   if (MaskInt.getZExtValue() & 0x1) {
+
+    assert(S.getASTContext().FloatTy == DstVTy->getElementType() &&
+           "cvtsd2ss requires float element type in destination vector");
+
     Floating Conv = S.allocFloat(
         S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType()));
     APFloat SrcVal = B.elem<Floating>(0).getAPFloat();
@@ -3506,6 +3510,9 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
     else
       Dst.elem<Floating>(I) = Floating(APFloat(0.0f));
 
+  assert(S.getASTContext().FloatTy == RetVTy->getElementType() &&
+          "cvtpd2ps requires float element type in return vector");
+          
   // Convert double to float for enabled elements (only process source elements
   // that exist).
   for (unsigned I = 0; I != SrcElems; ++I) {
@@ -3513,6 +3520,7 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
       continue;
 
     APFloat SrcVal = Src.elem<Floating>(I).getAPFloat();
+
     Floating Conv = S.allocFloat(
         S.getASTContext().getFloatTypeSemantics(RetVTy->getElementType()));
     if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call))

From 2bab71ecc6aa5a22c129d678dde6e93ef9ec9e41 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Tue, 2 Dec 2025 11:39:33 +0200
Subject: [PATCH 20/25] Ran The formatter Again

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b92454d49bfa8..9a2fe83e98a1e 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3511,8 +3511,8 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
       Dst.elem<Floating>(I) = Floating(APFloat(0.0f));
 
   assert(S.getASTContext().FloatTy == RetVTy->getElementType() &&
-          "cvtpd2ps requires float element type in return vector");
-          
+         "cvtpd2ps requires float element type in return vector");
+
   // Convert double to float for enabled elements (only process source elements
   // that exist).
   for (unsigned I = 0; I != SrcElems; ++I) {

From cc1dadad7570c092019ddbc7d8eb3b308e5cb42e Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Tue, 2 Dec 2025 12:33:52 +0200
Subject: [PATCH 21/25] Did the requested changes

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 9a2fe83e98a1e..44c5a66ad3431 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3439,7 +3439,7 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
   const Pointer &Dst = S.Stk.peek<Pointer>();
 
   // Copy all elements except lane 0 (overwritten below) from A to Dst.
-  for (unsigned I = 1; I < NumElems; ++I)
+  for (unsigned I = 1; I != NumElems; ++I)
     Dst.elem<Floating>(I) = A.elem<Floating>(I);
 
   // If mask bit 0 is set, convert element 0 from double to float; otherwise use
@@ -3483,7 +3483,7 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
       PassThrough = S.Stk.pop<Pointer>();
       Src = S.Stk.pop<Pointer>();
     } else {
-      MaskVal = popToAPSInt(S, Call->getArg(2)->getType());
+      MaskVal = popToAPSInt(S, Call->getArg(2));
       PassThrough = S.Stk.pop<Pointer>();
       Src = S.Stk.pop<Pointer>();
     }
@@ -5346,6 +5346,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
 
   case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
     return interp__builtin_ia32_cvtsd2ss_round_mask(S, OpPC, Call);
+  
   case X86::BI__builtin_ia32_cvtpd2ps:
   case X86::BI__builtin_ia32_cvtpd2ps256:
   case X86::BI__builtin_ia32_cvtpd2ps_mask:

From b2b68d9166fdcb0af6e2cc7a3cea82e320036ca3 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <53662962+HamzaHassanain@users.noreply.github.com>
Date: Tue, 2 Dec 2025 12:45:36 +0200
Subject: [PATCH 22/25] Fix loop condition in element copy operation

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 44c5a66ad3431..4e85ba020fe07 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3405,7 +3405,7 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
   const Pointer &Dst = S.Stk.peek<Pointer>();
 
   // Copy all elements except lane 0 (overwritten below) from A to Dst.
-  for (unsigned I = 1; I < NumElems; ++I)
+  for (unsigned I = 1; I != NumElems; ++I)
     Dst.elem<Floating>(I) = A.elem<Floating>(I);
 
   // Convert element 0 from double to float.

From c430491e46b9477d2fa6bfb9bc0c99b1b211f652 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Tue, 2 Dec 2025 16:13:22 +0200
Subject: [PATCH 23/25] Fixed The Whitespace

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 44c5a66ad3431..314b5258f1a4d 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -5346,7 +5346,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
 
   case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
     return interp__builtin_ia32_cvtsd2ss_round_mask(S, OpPC, Call);
-  
+
   case X86::BI__builtin_ia32_cvtpd2ps:
   case X86::BI__builtin_ia32_cvtpd2ps256:
   case X86::BI__builtin_ia32_cvtpd2ps_mask:

From 88488aa890ea5e37b06cf186de4701a46b43764e Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Tue, 2 Dec 2025 19:27:58 +0200
Subject: [PATCH 24/25] Did the required changes

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 84 +++++++++---------------
 1 file changed, 31 insertions(+), 53 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 6104e331468a0..438c64e919b82 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3392,47 +3392,28 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
   return true;
 }
 static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
-                                          const CallExpr *Call) {
-  assert(Call->getNumArgs() == 2);
-
-  const Pointer &B = S.Stk.pop<Pointer>();
-  const Pointer &A = S.Stk.pop<Pointer>();
-  if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B))
-    return false;
-
-  const auto *DstVTy = Call->getType()->castAs<VectorType>();
-  unsigned NumElems = DstVTy->getNumElements();
-  const Pointer &Dst = S.Stk.peek<Pointer>();
-
-  // Copy all elements except lane 0 (overwritten below) from A to Dst.
-  for (unsigned I = 1; I != NumElems; ++I)
-    Dst.elem<Floating>(I) = A.elem<Floating>(I);
-
-  // Convert element 0 from double to float.
-  Floating Conv = S.allocFloat(
-      S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType()));
-  APFloat SrcVal = B.elem<Floating>(0).getAPFloat();
-  if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call))
-    return false;
-  Dst.elem<Floating>(0) = Conv;
-
-  Dst.initializeAllElements();
-  return true;
-}
-
-static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
-                                                     CodePtr OpPC,
-                                                     const CallExpr *Call) {
-  assert(Call->getNumArgs() == 5);
-
-  APSInt Rounding = popToAPSInt(S, Call->getArg(4));
-  APSInt MaskInt = popToAPSInt(S, Call->getArg(3));
-  const Pointer &Src = S.Stk.pop<Pointer>();
-  const Pointer &B = S.Stk.pop<Pointer>();
-  const Pointer &A = S.Stk.pop<Pointer>();
-  if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) ||
-      !CheckLoad(S, OpPC, Src))
-    return false;
+                                          const CallExpr *Call, bool HasMask,
+                                          bool HasRounding) {
+  APSInt Rounding, MaskInt;
+  Pointer Src, B, A;
+
+  if (HasMask) {
+    assert(Call->getNumArgs() == 5);
+    Rounding = popToAPSInt(S, Call->getArg(4));
+    MaskInt = popToAPSInt(S, Call->getArg(3));
+    Src = S.Stk.pop<Pointer>();
+    B = S.Stk.pop<Pointer>();
+    A = S.Stk.pop<Pointer>();
+    if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) ||
+        !CheckLoad(S, OpPC, Src))
+      return false;
+  } else {
+    assert(Call->getNumArgs() == 2);
+    B = S.Stk.pop<Pointer>();
+    A = S.Stk.pop<Pointer>();
+    if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B))
+      return false;
+  }
 
   const auto *DstVTy = Call->getType()->castAs<VectorType>();
   unsigned NumElems = DstVTy->getNumElements();
@@ -3442,10 +3423,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
   for (unsigned I = 1; I != NumElems; ++I)
     Dst.elem<Floating>(I) = A.elem<Floating>(I);
 
-  // If mask bit 0 is set, convert element 0 from double to float; otherwise use
-  // Src.
-  if (MaskInt.getZExtValue() & 0x1) {
-
+  // Convert element 0 from double to float, or use Src if masked off.
+  if (!HasMask || (MaskInt.getZExtValue() & 0x1)) {
     assert(S.getASTContext().FloatTy == DstVTy->getElementType() &&
            "cvtsd2ss requires float element type in destination vector");
 
@@ -3464,11 +3443,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
 }
 
 static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
-                                          const CallExpr *Call,
-                                          unsigned BuiltinID) {
-  bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
-                   BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
-  bool HasRounding = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
+                                          const CallExpr *Call, bool IsMasked,
+                                          bool HasRounding) {
 
   APSInt MaskVal;
   Pointer PassThrough;
@@ -5342,16 +5318,18 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
     return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID);
 
   case X86::BI__builtin_ia32_cvtsd2ss:
-    return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call);
+    return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, false, false);
 
   case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
-    return interp__builtin_ia32_cvtsd2ss_round_mask(S, OpPC, Call);
+    return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, true, true);
 
   case X86::BI__builtin_ia32_cvtpd2ps:
   case X86::BI__builtin_ia32_cvtpd2ps256:
+    return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, false, false);
   case X86::BI__builtin_ia32_cvtpd2ps_mask:
+    return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, false);
   case X86::BI__builtin_ia32_cvtpd2ps512_mask:
-    return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, BuiltinID);
+    return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, true);
 
   case X86::BI__builtin_ia32_cmpb128_mask:
   case X86::BI__builtin_ia32_cmpw128_mask:

From a08303def439deae76e8ff336f2e00b4fcd6bbbb Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067@gmail.com>
Date: Tue, 2 Dec 2025 22:04:24 +0200
Subject: [PATCH 25/25] Did the HasRoundingMask change

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 438c64e919b82..9a301ffcf28ed 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3392,12 +3392,12 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
   return true;
 }
 static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
-                                          const CallExpr *Call, bool HasMask,
-                                          bool HasRounding) {
+                                          const CallExpr *Call,
+                                          bool HasRoundingMask) {
   APSInt Rounding, MaskInt;
   Pointer Src, B, A;
 
-  if (HasMask) {
+  if (HasRoundingMask) {
     assert(Call->getNumArgs() == 5);
     Rounding = popToAPSInt(S, Call->getArg(4));
     MaskInt = popToAPSInt(S, Call->getArg(3));
@@ -3424,7 +3424,7 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
     Dst.elem<Floating>(I) = A.elem<Floating>(I);
 
   // Convert element 0 from double to float, or use Src if masked off.
-  if (!HasMask || (MaskInt.getZExtValue() & 0x1)) {
+  if (!HasRoundingMask || (MaskInt.getZExtValue() & 0x1)) {
     assert(S.getASTContext().FloatTy == DstVTy->getElementType() &&
            "cvtsd2ss requires float element type in destination vector");
 
@@ -5318,10 +5318,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
     return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID);
 
   case X86::BI__builtin_ia32_cvtsd2ss:
-    return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, false, false);
+    return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, false);
 
   case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
-    return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, true, true);
+    return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, true);
 
   case X86::BI__builtin_ia32_cvtpd2ps:
   case X86::BI__builtin_ia32_cvtpd2ps256: