@@ -1542,6 +1542,49 @@ template <typename T> T waveMultiPrefixProduct(T A, UINT) {
15421542 return A * A;
15431543}
15441544
1545+ template <typename T> struct Op <OpType::WaveMatch, T, 1 > : StrictValidation {};
1546+
1547+ template <typename T> struct ExpectedBuilder <OpType::WaveMatch, T> {
1548+ static std::vector<UINT> buildExpected (Op<OpType::WaveMatch, T, 1 > &,
1549+ const InputSets<T> &,
1550+ const UINT WaveSize) {
1551+ // For this test, the shader arranges it so that lane 0 is different from
1552+ // all the other lanes. Besides that all other lines write their result of
1553+ // WaveMatch as well.
1554+
1555+ std::vector<UINT> Expected;
1556+ Expected.assign (WaveSize * 4 , 0 );
1557+
1558+ const UINT LowWaves = std::min (64U , WaveSize);
1559+ const UINT HighWaves = WaveSize - LowWaves;
1560+
1561+ const uint64_t LowWaveMask =
1562+ (LowWaves < 64 ) ? (1ULL << LowWaves) - 1 : ~0ULL ;
1563+
1564+ const uint64_t HighWaveMask =
1565+ (HighWaves < 64 ) ? (1ULL << HighWaves) - 1 : ~0ULL ;
1566+
1567+ const uint64_t LowExpected = ~1ULL & LowWaveMask;
1568+ const uint64_t HighExpected = ~0ULL & HighWaveMask;
1569+
1570+ Expected[0 ] = 1 ;
1571+ Expected[1 ] = 0 ;
1572+ Expected[2 ] = 0 ;
1573+ Expected[3 ] = 0 ;
1574+
1575+ // all lanes other than the first one have the same result
1576+ for (UINT I = 1 ; I < WaveSize; ++I) {
1577+ const UINT Index = I * 4 ;
1578+ Expected[Index] = static_cast <UINT>(LowExpected);
1579+ Expected[Index + 1 ] = static_cast <UINT>(LowExpected >> 32 );
1580+ Expected[Index + 2 ] = static_cast <UINT>(HighExpected);
1581+ Expected[Index + 3 ] = static_cast <UINT>(HighExpected >> 32 );
1582+ }
1583+
1584+ return Expected;
1585+ }
1586+ };
1587+
15451588#undef WAVE_OP
15461589
15471590//
@@ -2461,6 +2504,7 @@ class DxilConf_SM69_Vectorized {
24612504 HLK_WAVEOP_TEST (WaveActiveAllEqual, HLSLBool_t);
24622505 HLK_WAVEOP_TEST (WaveReadLaneAt, HLSLBool_t);
24632506 HLK_WAVEOP_TEST (WaveReadLaneFirst, HLSLBool_t);
2507+ HLK_WAVEOP_TEST (WaveMatch, HLSLBool_t);
24642508
24652509 HLK_WAVEOP_TEST (WaveActiveSum, int16_t );
24662510 HLK_WAVEOP_TEST (WaveActiveMin, int16_t );
@@ -2476,6 +2520,7 @@ class DxilConf_SM69_Vectorized {
24762520 HLK_WAVEOP_TEST (WaveMultiPrefixBitAnd, int16_t );
24772521 HLK_WAVEOP_TEST (WaveMultiPrefixBitOr, int16_t );
24782522 HLK_WAVEOP_TEST (WaveMultiPrefixBitXor, int16_t );
2523+ HLK_WAVEOP_TEST (WaveMatch, int16_t );
24792524 HLK_WAVEOP_TEST (WaveActiveSum, int32_t );
24802525 HLK_WAVEOP_TEST (WaveActiveMin, int32_t );
24812526 HLK_WAVEOP_TEST (WaveActiveMax, int32_t );
@@ -2490,6 +2535,7 @@ class DxilConf_SM69_Vectorized {
24902535 HLK_WAVEOP_TEST (WaveMultiPrefixBitAnd, int32_t );
24912536 HLK_WAVEOP_TEST (WaveMultiPrefixBitOr, int32_t );
24922537 HLK_WAVEOP_TEST (WaveMultiPrefixBitXor, int32_t );
2538+ HLK_WAVEOP_TEST (WaveMatch, int32_t );
24932539 HLK_WAVEOP_TEST (WaveActiveSum, int64_t );
24942540 HLK_WAVEOP_TEST (WaveActiveMin, int64_t );
24952541 HLK_WAVEOP_TEST (WaveActiveMax, int64_t );
@@ -2504,6 +2550,7 @@ class DxilConf_SM69_Vectorized {
25042550 HLK_WAVEOP_TEST (WaveMultiPrefixBitAnd, int64_t );
25052551 HLK_WAVEOP_TEST (WaveMultiPrefixBitOr, int64_t );
25062552 HLK_WAVEOP_TEST (WaveMultiPrefixBitXor, int64_t );
2553+ HLK_WAVEOP_TEST (WaveMatch, int64_t );
25072554
25082555 // Note: WaveActiveBit* ops don't support uint16_t in HLSL
25092556 // But the WaveMultiPrefixBit ops support all int and uint types
@@ -2521,6 +2568,7 @@ class DxilConf_SM69_Vectorized {
25212568 HLK_WAVEOP_TEST (WaveMultiPrefixBitAnd, uint16_t );
25222569 HLK_WAVEOP_TEST (WaveMultiPrefixBitOr, uint16_t );
25232570 HLK_WAVEOP_TEST (WaveMultiPrefixBitXor, uint16_t );
2571+ HLK_WAVEOP_TEST (WaveMatch, uint16_t );
25242572 HLK_WAVEOP_TEST (WaveActiveSum, uint32_t );
25252573 HLK_WAVEOP_TEST (WaveActiveMin, uint32_t );
25262574 HLK_WAVEOP_TEST (WaveActiveMax, uint32_t );
@@ -2538,6 +2586,7 @@ class DxilConf_SM69_Vectorized {
25382586 HLK_WAVEOP_TEST (WaveMultiPrefixBitAnd, uint32_t );
25392587 HLK_WAVEOP_TEST (WaveMultiPrefixBitOr, uint32_t );
25402588 HLK_WAVEOP_TEST (WaveMultiPrefixBitXor, uint32_t );
2589+ HLK_WAVEOP_TEST (WaveMatch, uint32_t );
25412590 HLK_WAVEOP_TEST (WaveActiveSum, uint64_t );
25422591 HLK_WAVEOP_TEST (WaveActiveMin, uint64_t );
25432592 HLK_WAVEOP_TEST (WaveActiveMax, uint64_t );
@@ -2555,6 +2604,7 @@ class DxilConf_SM69_Vectorized {
25552604 HLK_WAVEOP_TEST (WaveMultiPrefixBitAnd, uint64_t );
25562605 HLK_WAVEOP_TEST (WaveMultiPrefixBitOr, uint64_t );
25572606 HLK_WAVEOP_TEST (WaveMultiPrefixBitXor, uint64_t );
2607+ HLK_WAVEOP_TEST (WaveMatch, uint64_t );
25582608
25592609 HLK_WAVEOP_TEST (WaveActiveSum, HLSLHalf_t);
25602610 HLK_WAVEOP_TEST (WaveActiveMin, HLSLHalf_t);
@@ -2567,6 +2617,7 @@ class DxilConf_SM69_Vectorized {
25672617 HLK_WAVEOP_TEST (WavePrefixProduct, HLSLHalf_t);
25682618 HLK_WAVEOP_TEST (WaveMultiPrefixSum, HLSLHalf_t);
25692619 HLK_WAVEOP_TEST (WaveMultiPrefixProduct, HLSLHalf_t);
2620+ HLK_WAVEOP_TEST (WaveMatch, HLSLHalf_t);
25702621 HLK_WAVEOP_TEST (WaveActiveSum, float );
25712622 HLK_WAVEOP_TEST (WaveActiveMin, float );
25722623 HLK_WAVEOP_TEST (WaveActiveMax, float );
@@ -2578,6 +2629,7 @@ class DxilConf_SM69_Vectorized {
25782629 HLK_WAVEOP_TEST (WavePrefixProduct, float );
25792630 HLK_WAVEOP_TEST (WaveMultiPrefixSum, float );
25802631 HLK_WAVEOP_TEST (WaveMultiPrefixProduct, float );
2632+ HLK_WAVEOP_TEST (WaveMatch, float );
25812633 HLK_WAVEOP_TEST (WaveActiveSum, double );
25822634 HLK_WAVEOP_TEST (WaveActiveMin, double );
25832635 HLK_WAVEOP_TEST (WaveActiveMax, double );
@@ -2589,6 +2641,7 @@ class DxilConf_SM69_Vectorized {
25892641 HLK_WAVEOP_TEST (WavePrefixProduct, double );
25902642 HLK_WAVEOP_TEST (WaveMultiPrefixSum, double );
25912643 HLK_WAVEOP_TEST (WaveMultiPrefixProduct, double );
2644+ HLK_WAVEOP_TEST (WaveMatch, double );
25922645
25932646private:
25942647 bool Initialized = false ;
0 commit comments