Skip to content

Commit

Permalink
Merge pull request #48 from howjmay/movemask
Browse files Browse the repository at this point in the history
feat: Add _mm_movemask*
  • Loading branch information
howjmay authored Jan 20, 2024
2 parents 9a8610f + 06a6bd8 commit 8552de4
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 75 deletions.
30 changes: 25 additions & 5 deletions sse2rvv.h
Original file line number Diff line number Diff line change
Expand Up @@ -2004,13 +2004,33 @@ FORCE_INLINE __m128 _mm_movelh_ps(__m128 a, __m128 b) {
return vreinterpretq_f64_m128(__riscv_vslideup_vx_f64m1(_a, _b, 1, 2));
}

// FORCE_INLINE int _mm_movemask_epi8 (__m128i a) {}
FORCE_INLINE int _mm_movemask_epi8(__m128i a) {
vint8m1_t _a = vreinterpretq_m128i_i8(a);
vuint16m1_t nonzeros =
__riscv_vreinterpret_v_b8_u16m1(__riscv_vmslt_vx_i8m1_b8(_a, 0, 16));
return (int)__riscv_vmv_x_s_u16m1_u16(nonzeros);
}

// FORCE_INLINE int _mm_movemask_pd (__m128d a) {}
FORCE_INLINE int _mm_movemask_pd(__m128d a) {
vint64m1_t _a = vreinterpretq_m128d_i64(a);
vuint8m1_t nonzeros =
__riscv_vreinterpret_v_b64_u8m1(__riscv_vmslt_vx_i64m1_b64(_a, 0, 2));
return (int)(__riscv_vmv_x_s_u8m1_u8(nonzeros) & 0x3);
}

// FORCE_INLINE int _mm_movemask_pi8 (__m64 a) {}
FORCE_INLINE int _mm_movemask_pi8(__m64 a) {
vint8m1_t _a = vreinterpretq_m128i_i8(a);
vuint8m1_t nonzeros =
__riscv_vreinterpret_v_b8_u8m1(__riscv_vmslt_vx_i8m1_b8(_a, 0, 8));
return (int)__riscv_vmv_x_s_u8m1_u8(nonzeros);
}

// FORCE_INLINE int _mm_movemask_ps (__m128 a) {}
FORCE_INLINE int _mm_movemask_ps(__m128 a) {
vint32m1_t _a = vreinterpretq_m128_i32(a);
vuint8m1_t nonzeros =
__riscv_vreinterpret_v_b32_u8m1(__riscv_vmslt_vx_i32m1_b32(_a, 0, 4));
return (int)(__riscv_vmv_x_s_u8m1_u8(nonzeros) & 0xf);
}

// FORCE_INLINE __m64 _mm_movepi64_pi64 (__m128i a) {}

Expand Down Expand Up @@ -2081,7 +2101,7 @@ FORCE_INLINE __m64 _m_pminsw(__m64 a, __m64 b) { return _mm_min_pi16(a, b); }

FORCE_INLINE __m64 _m_pminub(__m64 a, __m64 b) { return _mm_min_pu8(a, b); }

// FORCE_INLINE int _m_pmovmskb (__m64 a) {}
FORCE_INLINE int _m_pmovmskb(__m64 a) { return _mm_movemask_pi8(a); }

// FORCE_INLINE __m64 _m_pmulhuw (__m64 a, __m64 b) {}

Expand Down
150 changes: 80 additions & 70 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2556,49 +2556,49 @@ result_t test_mm_movelh_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
}

result_t test_mm_movemask_pi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
// #ifdef ENABLE_TEST_ALL
// const uint8_t *_a = (const uint8_t *)impl.test_cases_int_pointer1;
// unsigned int _c = 0;
// for (int i = 0; i < 8; i++) {
// if (_a[i] & 0x80) {
// _c |= (1 << i);
// }
// }
//
// const __m64 *a = (const __m64 *)_a;
// int c = _mm_movemask_pi8(*a);
//
// ASSERT_RETURN((unsigned int)c == _c);
// return TEST_SUCCESS;
// #else
#ifdef ENABLE_TEST_ALL
const uint8_t *_a = (const uint8_t *)impl.test_cases_int_pointer1;
unsigned int _c = 0;
for (int i = 0; i < 8; i++) {
if (_a[i] & 0x80) {
_c |= (1 << i);
}
}

const __m64 *a = (const __m64 *)_a;
int c = _mm_movemask_pi8(*a);

ASSERT_RETURN((unsigned int)c == _c);
return TEST_SUCCESS;
#else
return TEST_UNIMPL;
// #endif // ENABLE_TEST_ALL
#endif // ENABLE_TEST_ALL
}

result_t test_mm_movemask_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
// #ifdef ENABLE_TEST_ALL
// const float *p = impl.test_cases_float_pointer1;
// int ret = 0;
//
// const uint32_t *ip = (const uint32_t *)p;
// if (ip[0] & 0x80000000) {
// ret |= 1;
// }
// if (ip[1] & 0x80000000) {
// ret |= 2;
// }
// if (ip[2] & 0x80000000) {
// ret |= 4;
// }
// if (ip[3] & 0x80000000) {
// ret |= 8;
// }
// __m128 a = load_m128(p);
// int val = _mm_movemask_ps(a);
// return val == ret ? TEST_SUCCESS : TEST_FAIL;
// #else
#ifdef ENABLE_TEST_ALL
const float *p = impl.test_cases_float_pointer1;
int ret = 0;

const uint32_t *ip = (const uint32_t *)p;
if (ip[0] & 0x80000000) {
ret |= 1;
}
if (ip[1] & 0x80000000) {
ret |= 2;
}
if (ip[2] & 0x80000000) {
ret |= 4;
}
if (ip[3] & 0x80000000) {
ret |= 8;
}
__m128 a = load_m128(p);
int val = _mm_movemask_ps(a);
return val == ret ? TEST_SUCCESS : TEST_FAIL;
#else
return TEST_UNIMPL;
// #endif // ENABLE_TEST_ALL
#endif // ENABLE_TEST_ALL
}

result_t test_mm_mul_ps(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
Expand Down Expand Up @@ -2825,7 +2825,18 @@ result_t test_m_pminub(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {

result_t test_m_pmovmskb(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
// #ifdef ENABLE_TEST_ALL
// return test_mm_movemask_pi8(impl, iter);
const uint8_t *_a = (const uint8_t *)impl.test_cases_int_pointer1;
unsigned int _c = 0;
for (int i = 0; i < 8; i++) {
if (_a[i] & 0x80) {
_c |= (1 << i);
}
}

const __m64 *a = (const __m64 *)_a;
int c = _m_pmovmskb(*a);

ASSERT_RETURN((unsigned int)c == _c);
// #else
return TEST_UNIMPL;
// #endif // ENABLE_TEST_ALL
Expand Down Expand Up @@ -6035,42 +6046,41 @@ result_t test_mm_move_sd(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
}

result_t test_mm_movemask_epi8(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
// #ifdef ENABLE_TEST_ALL
// const int32_t *_a = impl.test_cases_int_pointer1;
// __m128i a = load_m128i(_a);
//
// const uint8_t *ip = (const uint8_t *)_a;
// int ret = 0;
// uint32_t mask = 1;
// for (uint32_t i = 0; i < 16; i++) {
// if (ip[i] & 0x80) {
// ret |= mask;
// }
// mask = mask << 1;
// }
// int test = _mm_movemask_epi8(a);
// ASSERT_RETURN(test == ret);
// return TEST_SUCCESS;
// #else
#ifdef ENABLE_TEST_ALL
const int32_t *_a = impl.test_cases_int_pointer1;
__m128i a = load_m128i(_a);

const uint8_t *_a_u8 = (const uint8_t *)_a;
int _c = 0;
uint32_t mask = 1;
for (uint32_t i = 0; i < 16; i++) {
if (_a_u8[i] & 0x80) {
_c |= mask;
}
mask = mask << 1;
}
int c = _mm_movemask_epi8(a);
ASSERT_RETURN(c == _c);
return TEST_SUCCESS;
#else
return TEST_UNIMPL;
// #endif // ENABLE_TEST_ALL
#endif // ENABLE_TEST_ALL
}

result_t test_mm_movemask_pd(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
// #ifdef ENABLE_TEST_ALL
// const double *_a = (const double *)impl.test_cases_float_pointer1;
// unsigned int _c = 0;
// _c |= ((*(const uint64_t *)_a) >> 63) & 0x1;
// _c |= (((*(const uint64_t *)(_a + 1)) >> 62) & 0x2);
//
// __m128d a = load_m128d(_a);
// int c = _mm_movemask_pd(a);
//
// ASSERT_RETURN((unsigned int)c == _c);
// return TEST_SUCCESS;
// #else
#ifdef ENABLE_TEST_ALL
const double *_a = (const double *)impl.test_cases_float_pointer1;
unsigned int _c = 0;
_c |= ((*(const uint64_t *)_a) >> 63) & 0x1;
_c |= (((*(const uint64_t *)(_a + 1)) >> 62) & 0x2);

__m128d a = load_m128d(_a);
int c = _mm_movemask_pd(a);
ASSERT_RETURN((unsigned int)c == _c);
return TEST_SUCCESS;
#else
return TEST_UNIMPL;
// #endif // ENABLE_TEST_ALL
#endif // ENABLE_TEST_ALL
}

result_t test_mm_movepi64_pi64(const SSE2RVV_TEST_IMPL &impl, uint32_t iter) {
Expand Down

0 comments on commit 8552de4

Please sign in to comment.