Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add vqrshl[b|h|s|d]_[s8|s16|s32|s64|u8|u16|u32|u64] #493

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 32 additions & 8 deletions neon2rvv.h
Original file line number Diff line number Diff line change
Expand Up @@ -5650,21 +5650,45 @@ FORCE_INLINE uint32x4_t vqrshlq_u32(uint32x4_t a, int32x4_t b) {

// FORCE_INLINE uint64x2_t vqrshlq_u64(uint64x2_t a, int64x2_t b);

// FORCE_INLINE int8_t vqrshlb_s8(int8_t a, int8_t b);
/**
 * Scalar signed saturating rounding shift left, 8-bit.
 *
 * A non-negative b is forwarded unchanged to vqshlb_s8(). A negative b
 * requests a rounding right shift by -b: half of the discarded range is added
 * before shifting, so ties round towards positive infinity.
 *
 * @param a value to shift
 * @param b signed shift amount; negative values shift right
 * @return the shifted value
 */
FORCE_INLINE int8_t vqrshlb_s8(int8_t a, int8_t b) {
  if (b >= 0) {
    return vqshlb_s8(a, b);
  }

  const int n = -(int) b; /* right-shift distance, 1..128 */
  if (n >= 8) {
    /* a + 2^(n-1) lies in [0, 2^n), so the rounded quotient is always 0.
     * Returning here also keeps the shift counts below the width of int. */
    return 0;
  }

  /* Relies on >> of a negative int being an arithmetic shift. */
  return (int8_t) (((int) a + (1 << (n - 1))) >> n);
}

// FORCE_INLINE int16_t vqrshlh_s16(int16_t a, int16_t b);
/**
 * Scalar signed saturating rounding shift left, 16-bit.
 *
 * Only the signed low byte of b is used as the shift amount, following the
 * Arm SQRSHL instruction description. A non-negative shift is forwarded to
 * vqshlh_s16(). A negative shift is a rounding right shift: half of the
 * discarded range is added before shifting, so ties round towards positive
 * infinity.
 *
 * @param a value to shift
 * @param b shift amount; its low byte is interpreted as a signed count
 * @return the shifted value
 */
FORCE_INLINE int16_t vqrshlh_s16(int16_t a, int16_t b) {
  const int8_t shift = (int8_t) b;
  if (shift >= 0) {
    return vqshlh_s16(a, shift);
  }

  const int n = -(int) shift; /* right-shift distance, 1..128 */
  if (n >= 16) {
    /* a + 2^(n-1) lies in [0, 2^n), so the rounded quotient is always 0.
     * Returning here also keeps the shift counts below the operand width. */
    return 0;
  }

  /* Relies on >> of a negative value being an arithmetic shift. */
  return (int16_t) (((int32_t) a + ((int32_t) 1 << (n - 1))) >> n);
}

// FORCE_INLINE int32_t vqrshls_s32(int32_t a, int32_t b);
/**
 * Scalar signed saturating rounding shift left, 32-bit.
 *
 * Only the signed low byte of b is used as the shift amount, following the
 * Arm SQRSHL instruction description. A non-negative shift is forwarded to
 * vqshls_s32(). A negative shift is a rounding right shift performed in
 * 64-bit arithmetic so that neither the rounding constant nor the sum can
 * overflow.
 *
 * @param a value to shift
 * @param b shift amount; its low byte is interpreted as a signed count
 * @return the shifted value
 */
FORCE_INLINE int32_t vqrshls_s32(int32_t a, int32_t b) {
  const int8_t shift = (int8_t) b;
  if (shift >= 0) {
    return vqshls_s32(a, shift);
  }

  const int n = -(int) shift; /* right-shift distance, 1..128 */
  if (n >= 32) {
    /* a + 2^(n-1) lies in [0, 2^n), so the rounded quotient is always 0. */
    return 0;
  }

  /* The constant is built as int64_t: a plain `1 << 31` would overflow int.
   * Relies on >> of a negative value being an arithmetic shift. */
  return (int32_t) (((int64_t) a + ((int64_t) 1 << (n - 1))) >> n);
}

// FORCE_INLINE int64_t vqrshld_s64(int64_t a, int64_t b);
/**
 * Scalar signed saturating rounding shift left, 64-bit.
 *
 * Only the signed low byte of b is used as the shift amount, following the
 * Arm SQRSHL instruction description. A non-negative shift is forwarded to
 * vqshld_s64(). A negative shift is a rounding right shift with ties rounded
 * towards positive infinity.
 *
 * @param a value to shift
 * @param b shift amount; its low byte is interpreted as a signed count
 * @return the shifted value
 */
FORCE_INLINE int64_t vqrshld_s64(int64_t a, int64_t b) {
  const int8_t shift = (int8_t) b;
  if (shift >= 0) {
    return vqshld_s64(a, shift);
  }

  const int n = -(int) shift; /* right-shift distance, 1..128 */
  if (n >= 64) {
    /* a + 2^(n-1) lies in [0, 2^n), so the rounded quotient is always 0. */
    return 0;
  }

  /* No wider type is available, so a + 2^(n-1) is not formed directly (it
   * could overflow). Bit n-1 of a is exactly the rounding carry, so add it to
   * the truncated quotient instead.
   * Relies on >> of a negative value being an arithmetic shift. */
  return (a >> n) + ((a >> (n - 1)) & 1);
}

// FORCE_INLINE uint8_t vqrshlb_u8(uint8_t a, int8_t b);
/**
 * Scalar unsigned saturating rounding shift left, 8-bit.
 *
 * A negative b is a rounding right shift by -b (ties round up). A
 * non-negative b is a left shift whose result is clamped to UINT8_MAX when it
 * does not fit in 8 bits.
 *
 * @param a value to shift
 * @param b signed shift amount; negative values shift right
 * @return the shifted, saturated value
 */
FORCE_INLINE uint8_t vqrshlb_u8(uint8_t a, int8_t b) {
  if (b < 0) {
    const int n = -(int) b; /* right-shift distance, 1..128 */
    if (n > 8) {
      /* a + 2^(n-1) < 2^n, so nothing survives the shift. */
      return 0;
    }
    /* n == 8 is kept: the rounding carry alone can still produce 1. */
    return (uint8_t) (((uint32_t) a + ((uint32_t) 1 << (n - 1))) >> n);
  }

  if (a == 0) {
    return 0;
  }
  if (b >= 8 || a > (UINT8_MAX >> b)) {
    return UINT8_MAX;
  }
  return (uint8_t) ((uint32_t) a << b);
}

// FORCE_INLINE uint16_t vqrshlh_u16(uint16_t a, int16_t b);
/**
 * Scalar unsigned saturating rounding shift left, 16-bit.
 *
 * Only the signed low byte of b is used as the shift amount, following the
 * Arm UQRSHL instruction description. A negative shift is a rounding right
 * shift (ties round up). A non-negative shift is a left shift whose result is
 * clamped to UINT16_MAX when it does not fit in 16 bits.
 *
 * @param a value to shift
 * @param b shift amount; its low byte is interpreted as a signed count
 * @return the shifted, saturated value
 */
FORCE_INLINE uint16_t vqrshlh_u16(uint16_t a, int16_t b) {
  const int8_t shift = (int8_t) b;
  if (shift < 0) {
    const int n = -(int) shift; /* right-shift distance, 1..128 */
    if (n > 16) {
      /* a + 2^(n-1) < 2^n, so nothing survives the shift. */
      return 0;
    }
    /* n == 16 is kept: the rounding carry alone can still produce 1. */
    return (uint16_t) (((uint32_t) a + ((uint32_t) 1 << (n - 1))) >> n);
  }

  if (a == 0) {
    return 0;
  }
  if (shift >= 16 || a > (UINT16_MAX >> shift)) {
    return UINT16_MAX;
  }
  return (uint16_t) ((uint32_t) a << shift);
}

// FORCE_INLINE uint32_t vqrshls_u32(uint32_t a, int32_t b);
/**
 * Scalar unsigned saturating rounding shift left, 32-bit.
 *
 * Only the signed low byte of b is used as the shift amount, following the
 * Arm UQRSHL instruction description. A negative shift is a rounding right
 * shift (ties round up), evaluated in 64-bit arithmetic so the rounding add
 * cannot wrap. A non-negative shift is a left shift whose result is clamped
 * to UINT32_MAX when it does not fit in 32 bits.
 *
 * @param a value to shift
 * @param b shift amount; its low byte is interpreted as a signed count
 * @return the shifted, saturated value
 */
FORCE_INLINE uint32_t vqrshls_u32(uint32_t a, int32_t b) {
  const int8_t shift = (int8_t) b;
  if (shift < 0) {
    const int n = -(int) shift; /* right-shift distance, 1..128 */
    if (n > 32) {
      /* a + 2^(n-1) < 2^n, so nothing survives the shift. */
      return 0;
    }
    /* n == 32 is kept: the rounding carry alone can still produce 1. */
    return (uint32_t) (((uint64_t) a + ((uint64_t) 1 << (n - 1))) >> n);
  }

  if (a == 0) {
    return 0;
  }
  if (shift >= 32 || a > (UINT32_MAX >> shift)) {
    return UINT32_MAX;
  }
  return a << shift;
}

// FORCE_INLINE uint64_t vqrshld_u64(uint64_t a, int64_t b);
/**
 * Scalar unsigned saturating rounding shift left, 64-bit.
 *
 * Only the signed low byte of b is used as the shift amount, following the
 * Arm UQRSHL instruction description. A negative shift is a rounding right
 * shift (ties round up). A non-negative shift is a left shift whose result is
 * clamped to UINT64_MAX when it does not fit in 64 bits.
 *
 * @param a value to shift
 * @param b shift amount; its low byte is interpreted as a signed count
 * @return the shifted, saturated value
 */
FORCE_INLINE uint64_t vqrshld_u64(uint64_t a, int64_t b) {
  const int8_t shift = (int8_t) b;
  if (shift < 0) {
    const int n = -(int) shift; /* right-shift distance, 1..128 */
    if (n > 64) {
      /* a + 2^(n-1) < 2^n, so nothing survives the shift. */
      return 0;
    }
    /* No wider type is available for a + 2^(n-1). Shift by n-1 first; the
     * low bit of that intermediate is the rounding carry. */
    const uint64_t partial = a >> (n - 1);
    return (partial >> 1) + (partial & 1);
  }

  if (a == 0) {
    return 0;
  }
  if (shift >= 64 || a > (UINT64_MAX >> shift)) {
    return UINT64_MAX;
  }
  return a << shift;
}

FORCE_INLINE int8x8_t vshr_n_s8(int8x8_t a, const int b) {
const int imm = b - (b >> 3);
Expand Down
147 changes: 140 additions & 7 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18687,7 +18687,44 @@ result_t test_vqrshl_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#endif // ENABLE_TEST_ALL
}

#include <stdio.h>

// Compares vqrshl_s64 against a scalar reference for one 64-bit lane.
// The shift operand is reduced to -63..63 in place before it is loaded, so
// the intrinsic and the reference see the same value.
result_t test_vqrshl_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const int64_t *_a = (int64_t *) impl.test_cases_int_pointer1;
  int64_t *_b = (int64_t *) impl.test_cases_int_pointer2;
  _b[0] = _b[0] % 64;

  int64_t _c;
  if (_b[0] < 0) {
    // Rounding right shift. Bit n-1 of the input is the rounding carry;
    // adding it to the truncated quotient avoids forming a + 2^(n-1), which
    // can overflow int64_t.
    const int64_t n = -_b[0];
    _c = (_a[0] >> n) + ((_a[0] >> (n - 1)) & 1);
  } else if (_a[0] > (INT64_MAX >> _b[0])) {
    _c = INT64_MAX;
  } else if (_a[0] < (INT64_MIN >> _b[0])) {
    _c = INT64_MIN;
  } else {
    // Shift as unsigned: left-shifting a negative value is undefined before
    // C++20.
    _c = (int64_t) ((uint64_t) _a[0] << _b[0]);
  }

  int64x1_t a = vld1_s64(_a);
  int64x1_t b = vld1_s64(_b);
  int64x1_t c = vqrshl_s64(a, b);
  return validate_int64(c, _c);
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

result_t test_vqrshl_u8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
Expand Down Expand Up @@ -18913,19 +18950,115 @@ result_t test_vqrshlq_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {

// Placeholder test body: performs no check and only reports TEST_UNIMPL.
result_t test_vqrshlq_u64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
  return TEST_UNIMPL;
}

// Compares the scalar intrinsic vqrshlb_s8 against a reference value.
// The shift operand is reduced to -7..7 in place. Negative shifts take their
// expected value from sat_rshr(), which is defined elsewhere in the test
// sources (presumably a rounding right shift -- confirm against its
// definition). Non-negative shifts use a widened multiply followed by
// saturate_int8().
result_t test_vqrshlb_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const int8_t *_a = (int8_t *) impl.test_cases_int_pointer1;
  int8_t *_b = (int8_t *) impl.test_cases_int_pointer2;
  _b[0] = _b[0] % 8;

  int8_t _c;
  if (_b[0] < 0) {
    _c = sat_rshr(_a[0], _b[0]);
  } else {
    // Multiply instead of shifting: left-shifting a negative value is
    // undefined before C++20.
    _c = saturate_int8((int16_t) _a[0] * (1 << _b[0]));
  }

  const int8_t c = vqrshlb_s8(_a[0], _b[0]);
  return c == _c ? TEST_SUCCESS : TEST_FAIL;
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

// Compares the scalar intrinsic vqrshlh_s16 against a reference value.
// The shift operand is reduced to -15..15 in place. Negative shifts take
// their expected value from sat_rshr(), which is defined elsewhere in the
// test sources (presumably a rounding right shift -- confirm against its
// definition). Non-negative shifts use a widened multiply followed by
// saturate_int16().
result_t test_vqrshlh_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const int16_t *_a = (int16_t *) impl.test_cases_int_pointer1;
  int16_t *_b = (int16_t *) impl.test_cases_int_pointer2;
  _b[0] = _b[0] % 16;

  int16_t _c;
  if (_b[0] < 0) {
    _c = sat_rshr(_a[0], _b[0]);
  } else {
    // Multiply instead of shifting: left-shifting a negative value is
    // undefined before C++20. |a| * 2^15 still fits in int32_t.
    _c = saturate_int16((int32_t) _a[0] * ((int32_t) 1 << _b[0]));
  }

  const int16_t c = vqrshlh_s16(_a[0], _b[0]);
  return c == _c ? TEST_SUCCESS : TEST_FAIL;
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

// Compares the scalar intrinsic vqrshls_s32 against a reference value.
// The shift operand is reduced to -31..31 in place. Negative shifts take
// their expected value from sat_rshr(), which is defined elsewhere in the
// test sources (presumably a rounding right shift -- confirm against its
// definition). Non-negative shifts use a widened multiply followed by
// saturate_int32().
result_t test_vqrshls_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const int32_t *_a = (int32_t *) impl.test_cases_int_pointer1;
  int32_t *_b = (int32_t *) impl.test_cases_int_pointer2;
  _b[0] = _b[0] % 32;

  int32_t _c;
  if (_b[0] < 0) {
    _c = sat_rshr(_a[0], _b[0]);
  } else {
    // Multiply instead of shifting: left-shifting a negative value is
    // undefined before C++20. |a| * 2^31 still fits in int64_t.
    _c = saturate_int32((int64_t) _a[0] * ((int64_t) 1 << _b[0]));
  }

  const int32_t c = vqrshls_s32(_a[0], _b[0]);
  return c == _c ? TEST_SUCCESS : TEST_FAIL;
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

// Placeholder test body: performs no check and only reports TEST_UNIMPL.
result_t test_vqrshld_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
  return TEST_UNIMPL;
}

// Compares the scalar intrinsic vqrshlb_u8 against a reference value.
// The shift operand is reduced to -7..7 in place. Negative shifts take their
// expected value from sat_rshr(), which is defined elsewhere in the test
// sources (presumably a rounding right shift -- confirm against its
// definition). Non-negative shifts use a widened left shift followed by
// saturate_uint8().
result_t test_vqrshlb_u8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const uint8_t *_a = (uint8_t *) impl.test_cases_int_pointer1;
  int8_t *_b = (int8_t *) impl.test_cases_int_pointer2;
  _b[0] = _b[0] % 8;

  uint8_t _c;
  if (_b[0] < 0) {
    _c = sat_rshr(_a[0], _b[0]);
  } else {
    _c = saturate_uint8((uint16_t) _a[0] << _b[0]);
  }

  const uint8_t c = vqrshlb_u8(_a[0], _b[0]);
  return c == _c ? TEST_SUCCESS : TEST_FAIL;
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

// Compares the scalar intrinsic vqrshlh_u16 against a reference value.
// The shift operand is reduced to -15..15 in place. Negative shifts take
// their expected value from sat_rshr(), which is defined elsewhere in the
// test sources (presumably a rounding right shift -- confirm against its
// definition). Non-negative shifts use a widened left shift followed by
// saturate_uint16().
result_t test_vqrshlh_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const uint16_t *_a = (uint16_t *) impl.test_cases_int_pointer1;
  int16_t *_b = (int16_t *) impl.test_cases_int_pointer2;
  _b[0] = _b[0] % 16;

  uint16_t _c;
  if (_b[0] < 0) {
    _c = sat_rshr(_a[0], _b[0]);
  } else {
    _c = saturate_uint16((uint32_t) _a[0] << _b[0]);
  }

  const uint16_t c = vqrshlh_u16(_a[0], _b[0]);
  return c == _c ? TEST_SUCCESS : TEST_FAIL;
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

// Compares the scalar intrinsic vqrshls_u32 against a reference value.
// The shift operand is reduced to -31..31 in place. Negative shifts take
// their expected value from sat_rshr(), which is defined elsewhere in the
// test sources (presumably a rounding right shift -- confirm against its
// definition). Non-negative shifts use a widened left shift followed by
// saturate_uint32().
result_t test_vqrshls_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const uint32_t *_a = (uint32_t *) impl.test_cases_int_pointer1;
  int32_t *_b = (int32_t *) impl.test_cases_int_pointer2;
  _b[0] = _b[0] % 32;

  uint32_t _c;
  if (_b[0] < 0) {
    _c = sat_rshr(_a[0], _b[0]);
  } else {
    _c = saturate_uint32((uint64_t) _a[0] << _b[0]);
  }

  const uint32_t c = vqrshls_u32(_a[0], _b[0]);
  return c == _c ? TEST_SUCCESS : TEST_FAIL;
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

// Placeholder test body: performs no check and only reports TEST_UNIMPL.
result_t test_vqrshld_u64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
  return TEST_UNIMPL;
}

Expand Down
16 changes: 8 additions & 8 deletions tests/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1047,14 +1047,14 @@
_(vqrshlq_u16) \
_(vqrshlq_u32) \
_(vqrshlq_u64) \
/*_(vqrshlb_s8) */ \
/*_(vqrshlh_s16) */ \
/*_(vqrshls_s32) */ \
/*_(vqrshld_s64) */ \
/*_(vqrshlb_u8) */ \
/*_(vqrshlh_u16) */ \
/*_(vqrshls_u32) */ \
/*_(vqrshld_u64) */ \
_(vqrshlb_s8) \
_(vqrshlh_s16) \
_(vqrshls_s32) \
_(vqrshld_s64) \
_(vqrshlb_u8) \
_(vqrshlh_u16) \
_(vqrshls_u32) \
_(vqrshld_u64) \
_(vshr_n_s8) \
_(vshr_n_s16) \
_(vshr_n_s32) \
Expand Down
Loading