diff --git a/src/vatensor/bitwise.cpp b/src/vatensor/bitwise.cpp index e9e46b1..2168c91 100644 --- a/src/vatensor/bitwise.cpp +++ b/src/vatensor/bitwise.cpp @@ -11,8 +11,8 @@ using namespace va; -#ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION -void bitwise_and(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VScalar& b) { +template +void bitwise_and(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::bitwise_and, promote::common_int_in_same_out @@ -24,27 +24,17 @@ void bitwise_and(VStoreAllocator& allocator, VArrayTarget target, const VData& a b ); } -#endif void va::bitwise_and(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION OPTIMIZE_COMMUTATIVE(::bitwise_and, allocator, target, a, b); #endif - va::xoperation_inplace< - Feature::bitwise_and, - promote::common_int_in_same_out - >( - XFunction {}, - allocator, - target, - a, - b - ); + ::bitwise_and(allocator, target, a, b); } -#ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION -void bitwise_or(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VScalar& b) { +template +void bitwise_or(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::bitwise_or, promote::common_int_in_same_out @@ -56,27 +46,17 @@ void bitwise_or(VStoreAllocator& allocator, VArrayTarget target, const VData& a, b ); } -#endif void va::bitwise_or(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION OPTIMIZE_COMMUTATIVE(::bitwise_or, allocator, target, a, b); #endif - va::xoperation_inplace< - Feature::bitwise_or, - promote::common_int_in_same_out - >( - XFunction {}, - allocator, - target, - a, - b - ); + ::bitwise_or(allocator, target, a, b); } -#ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION -void bitwise_xor(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VScalar& b) { +template +void bitwise_xor(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::bitwise_xor, promote::common_int_in_same_out @@ -88,23 +68,13 @@ void bitwise_xor(VStoreAllocator& allocator, VArrayTarget target, const VData& a b ); } -#endif void va::bitwise_xor(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION OPTIMIZE_COMMUTATIVE(::bitwise_xor, allocator, target, a, b); #endif - va::xoperation_inplace< - Feature::bitwise_xor, - promote::common_int_in_same_out - >( - XFunction {}, - allocator, - target, - a, - b - ); + ::bitwise_xor(allocator, target, a, b); } void va::bitwise_not(VStoreAllocator& allocator, VArrayTarget target, const VData& a) { @@ -119,8 +89,8 @@ void va::bitwise_not(VStoreAllocator& allocator, VArrayTarget target, const VDat ); } -#ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION -void bitwise_left_shift(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VScalar& b) { +template +void bitwise_left_shift(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::bitwise_left_shift, promote::left_of_ints_in_same_out @@ -132,27 +102,17 @@ void bitwise_left_shift(VStoreAllocator& allocator, VArrayTarget target, const V b ); } -#endif void va::bitwise_left_shift(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION - OPTIMIZE_COMMUTATIVE(::bitwise_left_shift, allocator, target, a, b); + OPTIMIZE_NONCOMMUTATIVE(::bitwise_left_shift, allocator, target, a, b); #endif - va::xoperation_inplace< - Feature::bitwise_left_shift, - promote::left_of_ints_in_same_out - >( - XFunction {}, - allocator, - target, - a, - b - ); + ::bitwise_left_shift(allocator, target, a, b); } -#ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION -void bitwise_right_shift(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VScalar& b) { +template +void bitwise_right_shift(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::bitwise_right_shift, promote::left_of_ints_in_same_out @@ -164,21 +124,11 @@ void bitwise_right_shift(VStoreAllocator& allocator, VArrayTarget target, const b ); } -#endif void va::bitwise_right_shift(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION - OPTIMIZE_COMMUTATIVE(::bitwise_right_shift, allocator, target, a, b); + OPTIMIZE_NONCOMMUTATIVE(::bitwise_right_shift, allocator, target, a, b); #endif - va::xoperation_inplace< - Feature::bitwise_right_shift, - promote::left_of_ints_in_same_out - >( - XFunction {}, - allocator, - target, - a, - b - ); + ::bitwise_right_shift(allocator, target, a, b); } diff --git a/src/vatensor/comparison.cpp b/src/vatensor/comparison.cpp index 9257a8e..791433e 100644 --- a/src/vatensor/comparison.cpp +++ b/src/vatensor/comparison.cpp @@ -11,11 +11,8 @@ using namespace va; -#ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION -#if !(defined(__aarch64__) || defined(_M_ARM64) || defined(__ARM_NEON) || defined(_M_ARM)) -// FIXME NEON xtensor / xsimd has a compile-time bug, see -// https://github.com/xtensor-stack/xtensor/issues/2733 -void equal_to(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VScalar& b) { +template +void equal_to(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::equal_to, promote::common_in_nat_out @@ -27,34 +24,23 @@ void equal_to(VStoreAllocator& allocator, VArrayTarget target, const VData& a, c b ); } -#endif -#endif void va::equal_to(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION -// Doesn't work right now with NEON, see above. +// FIXME NEON xtensor / xsimd has a compile-time bug, see +// https://github.com/xtensor-stack/xtensor/issues/2733 #if !(defined(__aarch64__) || defined(_M_ARM64) || defined(__ARM_NEON) || defined(_M_ARM)) OPTIMIZE_COMMUTATIVE(::equal_to, allocator, target, a, b); #endif #endif - va::xoperation_inplace< - Feature::equal_to, - promote::common_in_nat_out - >( - va::XFunction {}, - allocator, - target, - a, - b - ); + ::equal_to(allocator, target, a, b); } -#ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION -#if !(defined(__aarch64__) || defined(_M_ARM64) || defined(__ARM_NEON) || defined(_M_ARM)) -void not_equal_to(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VScalar& b) { +template +void not_equal_to(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::not_equal_to, promote::common_in_nat_out @@ -66,8 +52,6 @@ void not_equal_to(VStoreAllocator& allocator, VArrayTarget target, const VData& b ); } -#endif -#endif void va::not_equal_to(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION @@ -76,48 +60,11 @@ void va::not_equal_to(VStoreAllocator& allocator, VArrayTarget target, const VDa #endif #endif - va::xoperation_inplace< - Feature::not_equal_to, - promote::common_in_nat_out - >( - va::XFunction {}, - allocator, - target, - a, - b - ); + ::not_equal_to(allocator, target, a, b); } -void va::greater(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { -#ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION - if (va::dimension(a) == 0) { - va::xoperation_inplace< - Feature::greater, - promote::reject_complex - >( - va::XFunction {}, - allocator, - target, - va::to_single_value(a), - b - ); - return; - } - if (va::dimension(b) == 0) { - va::xoperation_inplace< - Feature::greater, - promote::reject_complex - >( - va::XFunction {}, - allocator, - target, - a, - va::to_single_value(b) - ); - return; - } -#endif - +template +void greater(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::greater, promote::reject_complex @@ -130,36 +77,16 @@ void va::greater(VStoreAllocator& allocator, VArrayTarget target, const VData& a ); } -void va::greater_equal(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { +void va::greater(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION - if (va::dimension(a) == 0) { - va::xoperation_inplace< - Feature::greater_equal, - promote::reject_complex - >( - va::XFunction {}, - allocator, - target, - va::to_single_value(a), - b - ); - return; - } - if (va::dimension(b) == 0) { - va::xoperation_inplace< - Feature::greater_equal, - promote::reject_complex - >( - va::XFunction {}, - allocator, - target, - a, - va::to_single_value(b) - ); - return; - } + OPTIMIZE_NONCOMMUTATIVE(::greater, allocator, target, a, b); #endif + ::greater(allocator, target, a, b); +} + +template +void greater_equal(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::greater_equal, promote::reject_complex @@ -172,36 +99,16 @@ void va::greater_equal(VStoreAllocator& allocator, VArrayTarget target, const VD ); } -void va::less(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { +void va::greater_equal(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION - if (va::dimension(a) == 0) { - va::xoperation_inplace< - Feature::less, - promote::reject_complex - >( - va::XFunction {}, - allocator, - target, - va::to_single_value(a), - b - ); - return; - } - if (va::dimension(b) == 0) { - va::xoperation_inplace< - Feature::less, - promote::reject_complex - >( - va::XFunction {}, - allocator, - target, - a, - va::to_single_value(b) - ); - return; - } + OPTIMIZE_NONCOMMUTATIVE(::greater_equal, allocator, target, a, b); #endif + ::greater_equal(allocator, target, a, b); +} + +template +void less(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::less, promote::reject_complex @@ -214,36 +121,16 @@ void va::less(VStoreAllocator& allocator, VArrayTarget target, const VData& a, c ); } -void va::less_equal(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { +void va::less(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION - if (va::dimension(a) == 0) { - va::xoperation_inplace< - Feature::less_equal, - promote::reject_complex - >( - va::XFunction {}, - allocator, - target, - va::to_single_value(a), - b - ); - return; - } - if (va::dimension(b) == 0) { - va::xoperation_inplace< - Feature::less_equal, - promote::reject_complex - >( - va::XFunction {}, - allocator, - target, - a, - va::to_single_value(b) - ); - return; - } + OPTIMIZE_NONCOMMUTATIVE(::less, allocator, target, a, b); #endif + ::less(allocator, target, a, b); +} + +template +void less_equal(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::less_equal, promote::reject_complex @@ -256,6 +143,14 @@ void va::less_equal(VStoreAllocator& allocator, VArrayTarget target, const VData ); } +void va::less_equal(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { +#ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION + OPTIMIZE_NONCOMMUTATIVE(::less_equal, allocator, target, a, b); +#endif + + ::less_equal(allocator, target, a, b); +} + template void is_close(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b, double rtol, double atol, bool equal_nan) { va::xoperation_inplace< diff --git a/src/vatensor/scalar_tricks.hpp b/src/vatensor/scalar_tricks.hpp index c96cb25..f80d786 100644 --- a/src/vatensor/scalar_tricks.hpp +++ b/src/vatensor/scalar_tricks.hpp @@ -11,6 +11,16 @@ if (va::dimension(B_ARRAY) == 0) {\ return;\ } +#define OPTIMIZE_NONCOMMUTATIVE(FN_NAME, ALLOCATOR, TARGET, A_ARRAY, B_ARRAY)\ +if (va::dimension(A_ARRAY) == 0) {\ + FN_NAME(ALLOCATOR, TARGET, va::to_single_value(A_ARRAY), B_ARRAY);\ + return;\ +}\ +if (va::dimension(B_ARRAY) == 0) {\ + FN_NAME(ALLOCATOR, TARGET, A_ARRAY, va::to_single_value(B_ARRAY));\ + return;\ +} + #define OPTIMIZE_COMMUTATIVE_REDUCTION(FN_NAME, A_ARRAY, B_ARRAY)\ if (va::dimension(A_ARRAY) == 0) {\ return FN_NAME(B_ARRAY, va::to_single_value(A_ARRAY));\ diff --git a/src/vatensor/vmath.cpp b/src/vatensor/vmath.cpp index 78100e4..f498806 100644 --- a/src/vatensor/vmath.cpp +++ b/src/vatensor/vmath.cpp @@ -28,8 +28,8 @@ void va::negative(VStoreAllocator& allocator, VArrayTarget target, const VData& ); } -#ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION -void add(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VScalar& b) { +template +void add(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::add, promote::num_function_result_in_same_out @@ -41,18 +41,22 @@ void add(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const b ); } -#endif void va::add(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION OPTIMIZE_COMMUTATIVE(::add, allocator, target, a, b); #endif + ::add(allocator, target, a, b); +} + +template +void subtract(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< - Feature::add, - promote::num_function_result_in_same_out + Feature::subtract, + promote::num_function_result_in_same_out >( - XFunction {}, + XFunction {}, allocator, target, a, @@ -62,47 +66,14 @@ void va::add(VStoreAllocator& allocator, VArrayTarget target, const VData& a, co void va::subtract(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION - if (va::dimension(a) == 0) { - va::xoperation_inplace< - Feature::subtract, - promote::num_function_result_in_same_out - >( - XFunction {}, - allocator, - target, - va::to_single_value(a), - b - ); - return; - } - if (va::dimension(b) == 0) { - va::xoperation_inplace< - Feature::subtract, - promote::num_function_result_in_same_out - >( - XFunction {}, - allocator, - target, - a, - va::to_single_value(b) - ); - return; - } + OPTIMIZE_NONCOMMUTATIVE(::subtract, allocator, target, a, b); #endif - va::xoperation_inplace< - Feature::subtract, - promote::num_function_result_in_same_out - >( - XFunction {}, - allocator, - target, - a, - b - ); + ::subtract(allocator, target, a, b); } -void multiply(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VScalar& b) { +template +void multiply(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::multiply, promote::num_function_result_in_same_out @@ -120,48 +91,11 @@ void va::multiply(VStoreAllocator& allocator, VArrayTarget target, const VData& OPTIMIZE_COMMUTATIVE(::multiply, allocator, target, a, b); #endif - va::xoperation_inplace< - Feature::multiply, - promote::num_function_result_in_same_out - >( - XFunction {}, - allocator, - target, - a, - b - ); + ::multiply(allocator, target, a, b); } -void va::divide(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { -#ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION - if (va::dimension(a) == 0) { - va::xoperation_inplace< - Feature::divide, - promote::num_function_result_in_same_out - >( - XFunction {}, - allocator, - target, - va::to_single_value(a), - b - ); - return; - } - if (va::dimension(b) == 0) { - va::xoperation_inplace< - Feature::divide, - promote::num_function_result_in_same_out - >( - XFunction {}, - allocator, - target, - a, - va::to_single_value(b) - ); - return; - } -#endif - +template +void divide(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::divide, promote::num_function_result_in_same_out @@ -174,36 +108,16 @@ void va::divide(VStoreAllocator& allocator, VArrayTarget target, const VData& a, ); } -void va::remainder(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { +void va::divide(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION - if (va::dimension(a) == 0) { - va::xoperation_inplace< - Feature::remainder, - promote::reject_complex> - >( - XFunction {}, - allocator, - target, - va::to_single_value(a), - b - ); - return; - } - if (va::dimension(b) == 0) { - va::xoperation_inplace< - Feature::remainder, - promote::reject_complex> - >( - XFunction {}, - allocator, - target, - a, - va::to_single_value(b) - ); - return; - } + OPTIMIZE_NONCOMMUTATIVE(::divide, allocator, target, a, b); #endif + ::divide(allocator, target, a, b); +} + +template +void remainder(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::remainder, promote::reject_complex> @@ -216,36 +130,16 @@ void va::remainder(VStoreAllocator& allocator, VArrayTarget target, const VData& ); } -void va::pow(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { +void va::remainder(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { #ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION - if (va::dimension(a) == 0) { - va::xoperation_inplace< - Feature::pow, - promote::num_function_result_in_same_out - >( - XFunction {}, - allocator, - target, - va::to_single_value(a), - b - ); - return; - } - if (va::dimension(b) == 0) { - va::xoperation_inplace< - Feature::pow, - promote::num_function_result_in_same_out - >( - XFunction {}, - allocator, - target, - a, - va::to_single_value(b) - ); - return; - } + OPTIMIZE_NONCOMMUTATIVE(::remainder, allocator, target, a, b); #endif + ::remainder(allocator, target, a, b); +} + +template +void pow(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::pow, promote::num_function_result_in_same_out @@ -258,7 +152,16 @@ void va::pow(VStoreAllocator& allocator, VArrayTarget target, const VData& a, co ); } -void minimum(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VScalar& b) { +void va::pow(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VData& b) { +#ifndef NUMDOT_DISABLE_SCALAR_OPTIMIZATION + OPTIMIZE_NONCOMMUTATIVE(::pow, allocator, target, a, b); +#endif + + ::pow(allocator, target, a, b); +} + +template +void minimum(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::minimum, promote::reject_complex @@ -276,19 +179,11 @@ void va::minimum(VStoreAllocator& allocator, VArrayTarget target, const VData& a OPTIMIZE_COMMUTATIVE(::minimum, allocator, target, a, b); #endif - va::xoperation_inplace< - Feature::minimum, - promote::reject_complex - >( - XFunction> {}, - allocator, - target, - a, - b - ); + ::minimum(allocator, target, a, b); } -void maximum(VStoreAllocator& allocator, VArrayTarget target, const VData& a, const VScalar& b) { +template +void maximum(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& b) { va::xoperation_inplace< Feature::maximum, promote::reject_complex @@ -306,15 +201,21 @@ void va::maximum(VStoreAllocator& allocator, VArrayTarget target, const VData& a OPTIMIZE_COMMUTATIVE(::maximum, allocator, target, a, b); #endif + ::maximum(allocator, target, a, b); +} + +template +void clip(VStoreAllocator& allocator, VArrayTarget target, const A& a, const B& lo, const C& hi) { va::xoperation_inplace< - Feature::maximum, + Feature::clip, promote::reject_complex >( - XFunction> {}, + XFunction {}, allocator, target, a, - b + lo, + hi ); } @@ -323,32 +224,12 @@ void va::clip(VStoreAllocator& allocator, VArrayTarget target, const VData& a, c // TODO Check binary size add and perhaps just use min and max. if (va::dimension(lo) == 0 && va::dimension(hi) == 0) { - va::xoperation_inplace< - Feature::clip, - promote::reject_complex - >( - XFunction {}, - allocator, - target, - a, - va::to_single_value(lo), - va::to_single_value(hi) - ); + ::clip(allocator, target, a, to_single_value(lo), to_single_value(hi)); return; } #endif - va::xoperation_inplace< - Feature::clip, - promote::reject_complex - >( - XFunction {}, - allocator, - target, - a, - lo, - hi - ); + ::clip(allocator, target, a, lo, hi); } void va::sign(VStoreAllocator& allocator, VArrayTarget target, const VData& array) {