diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index 3d5cd3d544..e36cfe1914 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -37,6 +37,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/acos.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/acosh.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/add.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/angle.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/asin.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/asinh.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/atan.cpp @@ -87,6 +88,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/pow.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/proj.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/real.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/reciprocal.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/remainder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/round.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/rsqrt.cpp diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 4355fea442..4508e1e3e3 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -102,6 +102,7 @@ acos, acosh, add, + angle, asin, asinh, atan, @@ -153,6 +154,7 @@ pow, proj, real, + reciprocal, remainder, round, rsqrt, @@ -342,4 +344,6 @@ "var", "__array_api_version__", "__array_namespace_info__", + "reciprocal", + "angle", ] diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index 4a0d1c451f..f638ebc3c1 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -28,7 +28,8 @@ from ._copy_utils import _empty_like_orderK, _empty_like_pair_orderK from ._type_utils import ( - _acceptance_fn_default, + _acceptance_fn_default_binary, + _acceptance_fn_default_unary, _all_data_types, _find_buf_dtype, _find_buf_dtype2, @@ -62,17 +63,39 @@ class UnaryElementwiseFunc: computational tasks complete execution, while the second event corresponds to computational tasks associated with function evaluation. + acceptance_fn (callable, optional): + Function to influence type promotion behavior of this unary + function. The function takes 4 arguments: + arg_dtype - Data type of the first argument + buf_dtype - Data type the argument would be cast to + res_dtype - Data type of the output array with function values + sycl_dev - The :class:`dpctl.SyclDevice` where the function + evaluation is carried out. + The function is invoked when the argument of the unary function + requires casting, e.g. the argument of `dpctl.tensor.log` is an + array with integral data type. docs (str): Documentation string for the unary function. """ - def __init__(self, name, result_type_resolver_fn, unary_dp_impl_fn, docs): + def __init__( + self, + name, + result_type_resolver_fn, + unary_dp_impl_fn, + docs, + acceptance_fn=None, + ): self.__name__ = "UnaryElementwiseFunc" self.name_ = name self.result_type_resolver_fn_ = result_type_resolver_fn self.types_ = None self.unary_fn_ = unary_dp_impl_fn self.__doc__ = docs + if callable(acceptance_fn): + self.acceptance_fn_ = acceptance_fn + else: + self.acceptance_fn_ = _acceptance_fn_default_unary def __str__(self): return f"<{self.__name__} '{self.name_}'>" @@ -93,6 +116,24 @@ def get_type_result_resolver_function(self): """ return self.result_type_resolver_fn_ + def get_type_promotion_path_acceptance_function(self): + """Returns the acceptance function for this + elementwise binary function. + + Acceptance function influences the type promotion + behavior of this unary function. + The function takes 4 arguments: + arg_dtype - Data type of the first argument + buf_dtype - Data type the argument would be cast to + res_dtype - Data type of the output array with function values + sycl_dev - The :class:`dpctl.SyclDevice` where the function + evaluation is carried out. + The function is invoked when the argument of the unary function + requires casting, e.g. the argument of `dpctl.tensor.log` is an + array with integral data type. + """ + return self.acceptance_fn_ + @property def types(self): """Returns information about types supported by @@ -122,7 +163,10 @@ def __call__(self, x, out=None, order="K"): if order not in ["C", "F", "K", "A"]: order = "K" buf_dt, res_dt = _find_buf_dtype( - x.dtype, self.result_type_resolver_fn_, x.sycl_device + x.dtype, + self.result_type_resolver_fn_, + x.sycl_device, + acceptance_fn=self.acceptance_fn_, ) if res_dt is None: raise TypeError( @@ -482,7 +526,7 @@ def __init__( if callable(acceptance_fn): self.acceptance_fn_ = acceptance_fn else: - self.acceptance_fn_ = _acceptance_fn_default + self.acceptance_fn_ = _acceptance_fn_default_binary def __str__(self): return f"<{self.__name__} '{self.name_}'>" diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index 9879960999..a271ca8792 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -17,7 +17,7 @@ import dpctl.tensor._tensor_elementwise_impl as ti from ._elementwise_common import BinaryElementwiseFunc, UnaryElementwiseFunc -from ._type_utils import _acceptance_fn_divide +from ._type_utils import _acceptance_fn_divide, _acceptance_fn_reciprocal # U01: ==== ABS (x) _abs_docstring_ = """ @@ -1880,10 +1880,72 @@ Returns: usm_narray: An array containing the element-wise reciprocal square-root. - The data type of the returned array is determined by + The returned array has a floating-point data type determined by the Type Promotion Rules. """ rsqrt = UnaryElementwiseFunc( "rsqrt", ti._rsqrt_result_type, ti._rsqrt, _rsqrt_docstring_ ) + + +# U42: ==== RECIPROCAL (x) +_reciprocal_docstring = """ +reciprocal(x, out=None, order='K') + +Computes the reciprocal of each element `x_i` for input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have a real-valued floating-point data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_narray: + An array containing the element-wise reciprocals. + The returned array has a floating-point data type determined + by the Type Promotion Rules. +""" + +reciprocal = UnaryElementwiseFunc( + "reciprocal", + ti._reciprocal_result_type, + ti._reciprocal, + _reciprocal_docstring, + acceptance_fn=_acceptance_fn_reciprocal, +) + + +# U43: ==== ANGLE (x) +_angle_docstring = """ +angle(x, out=None, order='K') + +Computes the phase angle (also called the argument) of each element `x_i` for +input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have a complex-valued floating-point data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_narray: + An array containing the element-wise phase angles. + The returned array has a floating-point data type determined + by the Type Promotion Rules. +""" + +angle = UnaryElementwiseFunc( + "angle", + ti._angle_result_type, + ti._angle, + _angle_docstring, +) diff --git a/dpctl/tensor/_type_utils.py b/dpctl/tensor/_type_utils.py index 7f496b02fa..bacd488226 100644 --- a/dpctl/tensor/_type_utils.py +++ b/dpctl/tensor/_type_utils.py @@ -132,7 +132,27 @@ def _to_device_supported_dtype(dt, dev): return dt -def _find_buf_dtype(arg_dtype, query_fn, sycl_dev): +def _acceptance_fn_default_unary(arg_dtype, ret_buf_dt, res_dt, sycl_dev): + return True + + +def _acceptance_fn_reciprocal(arg_dtype, buf_dt, res_dt, sycl_dev): + # if the kind of result is different from + # the kind of input, use the default data + # we use default dtype for the resulting kind. + # This guarantees alignment of reciprocal and + # divide output types. + if buf_dt.kind != arg_dtype.kind: + default_dt = _get_device_default_dtype(res_dt.kind, sycl_dev) + if res_dt == default_dt: + return True + else: + return False + else: + return True + + +def _find_buf_dtype(arg_dtype, query_fn, sycl_dev, acceptance_fn): res_dt = query_fn(arg_dtype) if res_dt: return None, res_dt @@ -144,7 +164,11 @@ def _find_buf_dtype(arg_dtype, query_fn, sycl_dev): if _can_cast(arg_dtype, buf_dt, _fp16, _fp64): res_dt = query_fn(buf_dt) if res_dt: - return buf_dt, res_dt + acceptable = acceptance_fn(arg_dtype, buf_dt, res_dt, sycl_dev) + if acceptable: + return buf_dt, res_dt + else: + continue return None, None @@ -163,7 +187,7 @@ def _get_device_default_dtype(dt_kind, sycl_dev): raise RuntimeError -def _acceptance_fn_default( +def _acceptance_fn_default_binary( arg1_dtype, arg2_dtype, ret_buf1_dt, ret_buf2_dt, res_dt, sycl_dev ): return True @@ -230,6 +254,8 @@ def _find_buf_dtype2(arg1_dtype, arg2_dtype, query_fn, sycl_dev, acceptance_fn): "_find_buf_dtype", "_find_buf_dtype2", "_to_device_supported_dtype", - "_acceptance_fn_default", + "_acceptance_fn_default_unary", + "_acceptance_fn_reciprocal", + "_acceptance_fn_default_binary", "_acceptance_fn_divide", ] diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 5b394d971b..284de1cbe1 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -687,7 +687,8 @@ cdef class usm_ndarray: """ Returns real component for arrays with complex data-types and returns itself for all other data-types. """ - if (self.typenum_ < UAR_CFLOAT): + # explicitly check for UAR_HALF, which is greater than UAR_CFLOAT + if (self.typenum_ < UAR_CFLOAT or self.typenum_ == UAR_HALF): # elements are real return self if (self.typenum_ < UAR_TYPE_SENTINEL): @@ -698,7 +699,8 @@ cdef class usm_ndarray: """ Returns imaginary component for arrays with complex data-types and returns zero array for all other data-types. """ - if (self.typenum_ < UAR_CFLOAT): + # explicitly check for UAR_HALF, which is greater than UAR_CFLOAT + if (self.typenum_ < UAR_CFLOAT or self.typenum_ == UAR_HALF): # elements are real return _zero_like(self) if (self.typenum_ < UAR_TYPE_SENTINEL): @@ -1306,14 +1308,15 @@ cdef usm_ndarray _m_transpose(usm_ndarray ary): cdef usm_ndarray _zero_like(usm_ndarray ary): """ - Make C-contiguous array of zero elements with same shape - and type as ary. + Make C-contiguous array of zero elements with same shape, + type, device, and sycl_queue as ary. """ cdef dt = _make_typestr(ary.typenum_) cdef usm_ndarray r = usm_ndarray( - _make_int_tuple(ary.nd_, ary.shape_), + _make_int_tuple(ary.nd_, ary.shape_) if ary.nd_ > 0 else tuple(), dtype=dt, - buffer=ary.base_.get_usm_type() + buffer=ary.base_.get_usm_type(), + buffer_ctor_kwargs={"queue": ary.get_sycl_queue()}, ) r.base_.memset() return r diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/angle.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/angle.hpp new file mode 100644 index 0000000000..17ef25b0b6 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/angle.hpp @@ -0,0 +1,185 @@ +//=== angle.hpp - Unary function ANGLE ------*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of ANGLE(x) function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" +#include "sycl_complex.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace angle +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; + +template struct AngleFunctor +{ + + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &in) const + { +#ifdef USE_SYCL_FOR_COMPLEX_TYPES + using rT = typename argT::value_type; + + return exprm_ns::arg(exprm_ns::complex(in)); // std::arg(in); +#else + return std::arg(in); +#endif + } +}; + +template +using AngleContigFunctor = + elementwise_common::UnaryContigFunctor, + vec_sz, + n_vecs>; + +template +using AngleStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct AngleOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, float>, + td_ns::TypeMapResultEntry, double>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class angle_contig_kernel; + +template +sycl::event angle_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, AngleOutputType, AngleContigFunctor, angle_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct AngleContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = angle_contig_impl; + return fn; + } + } +}; + +template struct AngleTypeMapFactory +{ + /*! @brief get typeid for output type of std::arg(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename AngleOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class angle_strided_kernel; + +template +sycl::event +angle_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, AngleOutputType, AngleStridedFunctor, angle_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct AngleStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = angle_strided_impl; + return fn; + } + } +}; + +} // namespace angle +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp index 92f5ffa729..634c29b5ed 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp @@ -1,4 +1,4 @@ -//=== proj.hpp - Unary function CONJ ------ +//=== proj.hpp - Unary function PROJ ------ //*-C++-*--/===// // // Data Parallel Control (dpctl) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/reciprocal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/reciprocal.hpp new file mode 100644 index 0000000000..c775576867 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/reciprocal.hpp @@ -0,0 +1,203 @@ +//=== reciprocal.hpp - Unary function RECIPROCAL ------ +//*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of RECIPROCAL(x) +/// function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include +#include + +#include "sycl_complex.hpp" +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace reciprocal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; + +template struct ReciprocalFunctor +{ + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &in) const + { + if constexpr (is_complex::value) { + + using realT = typename argT::value_type; +#ifdef USE_SYCL_FOR_COMPLEX_TYPES + + return realT(1) / exprm_ns::complex(in); +#else + return realT(1) / in; +#endif + } + else { + return argT(1) / in; + } + } +}; + +template +using ReciprocalContigFunctor = + elementwise_common::UnaryContigFunctor, + vec_sz, + n_vecs>; + +template +using ReciprocalStridedFunctor = + elementwise_common::UnaryStridedFunctor>; + +template struct ReciprocalOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class reciprocal_contig_kernel; + +template +sycl::event reciprocal_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct ReciprocalContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename ReciprocalOutputType::value_type, void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = reciprocal_contig_impl; + return fn; + } + } +}; + +template struct ReciprocalTypeMapFactory +{ + /*! @brief get typeid for output type of 1 / x */ + std::enable_if_t::value, int> get() + { + using rT = typename ReciprocalOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class reciprocal_strided_kernel; + +template +sycl::event +reciprocal_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct ReciprocalStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename ReciprocalOutputType::value_type, void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = reciprocal_strided_impl; + return fn; + } + } +}; + +} // namespace reciprocal +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/angle.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/angle.cpp new file mode 100644 index 0000000000..166b37b27b --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/angle.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "angle.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/angle.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U43: ==== ANGLE (x) +namespace impl +{ + +namespace angle_fn_ns = dpctl::tensor::kernels::angle; + +static unary_contig_impl_fn_ptr_t + angle_contig_dispatch_vector[td_ns::num_types]; +static int angle_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + angle_strided_dispatch_vector[td_ns::num_types]; + +void populate_angle_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = angle_fn_ns; + + using fn_ns::AngleContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(angle_contig_dispatch_vector); + + using fn_ns::AngleStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(angle_strided_dispatch_vector); + + using fn_ns::AngleTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(angle_output_typeid_vector); +}; + +} // namespace impl + +void init_angle(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_angle_dispatch_vectors(); + using impl::angle_contig_dispatch_vector; + using impl::angle_output_typeid_vector; + using impl::angle_strided_dispatch_vector; + + auto angle_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, angle_output_typeid_vector, + angle_contig_dispatch_vector, angle_strided_dispatch_vector); + }; + m.def("_angle", angle_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto angle_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + angle_output_typeid_vector); + }; + m.def("_angle_result_type", angle_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/angle.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/angle.hpp new file mode 100644 index 0000000000..30a58696b3 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/angle.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_angle(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp index 751e44ff55..2398ff258b 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp @@ -29,6 +29,7 @@ #include "acos.hpp" #include "acosh.hpp" #include "add.hpp" +#include "angle.hpp" #include "asin.hpp" #include "asinh.hpp" #include "atan.hpp" @@ -79,6 +80,7 @@ #include "pow.hpp" #include "proj.hpp" #include "real.hpp" +#include "reciprocal.hpp" #include "remainder.hpp" #include "round.hpp" #include "rsqrt.hpp" @@ -110,6 +112,7 @@ void init_elementwise_functions(py::module_ m) init_acos(m); init_acosh(m); init_add(m); + init_angle(m); init_asin(m); init_asinh(m); init_atan(m); @@ -161,6 +164,7 @@ void init_elementwise_functions(py::module_ m) init_pow(m); init_proj(m); init_real(m); + init_reciprocal(m); init_remainder(m); init_round(m); init_rsqrt(m); diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/reciprocal.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/reciprocal.cpp new file mode 100644 index 0000000000..5f86188c99 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/reciprocal.cpp @@ -0,0 +1,123 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "reciprocal.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/reciprocal.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U42: ==== REAL (x) +namespace impl +{ + +namespace reciprocal_fn_ns = dpctl::tensor::kernels::reciprocal; + +static unary_contig_impl_fn_ptr_t + reciprocal_contig_dispatch_vector[td_ns::num_types]; +static int reciprocal_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + reciprocal_strided_dispatch_vector[td_ns::num_types]; + +void populate_reciprocal_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = reciprocal_fn_ns; + + using fn_ns::ReciprocalContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(reciprocal_contig_dispatch_vector); + + using fn_ns::ReciprocalStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(reciprocal_strided_dispatch_vector); + + using fn_ns::ReciprocalTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(reciprocal_output_typeid_vector); +}; + +} // namespace impl + +void init_reciprocal(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_reciprocal_dispatch_vectors(); + using impl::reciprocal_contig_dispatch_vector; + using impl::reciprocal_output_typeid_vector; + using impl::reciprocal_strided_dispatch_vector; + + auto reciprocal_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + reciprocal_output_typeid_vector, + reciprocal_contig_dispatch_vector, + reciprocal_strided_dispatch_vector); + }; + m.def("_reciprocal", reciprocal_pyapi, "", py::arg("src"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + + auto reciprocal_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + reciprocal_output_typeid_vector); + }; + m.def("_reciprocal_result_type", reciprocal_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/reciprocal.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/reciprocal.hpp new file mode 100644 index 0000000000..506aeaa803 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/reciprocal.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_reciprocal(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tests/elementwise/test_angle.py b/dpctl/tests/elementwise/test_angle.py new file mode 100644 index 0000000000..bf16888be3 --- /dev/null +++ b/dpctl/tests/elementwise/test_angle.py @@ -0,0 +1,92 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools + +import numpy as np +import pytest + +import dpctl.tensor as dpt +from dpctl.tensor._type_utils import _can_cast +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _complex_fp_dtypes, _no_complex_dtypes + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_angle_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + x = dpt.asarray(1, dtype=dtype, sycl_queue=q) + dt = dpt.dtype(dtype) + dev = q.sycl_device + _fp16 = dev.has_aspect_fp16 + _fp64 = dev.has_aspect_fp64 + if _can_cast(dt, dpt.complex64, _fp16, _fp64): + assert dpt.angle(x).dtype == dpt.float32 + else: + assert dpt.angle(x).dtype == dpt.float64 + + +@pytest.mark.parametrize("dtype", _no_complex_dtypes[1:]) +def test_angle_real(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + x = dpt.arange(10, dtype=dtype, sycl_queue=q) + r = dpt.angle(x) + + assert dpt.all(r == 0) + + +@pytest.mark.parametrize("dtype", _complex_fp_dtypes) +def test_angle_complex(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + tol = 8 * dpt.finfo(dtype).resolution + vals = dpt.pi * dpt.arange(10, dtype=dpt.finfo(dtype).dtype, sycl_queue=q) + + x = dpt.zeros(10, dtype=dtype, sycl_queue=q) + + x.imag[...] = vals + r = dpt.angle(x) + expected = dpt.atan2(x.imag, x.real) + assert dpt.allclose(r, expected, atol=tol, rtol=tol) + + x.real[...] += dpt.pi + r = dpt.angle(x) + expected = dpt.atan2(x.imag, x.real) + assert dpt.allclose(r, expected, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", ["c8", "c16"]) +def test_angle_special_cases(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + vals = [np.nan, -np.nan, np.inf, -np.inf, +0.0, -0.0] + vals = [complex(*val) for val in itertools.product(vals, repeat=2)] + + x = dpt.asarray(vals, dtype=dtype, sycl_queue=q) + + r = dpt.angle(x) + expected = dpt.atan2(x.imag, x.real) + + tol = 8 * dpt.finfo(dtype).resolution + + assert dpt.allclose(r, expected, atol=tol, rtol=tol, equal_nan=True) diff --git a/dpctl/tests/elementwise/test_reciprocal.py b/dpctl/tests/elementwise/test_reciprocal.py new file mode 100644 index 0000000000..319cac915f --- /dev/null +++ b/dpctl/tests/elementwise/test_reciprocal.py @@ -0,0 +1,93 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools + +import pytest + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _complex_fp_dtypes + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_reciprocal_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + x = dpt.asarray(1, dtype=dtype, sycl_queue=q) + one = dpt.asarray(1, dtype=dtype, sycl_queue=q) + expected_dtype = dpt.divide(one, x).dtype + assert dpt.reciprocal(x).dtype == expected_dtype + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_reciprocal_output_contig(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 1027 + + x = dpt.linspace(1, 13, num=n_seq, dtype=dtype, sycl_queue=q) + res = dpt.reciprocal(x) + expected = 1 / x + tol = 8 * dpt.finfo(res.dtype).resolution + assert dpt.allclose(res, expected, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_reciprocal_output_strided(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 2054 + + x = dpt.linspace(1, 13, num=n_seq, dtype=dtype, sycl_queue=q)[::-2] + res = dpt.reciprocal(x) + expected = 1 / x + tol = 8 * dpt.finfo(res.dtype).resolution + assert dpt.allclose(res, expected, atol=tol, rtol=tol) + + +def test_reciprocal_special_cases(): + get_queue_or_skip() + + x = dpt.asarray([dpt.nan, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4") + res = dpt.reciprocal(x) + expected = dpt.asarray([dpt.nan, dpt.inf, -dpt.inf, 0.0, -0.0], dtype="f4") + assert dpt.allclose(res, expected, equal_nan=True) + + +@pytest.mark.parametrize("dtype", _complex_fp_dtypes) +def test_reciprocal_complex_special_cases(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + nans_ = [dpt.nan, -dpt.nan] + infs_ = [dpt.inf, -dpt.inf] + finites_ = [-1.0, -0.0, 0.0, 1.0] + inps_ = nans_ + infs_ + finites_ + c_ = [complex(*v) for v in itertools.product(inps_, repeat=2)] + + z = dpt.asarray(c_, dtype=dtype) + r = dpt.reciprocal(z) + + expected = 1 / z + + tol = dpt.finfo(r.dtype).resolution + + assert dpt.allclose(r, expected, atol=tol, rtol=tol, equal_nan=True) diff --git a/dpctl/tests/elementwise/test_type_utils.py b/dpctl/tests/elementwise/test_type_utils.py index f6e09b9b4e..9407195bd9 100644 --- a/dpctl/tests/elementwise/test_type_utils.py +++ b/dpctl/tests/elementwise/test_type_utils.py @@ -124,7 +124,9 @@ def _denier_fn(dt): for fp16 in [True, False]: dev = MockDevice(fp16, fp64) arg_dt = dpt.float64 - r = tu._find_buf_dtype(arg_dt, _denier_fn, dev) + r = tu._find_buf_dtype( + arg_dt, _denier_fn, dev, tu._acceptance_fn_default_unary + ) assert r == ( None, None, @@ -157,7 +159,11 @@ def _denier_fn(dt1, dt2): arg1_dt = dpt.float64 arg2_dt = dpt.complex64 r = tu._find_buf_dtype2( - arg1_dt, arg2_dt, _denier_fn, dev, tu._acceptance_fn_default + arg1_dt, + arg2_dt, + _denier_fn, + dev, + tu._acceptance_fn_default_binary, ) assert r == ( None, diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 095bbc5638..5e3feb660d 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1461,6 +1461,24 @@ def test_real_imag_views(): X_scalar[...] = complex(n * m, 2 * n * m) assert X_scalar.real and X_scalar.imag + # check that _zero_like works for scalars + X_scalar = dpt.usm_ndarray((), dtype="f4") + assert isinstance(X_scalar.imag, dpt.usm_ndarray) + assert not X_scalar.imag + assert X_scalar.real.sycl_queue == X_scalar.imag.sycl_queue + + +def test_real_imag_views_fp16(): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dpt.float16, q) + + X = dpt.usm_ndarray( + (3, 4), dtype=dpt.float16, buffer_ctor_kwargs={"queue": q} + ) + assert isinstance(X.real, dpt.usm_ndarray) and isinstance( + X.imag, dpt.usm_ndarray + ) + @pytest.mark.parametrize( "dtype",