diff --git a/thrust/testing/complex.cu b/thrust/testing/complex.cu index a06d9b323b1..8a283abfc8c 100644 --- a/thrust/testing/complex.cu +++ b/thrust/testing/complex.cu @@ -54,18 +54,6 @@ struct TestComplexSizeAndAlignment }; SimpleUnitTest TestComplexSizeAndAlignmentInstance; -template -struct TestComplexTypeCheck -{ - void operator()() - { - THRUST_STATIC_ASSERT(thrust::is_complex>::value); - THRUST_STATIC_ASSERT(thrust::is_complex>::value); - THRUST_STATIC_ASSERT(thrust::is_complex>::value); - } -}; -SimpleUnitTest TestComplexTypeCheckInstance; - template struct TestComplexConstructionAndAssignment { @@ -449,18 +437,17 @@ struct TestComplexBasicArithmetic // Test the basic arithmetic functions against std ASSERT_ALMOST_EQUAL(thrust::abs(a), std::abs(b)); + ASSERT_ALMOST_EQUAL(thrust::arg(a), std::arg(b)); + ASSERT_ALMOST_EQUAL(thrust::norm(a), std::norm(b)); ASSERT_EQUAL(thrust::conj(a), std::conj(b)); - static_assert(cuda::std::is_same, decltype(thrust::conj(a))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::polar(data[0], data[1]), std::polar(data[0], data[1])); - static_assert(cuda::std::is_same, decltype(thrust::polar(data[0], data[1]))>::value, ""); // random_samples does not seem to produce infinities so proj(z) == z ASSERT_EQUAL(thrust::proj(a), a); - static_assert(cuda::std::is_same, decltype(thrust::proj(a))>::value, ""); } }; SimpleUnitTest TestComplexBasicArithmeticInstance; @@ -557,9 +544,6 @@ struct TestComplexExponentialFunctions ASSERT_ALMOST_EQUAL(thrust::exp(a), std::exp(b)); ASSERT_ALMOST_EQUAL(thrust::log(a), std::log(b)); ASSERT_ALMOST_EQUAL(thrust::log10(a), std::log10(b)); - static_assert(cuda::std::is_same, decltype(thrust::exp(a))>::value, ""); - static_assert(cuda::std::is_same, decltype(thrust::log(a))>::value, ""); - static_assert(cuda::std::is_same, decltype(thrust::log10(a))>::value, ""); } }; SimpleUnitTest @@ -579,24 +563,16 @@ struct TestComplexPowerFunctions const std::complex b_std(b_thrust); ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust, b_thrust), std::pow(a_std, b_std)); - static_assert(cuda::std::is_same, decltype(thrust::pow(a_thrust, b_thrust))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust, b_thrust.real()), std::pow(a_std, b_std.real())); - static_assert(cuda::std::is_same, decltype(thrust::pow(a_thrust, b_thrust.real()))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust.real(), b_thrust), std::pow(a_std.real(), b_std)); - static_assert(cuda::std::is_same, decltype(thrust::pow(a_thrust.real(), b_thrust))>::value, ""); - - ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust, 4), std::pow(a_std, 4)); - static_assert(cuda::std::is_same, decltype(thrust::pow(a_thrust, 4))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::sqrt(a_thrust), std::sqrt(a_std)); - static_assert(cuda::std::is_same, decltype(thrust::sqrt(a_thrust))>::value, ""); } // Test power functions with promoted types. { using T0 = T; using T1 = other_floating_point_type_t; - using promoted = typename thrust::detail::promoted_numerical_type::type; thrust::host_vector data = unittest::random_samples(4); @@ -606,17 +582,11 @@ struct TestComplexPowerFunctions const std::complex b_std(data[2], data[3]); ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust, b_thrust), std::pow(a_std, b_std)); - static_assert(cuda::std::is_same, decltype(thrust::pow(a_thrust, b_thrust))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(b_thrust, a_thrust), std::pow(b_std, a_std)); - static_assert(cuda::std::is_same, decltype(thrust::pow(b_thrust, a_thrust))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust, b_thrust.real()), std::pow(a_std, b_std.real())); - static_assert(cuda::std::is_same, decltype(thrust::pow(a_thrust, b_thrust.real()))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(b_thrust, a_thrust.real()), std::pow(b_std, a_std.real())); - static_assert(cuda::std::is_same, decltype(thrust::pow(b_thrust, a_thrust.real()))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust.real(), b_thrust), std::pow(a_std.real(), b_std)); - static_assert(cuda::std::is_same, decltype(thrust::pow(a_thrust.real(), b_thrust))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(b_thrust.real(), a_thrust), std::pow(b_std.real(), a_std)); - static_assert(cuda::std::is_same, decltype(thrust::pow(b_thrust.real(), a_thrust))>::value, ""); } } }; @@ -635,32 +605,20 @@ struct TestComplexTrigonometricFunctions ASSERT_ALMOST_EQUAL(thrust::cos(a), std::cos(c)); ASSERT_ALMOST_EQUAL(thrust::sin(a), std::sin(c)); ASSERT_ALMOST_EQUAL(thrust::tan(a), std::tan(c)); - static_assert(cuda::std::is_same, decltype(thrust::cos(a))>::value, ""); - static_assert(cuda::std::is_same, decltype(thrust::sin(a))>::value, ""); - static_assert(cuda::std::is_same, decltype(thrust::tan(a))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::cosh(a), std::cosh(c)); ASSERT_ALMOST_EQUAL(thrust::sinh(a), std::sinh(c)); ASSERT_ALMOST_EQUAL(thrust::tanh(a), std::tanh(c)); - static_assert(cuda::std::is_same, decltype(thrust::cosh(a))>::value, ""); - static_assert(cuda::std::is_same, decltype(thrust::sinh(a))>::value, ""); - static_assert(cuda::std::is_same, decltype(thrust::tanh(a))>::value, ""); #if THRUST_CPP_DIALECT >= 2011 ASSERT_ALMOST_EQUAL(thrust::acos(a), std::acos(c)); ASSERT_ALMOST_EQUAL(thrust::asin(a), std::asin(c)); ASSERT_ALMOST_EQUAL(thrust::atan(a), std::atan(c)); - static_assert(cuda::std::is_same, decltype(thrust::acos(a))>::value, ""); - static_assert(cuda::std::is_same, decltype(thrust::asin(a))>::value, ""); - static_assert(cuda::std::is_same, decltype(thrust::atan(a))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::acosh(a), std::acosh(c)); ASSERT_ALMOST_EQUAL(thrust::asinh(a), std::asinh(c)); ASSERT_ALMOST_EQUAL(thrust::atanh(a), std::atanh(c)); - static_assert(cuda::std::is_same, decltype(thrust::acosh(a))>::value, ""); - static_assert(cuda::std::is_same, decltype(thrust::asinh(a))>::value, ""); - static_assert(cuda::std::is_same, decltype(thrust::atanh(a))>::value, ""); #endif } @@ -709,21 +667,3 @@ struct TestComplexStdComplexDeviceInterop SimpleUnitTest TestComplexStdComplexDeviceInteropInstance; #endif - -template -struct TestComplexExplicitConstruction -{ - struct user_complex { - __host__ __device__ user_complex(T, T) {} - __host__ __device__ user_complex(const thrust::complex&) {} - }; - - void operator()() - { - const thrust::complex input(42.0, 1337.0); - const user_complex result = thrust::exp(input); - (void)result; - } -}; -SimpleUnitTest - TestComplexExplicitConstructionInstance; diff --git a/thrust/testing/unittest/assertions.h b/thrust/testing/unittest/assertions.h index f8adc87eeb4..748562c25ea 100644 --- a/thrust/testing/unittest/assertions.h +++ b/thrust/testing/unittest/assertions.h @@ -304,14 +304,31 @@ bool almost_equal(double a, double b, double a_tol, double r_tol) return true; } +namespace +{ // anonymous namespace + +template +struct is_complex : public THRUST_NS_QUALIFIER::false_type +{}; + +template +struct is_complex> : public THRUST_NS_QUALIFIER::true_type +{}; + +template +struct is_complex> : public THRUST_NS_QUALIFIER::true_type +{}; + +} // namespace + template -typename THRUST_NS_QUALIFIER::detail::enable_if::value && - THRUST_NS_QUALIFIER::is_complex::value, - bool>::type -almost_equal(const T1 &a, const T2 &b, double a_tol, double r_tol) +inline + typename THRUST_NS_QUALIFIER::detail::enable_if::value && is_complex::value, + bool>::type + almost_equal(const T1 &a, const T2 &b, double a_tol, double r_tol) { - return almost_equal(a.real(), b.real(), a_tol, r_tol) && - almost_equal(a.imag(), b.imag(), a_tol, r_tol); + return almost_equal(a.real(), b.real(), a_tol, r_tol) && + almost_equal(a.imag(), b.imag(), a_tol, r_tol); } template diff --git a/thrust/thrust/complex.h b/thrust/thrust/complex.h index 5aae8149050..f72661abd83 100644 --- a/thrust/thrust/complex.h +++ b/thrust/thrust/complex.h @@ -1,5 +1,6 @@ /* - * Copyright 2008-2023 NVIDIA Corporation + * Copyright 2008-2019 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,14 +30,38 @@ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) # pragma system_header #endif // no system header + +#include +#include +#include #include -#include -#include -#include +#if THRUST_CPP_DIALECT >= 2011 +# define THRUST_STD_COMPLEX_REAL(z) \ + reinterpret_cast< \ + const typename thrust::detail::remove_reference::type::value_type (&)[2] \ + >(z)[0] +# define THRUST_STD_COMPLEX_IMAG(z) \ + reinterpret_cast< \ + const typename thrust::detail::remove_reference::type::value_type (&)[2] \ + >(z)[1] +# define THRUST_STD_COMPLEX_DEVICE __device__ +#else +# define THRUST_STD_COMPLEX_REAL(z) (z).real() +# define THRUST_STD_COMPLEX_IMAG(z) (z).imag() +# define THRUST_STD_COMPLEX_DEVICE +#endif THRUST_NAMESPACE_BEGIN +/* + * Calls to the standard math library from inside the thrust namespace + * with real arguments require explicit scope otherwise they will fail + * to resolve as it will find the equivalent complex function but then + * fail to match the template, and give up looking for other scopes. + */ + + /*! \addtogroup numerics * \{ */ @@ -45,6 +70,86 @@ THRUST_NAMESPACE_BEGIN * \{ */ +/*! \cond + */ + +namespace detail +{ + +template +struct complex_storage; + +#if THRUST_CPP_DIALECT >= 2011 \ + && (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) \ + && (THRUST_GCC_VERSION >= 40800) + // C++11 implementation, excluding GCC 4.7, which doesn't have `alignas`. + template + struct complex_storage + { + struct alignas(Align) type { T x; T y; }; + }; +#elif (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) \ + || ( (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) \ + && (THRUST_GCC_VERSION < 40600)) + // C++03 implementation for MSVC and GCC <= 4.5. + // + // We have to implement `aligned_type` with specializations for MSVC + // and GCC 4.2 and older because they require literals as arguments to + // their alignment attribute. + + #if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) + // MSVC implementation. + #define THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(X) \ + template \ + struct complex_storage \ + { \ + __declspec(align(X)) struct type { T x; T y; }; \ + }; \ + /**/ + #else + // GCC <= 4.2 implementation. + #define THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(X) \ + template \ + struct complex_storage \ + { \ + struct type { T x; T y; } __attribute__((aligned(X))); \ + }; \ + /**/ + #endif + + // The primary template is a fallback, which doesn't specify any alignment. + // It's only used when T is very large and we're using an older compilers + // which we have to fully specialize each alignment case. + template + struct complex_storage + { + T x; T y; + }; + + THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(1); + THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(2); + THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(4); + THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(8); + THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(16); + THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(32); + THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(64); + THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION(128); + + #undef THRUST_DEFINE_COMPLEX_STORAGE_SPECIALIZATION +#else + // C++03 implementation for GCC > 4.5, Clang, PGI, ICPC, and xlC. + template + struct complex_storage + { + struct type { T x; T y; } __attribute__((aligned(Align))); + }; +#endif + +} // end namespace detail + +/*! \endcond + */ + /*! \p complex is the Thrust equivalent to std::complex. It is * functionally identical to it, but can also be used in device code which * std::complex currently cannot. @@ -53,561 +158,898 @@ THRUST_NAMESPACE_BEGIN * float or double. Others types are not supported. * */ - -template -struct complex : public ::cuda::std::complex +template +struct complex { - using base = ::cuda::std::complex; - using base::base; // inherit constructors - using base::operator=; // inherit assignment operators +public: + + /*! \p value_type is the type of \p complex's real and imaginary parts. + */ + typedef T value_type; + + + /* --- Constructors --- */ + + /*! Construct a complex number with an imaginary part of 0. + * + * \param re The real part of the number. + */ + __host__ __device__ + complex(const T& re); + + /*! Construct a complex number from its real and imaginary parts. + * + * \param re The real part of the number. + * \param im The imaginary part of the number. + */ + __host__ __device__ + complex(const T& re, const T& im); + +#if THRUST_CPP_DIALECT >= 2011 + /*! Default construct a complex number. + */ complex() = default; - __host__ __device__ complex(const base& rhs) : base(rhs) {} - __host__ __device__ complex(base&& rhs) : base(::cuda::std::move(rhs)) {} + /*! This copy constructor copies from a \p complex with a type that is + * convertible to this \p complex's \c value_type. + * + * \param z The \p complex to copy from. + */ + complex(const complex& z) = default; +#else + /*! Default construct a complex number. + */ + __host__ __device__ + complex(); - template ::value>::type, - typename = typename detail::enable_if::value>::type> + /*! This copy constructor copies from a \p complex with a type that is + * convertible to this \p complex's \c value_type. + * + * \param z The \p complex to copy from. + */ __host__ __device__ - complex &operator=(const complex &rhs) - { - this->real(static_cast(rhs.real())); - this->imag(static_cast(rhs.imag())); - return *this; - } + complex(const complex& z); +#endif + + /*! This converting copy constructor copies from a \p complex with a type + * that is convertible to this \p complex's \c value_type. + * + * \param z The \p complex to copy from. + * + * \tparam U is convertible to \c value_type. + */ + template + __host__ __device__ + complex(const complex& z); + + /*! This converting copy constructor copies from a std::complex with + * a type that is convertible to this \p complex's \c value_type. + * + * \param z The \p complex to copy from. + */ + __host__ THRUST_STD_COMPLEX_DEVICE + complex(const std::complex& z); + + /*! This converting copy constructor copies from a std::complex with + * a type that is convertible to this \p complex's \c value_type. + * + * \param z The \p complex to copy from. + * + * \tparam U is convertible to \c value_type. + */ + template + __host__ THRUST_STD_COMPLEX_DEVICE + complex(const std::complex& z); - template ::value>::type, - typename = typename detail::enable_if::value>::type> + + /* --- Assignment Operators --- */ + + /*! Assign `re` to the real part of this \p complex and set the imaginary part + * to 0. + * + * \param re The real part of the number. + */ __host__ __device__ - complex &operator=(const U &rhs) - { - this->operator=(static_cast(rhs)); - return *this; - } + complex& operator=(const T& re); + +#if THRUST_CPP_DIALECT >= 2011 + /*! Assign `z.real()` and `z.imag()` to the real and imaginary parts of this + * \p complex respectively. + * + * \param z The \p complex to copy from. + */ + complex& operator=(const complex& z) = default; +#else + /*! Assign `z.real()` and `z.imag()` to the real and imaginary parts of this + * \p complex respectively. + * + * \param z The \p complex to copy from. + */ + __host__ __device__ + complex& operator=(const complex& z); +#endif + + /*! Assign `z.real()` and `z.imag()` to the real and imaginary parts of this + * \p complex respectively. + * + * \param z The \p complex to copy from. + * + * \tparam U is convertible to \c value_type. + */ + template + __host__ __device__ + complex& operator=(const complex& z); + + /*! Assign `z.real()` and `z.imag()` to the real and imaginary parts of this + * \p complex respectively. + * + * \param z The \p complex to copy from. + */ + __host__ THRUST_STD_COMPLEX_DEVICE + complex& operator=(const std::complex& z); + + /*! Assign `z.real()` and `z.imag()` to the real and imaginary parts of this + * \p complex respectively. + * + * \param z The \p complex to copy from. + * + * \tparam U is convertible to \c value_type. + */ + template + __host__ THRUST_STD_COMPLEX_DEVICE + complex& operator=(const std::complex& z); + + + /* --- Compound Assignment Operators --- */ + + /*! Adds a \p complex to this \p complex and assigns the result to this + * \p complex. + * + * \param z The \p complex to be added. + * + * \tparam U is convertible to \c value_type. + */ + template + __host__ __device__ + complex& operator+=(const complex& z); + + /*! Subtracts a \p complex from this \p complex and assigns the result to + * this \p complex. + * + * \param z The \p complex to be subtracted. + * + * \tparam U is convertible to \c value_type. + */ + template + __host__ __device__ + complex& operator-=(const complex& z); + + /*! Multiplies this \p complex by another \p complex and assigns the result + * to this \p complex. + * + * \param z The \p complex to be multiplied. + * + * \tparam U is convertible to \c value_type. + */ + template + __host__ __device__ + complex& operator*=(const complex& z); + + /*! Divides this \p complex by another \p complex and assigns the result to + * this \p complex. + * + * \param z The \p complex to be divided. + * + * \tparam U is convertible to \c value_type. + */ + template + __host__ __device__ + complex& operator/=(const complex& z); + + /*! Adds a scalar to this \p complex and assigns the result to this + * \p complex. + * + * \param z The \p complex to be added. + * + * \tparam U is convertible to \c value_type. + */ + template + __host__ __device__ + complex& operator+=(const U& z); + + /*! Subtracts a scalar from this \p complex and assigns the result to + * this \p complex. + * + * \param z The scalar to be subtracted. + * + * \tparam U is convertible to \c value_type. + */ + template + __host__ __device__ + complex& operator-=(const U& z); + + /*! Multiplies this \p complex by a scalar and assigns the result + * to this \p complex. + * + * \param z The scalar to be multiplied. + * + * \tparam U is convertible to \c value_type. + */ + template + __host__ __device__ + complex& operator*=(const U& z); + + /*! Divides this \p complex by a scalar and assigns the result to + * this \p complex. + * + * \param z The scalar to be divided. + * + * \tparam U is convertible to \c value_type. + */ + template + __host__ __device__ + complex& operator/=(const U& z); + + + + /* --- Getter functions --- + * The volatile ones are there to help for example + * with certain reductions optimizations + */ + + /*! Returns the real part of this \p complex. + */ + __host__ __device__ + T real() const volatile { return data.x; } + + /*! Returns the imaginary part of this \p complex. + */ + __host__ __device__ + T imag() const volatile { return data.y; } + + /*! Returns the real part of this \p complex. + */ + __host__ __device__ + T real() const { return data.x; } + + /*! Returns the imaginary part of this \p complex. + */ + __host__ __device__ + T imag() const { return data.y; } + + - /*! Cast this \p complex to a std::complex + /* --- Setter functions --- + * The volatile ones are there to help for example + * with certain reductions optimizations */ - __host__ operator ::std::complex() const { return ::std::complex(this->real(), this->imag()); } + + /*! Sets the real part of this \p complex. + * + * \param re The new real part of this \p complex. + */ + __host__ __device__ + void real(T re) volatile { data.x = re; } + + /*! Sets the imaginary part of this \p complex. + * + * \param im The new imaginary part of this \p complex.e + */ + __host__ __device__ + void imag(T im) volatile { data.y = im; } + + /*! Sets the real part of this \p complex. + * + * \param re The new real part of this \p complex. + */ + __host__ __device__ + void real(T re) { data.x = re; } + + /*! Sets the imaginary part of this \p complex. + * + * \param im The new imaginary part of this \p complex. + */ + __host__ __device__ + void imag(T im) { data.y = im; } + + + + /* --- Casting functions --- */ + + /*! Casts this \p complex to a std::complex of the same type. + */ + __host__ + operator std::complex() const { return std::complex(real(), imag()); } + +private: + typename detail::complex_storage::type data; }; -/* --- Equality Operators --- */ -/*! Returns true if two \p complex numbers with different underlying type - * are equal and false otherwise. +/* --- General Functions --- */ + +/*! Returns the magnitude (also known as absolute value) of a \p complex. * - * \param x The first \p complex. - * \param y The second \p complex. + * \param z The \p complex from which to calculate the absolute value. + */ +template +__host__ __device__ +T abs(const complex& z); + +/*! Returns the phase angle (also known as argument) in radians of a \p complex. * - * \tparam \c T0 is convertible to \c T1. + * \param z The \p complex from which to calculate the phase angle. */ -template ::value>> -__host__ __device__ bool operator==(const complex &x, const complex &y) -{ - return x.real() == y.real() && x.imag() == y.imag(); -} +template +__host__ __device__ +T arg(const complex& z); -/*! Returns true if two \p complex numbers with different underlying type - * are equal and false otherwise. +/*! Returns the square of the magnitude of a \p complex. * - * \param x The first \p thrust::complex. - * \param y The second \p cuda::std::complex. + * \param z The \p complex from which to calculate the norm. + */ +template +__host__ __device__ +T norm(const complex& z); + +/*! Returns the complex conjugate of a \p complex. * - * \tparam \c T0 is convertible to \c T1. + * \param z The \p complex from which to calculate the complex conjugate. */ -template ::value>> -__host__ __device__ bool operator==(const complex &x, const ::cuda::std::complex &y) -{ - return x.real() == y.real() && x.imag() == y.imag(); -} +template +__host__ __device__ +complex conj(const complex& z); -/*! Returns true if two \p complex numbers with different underlying type - * are equal and false otherwise. +/*! Returns a \p complex with the specified magnitude and phase. * - * \param x The first \p cuda::std::complex. - * \param y The second \p thrust::complex. + * \param m The magnitude of the returned \p complex. + * \param theta The phase of the returned \p complex in radians. + */ +template +__host__ __device__ +complex::type> +polar(const T0& m, const T1& theta = T1()); + +/*! Returns the projection of a \p complex on the Riemann sphere. + * For all finite \p complex it returns the argument. For \p complexs + * with a non finite part returns (INFINITY,+/-0) where the sign of + * the zero matches the sign of the imaginary part of the argument. * - * \tparam \c T0 is convertible to \c T1. + * \param z The \p complex argument. */ -template ::value>> -__host__ __device__ bool operator==(const ::cuda::std::complex &x, const complex &y) -{ - return x.real() == y.real() && x.imag() == y.imag(); -} +template +__host__ __device__ +complex proj(const T& z); -/*! Returns true if the imaginary part of the \p complex number is zero and - * the real part is equal to the scalar. Returns false otherwise. + + +/* --- Binary Arithmetic operators --- */ + +/*! Adds two \p complex numbers. * - * \param x The \p complex. - * \param y The scalar. + * The value types of the two \p complex types should be compatible and the + * type of the returned \p complex is the promoted type of the two arguments. * - * \tparam \c T0 is convertible to \c T1. + * \param x The first \p complex. + * \param y The second \p complex. */ -template ::value>::type, - typename = typename detail::enable_if::value>::type> -__host__ __device__ bool operator==(const T0 &x, const complex &y) -{ - return x == y.real() && T0() == y.imag(); -} +template +__host__ __device__ +complex::type> +operator+(const complex& x, const complex& y); -/*! Returns true if the imaginary part of the \p complex number is zero and - * the real part is equal to the scalar. Returns false otherwise. +/*! Adds a scalar to a \p complex number. + * + * The value type of the \p complex should be compatible with the scalar and + * the type of the returned \p complex is the promoted type of the two arguments. * * \param x The \p complex. * \param y The scalar. + */ +template +__host__ __device__ +complex::type> +operator+(const complex& x, const T1& y); + +/*! Adds a \p complex number to a scalar. + * + * The value type of the \p complex should be compatible with the scalar and + * the type of the returned \p complex is the promoted type of the two arguments. * - * \tparam \c T0 is convertible to \c T1. + * \param x The scalar. + * \param y The \p complex. */ -template ::value>::type, - typename = typename detail::enable_if::value>::type> -__host__ __device__ bool operator==(const complex &x, const T1 &y) -{ - return x.real() == y && x.imag() == T1(); -} +template +__host__ __device__ +complex::type> +operator+(const T0& x, const complex& y); -/*! Returns true if two \p complex numbers with different underlying type - * are different and false otherwise. +/*! Subtracts two \p complex numbers. * - * \param x The first \p complex. - * \param y The second \p complex. + * The value types of the two \p complex types should be compatible and the + * type of the returned \p complex is the promoted type of the two arguments. * - * \tparam \c T0 is convertible to \c T1. + * \param x The first \p complex (minuend). + * \param y The second \p complex (subtrahend). */ -template ::value>> -__host__ __device__ bool operator!=(const complex &x, const complex &y) -{ - return !(x == y); -} +template +__host__ __device__ +complex::type> +operator-(const complex& x, const complex& y); -/*! Returns true if two \p complex numbers with different underlying type - * are different and false otherwise. +/*! Subtracts a scalar from a \p complex number. * - * \param x The first \p thrust::complex. - * \param y The second \p cuda::std::complex. + * The value type of the \p complex should be compatible with the scalar and + * the type of the returned \p complex is the promoted type of the two arguments. * - * \tparam \c T0 is convertible to \c T1. + * \param x The \p complex (minuend). + * \param y The scalar (subtrahend). */ -template ::value>> -__host__ __device__ bool operator!=(const complex &x, const ::cuda::std::complex &y) -{ - return !(x == y); -} +template +__host__ __device__ +complex::type> +operator-(const complex& x, const T1& y); -/*! Returns true if two \p complex numbers with different underlying type - * are different and false otherwise. +/*! Subtracts a \p complex number from a scalar. * - * \param x The second \p cuda::std::complex. - * \param y The first \p thrust::complex. + * The value type of the \p complex should be compatible with the scalar and + * the type of the returned \p complex is the promoted type of the two arguments. * - * \tparam \c T0 is convertible to \c T1. + * \param x The scalar (minuend). + * \param y The \p complex (subtrahend). */ -template ::value>> -__host__ __device__ bool operator!=(const ::cuda::std::complex &x, const complex &y) -{ - return !(x == y); -} +template +__host__ __device__ +complex::type> +operator-(const T0& x, const complex& y); -/*! Returns true if two \p complex numbers with different underlying type - * are different and false otherwise. +/*! Multiplies two \p complex numbers. * - * \param x The scalar. - * \param y The \p complex. + * The value types of the two \p complex types should be compatible and the + * type of the returned \p complex is the promoted type of the two arguments. * - * \tparam \c T0 is convertible to \c T1. + * \param x The first \p complex. + * \param y The second \p complex. */ -template ::value>::type, - typename = typename detail::enable_if::value>::type> -__host__ __device__ bool operator!=(const T0 &x, const complex &y) -{ - return !(x == y); -} +template +__host__ __device__ +complex::type> +operator*(const complex& x, const complex& y); -/*! Returns true if two \p complex numbers with different underlying type - * are different and false otherwise. +/*! Multiplies a \p complex number by a scalar. * * \param x The \p complex. * \param y The scalar. - * - * \tparam \c T0 is convertible to \c T1. */ -template ::value>::type, - typename = typename detail::enable_if::value>::type> -__host__ __device__ bool operator!=(const complex &x, const T1 &y) -{ - return !(x == y); -} +template +__host__ __device__ +complex::type> +operator*(const complex& x, const T1& y); -/* --- Add Operator --- */ +/*! Multiplies a scalar by a \p complex number. + * + * The value type of the \p complex should be compatible with the scalar and + * the type of the returned \p complex is the promoted type of the two arguments. + * + * \param x The scalar. + * \param y The \p complex. + */ +template +__host__ __device__ +complex::type> +operator*(const T0& x, const complex& y); -/*! Adds two \p complex numbers. +/*! Divides two \p complex numbers. * * The value types of the two \p complex types should be compatible and the * type of the returned \p complex is the promoted type of the two arguments. * - * \param x The first \p complex. - * \param y The second \p complex. - * - * \tparam \c T0 is convertible to \c T1. + * \param x The numerator (dividend). + * \param y The denomimator (divisor). */ -template ::value>::type> -__host__ __device__ complex::type> -operator+(const complex &x, const complex &y) -{ - typedef typename detail::promoted_numerical_type::type T; - return complex(x.real() + y.real(), x.imag() + y.imag()); -} +template +__host__ __device__ +complex::type> +operator/(const complex& x, const complex& y); -/*! Adds a scalar to a \p complex number. +/*! Divides a \p complex number by a scalar. * * The value type of the \p complex should be compatible with the scalar and * the type of the returned \p complex is the promoted type of the two arguments. * - * \param x The \p complex. - * \param y The scalar. - * - * \tparam \c T0 is convertible to \c T1. + * \param x The complex numerator (dividend). + * \param y The scalar denomimator (divisor). */ -template ::value>::type, - typename = typename detail::enable_if::value>::type> -__host__ __device__ complex::type> -operator+(const complex &x, const T1 &y) -{ - typedef typename detail::promoted_numerical_type::type T; - return complex(x.real() + y, T(x.imag())); -} +template +__host__ __device__ +complex::type> +operator/(const complex& x, const T1& y); -/*! Adds a \p complex number to a scalar. +/*! Divides a scalar by a \p complex number. * * The value type of the \p complex should be compatible with the scalar and * the type of the returned \p complex is the promoted type of the two arguments. * - * \param x The scalar. - * \param y The \p complex. + * \param x The scalar numerator (dividend). + * \param y The complex denomimator (divisor). + */ +template +__host__ __device__ +complex::type> +operator/(const T0& x, const complex& y); + + + +/* --- Unary Arithmetic operators --- */ + +/*! Unary plus, returns its \p complex argument. * - * \tparam \c T0 is convertible to \c T1. + * \param y The \p complex argument. */ -template ::value>::type, - typename = typename detail::enable_if::value>::type> -__host__ __device__ complex::type> -operator+(const T0 &x, const complex &y) -{ - typedef typename detail::promoted_numerical_type::type T; - return complex(x + y.real(), T(y.imag())); -} +template +__host__ __device__ +complex +operator+(const complex& y); -/* --- Substraction Operator --- */ +/*! Unary minus, returns the additive inverse (negation) of its \p complex + * argument. + * + * \param y The \p complex argument. + */ +template +__host__ __device__ +complex +operator-(const complex& y); -/*! Subtracts two \p complex numbers. + + +/* --- Exponential Functions --- */ + +/*! Returns the complex exponential of a \p complex number. + * + * \param z The \p complex argument. + */ +template +__host__ __device__ +complex exp(const complex& z); + +/*! Returns the complex natural logarithm of a \p complex number. + * + * \param z The \p complex argument. + */ +template +__host__ __device__ +complex log(const complex& z); + +/*! Returns the complex base 10 logarithm of a \p complex number. + * + * \param z The \p complex argument. + */ +template +__host__ __device__ +complex log10(const complex& z); + + + +/* --- Power Functions --- */ + +/*! Returns a \p complex number raised to another. * * The value types of the two \p complex types should be compatible and the * type of the returned \p complex is the promoted type of the two arguments. * - * \param x The first \p complex (minuend). - * \param y The second \p complex (subtrahend). - * - * \tparam \c T0 is convertible to \c T1. + * \param x The base. + * \param y The exponent. */ -template ::value>::type> -__host__ __device__ complex::type> -operator-(const complex &x, const complex &y) -{ - typedef typename detail::promoted_numerical_type::type T; - return complex(x.real() - y.real(), x.imag() - y.imag()); -} +template +__host__ __device__ +complex::type> +pow(const complex& x, const complex& y); -/*! Subtracts a scalar from a \p complex number. +/*! Returns a \p complex number raised to a scalar. * * The value type of the \p complex should be compatible with the scalar and * the type of the returned \p complex is the promoted type of the two arguments. * - * \param x The \p complex (minuend). - * \param y The scalar (subtrahend). - * - * \tparam \c T0 is convertible to \c T1. + * \param x The base. + * \param y The exponent. */ -template ::value>::type, - typename = typename detail::enable_if::value>::type> -__host__ __device__ complex::type> -operator-(const complex &x, const T1 &y) -{ - typedef typename detail::promoted_numerical_type::type T; - return complex(x.real() - y, T(x.imag())); -} +template +__host__ __device__ +complex::type> +pow(const complex& x, const T1& y); -/*! Subtracts a \p complex number from a scalar. +/*! Returns a scalar raised to a \p complex number. * * The value type of the \p complex should be compatible with the scalar and * the type of the returned \p complex is the promoted type of the two arguments. * - * \param x The scalar (minuend). - * \param y The \p complex (subtrahend). + * \param x The base. + * \param y The exponent. + */ +template +__host__ __device__ +complex::type> +pow(const T0& x, const complex& y); + +/*! Returns the complex square root of a \p complex number. * - * \tparam \c T0 is convertible to \c T1. + * \param z The \p complex argument. */ -template ::value>::type, - typename = typename detail::enable_if::value>::type> -__host__ __device__ complex::type> -operator-(const T0 &x, const complex &y) -{ - typedef typename detail::promoted_numerical_type::type T; - return complex(x - y.real(), -T(y.imag())); -} +template +__host__ __device__ +complex sqrt(const complex& z); -/* --- Multiplication Operator --- */ -template ::value>::type> -__host__ __device__ complex::type> -operator*(const complex &x, const complex &y) -{ - typedef typename detail::promoted_numerical_type::type T; - // fall back to std implementation of multiplication - return ::cuda::std::complex(x) * ::cuda::std::complex(y); -} - -template ::value>::type, - typename = typename detail::enable_if::value>::type> -__host__ __device__ complex::type> -operator*(const complex &x, const T1 &y) -{ - typedef typename detail::promoted_numerical_type::type T; - // fall back to std implementation of multiplication - return ::cuda::std::complex(x) * ::cuda::std::complex(y); -} - -template ::value>::type, - typename = typename detail::enable_if::value>::type> -__host__ __device__ complex::type> -operator*(const T0 &x, const complex &y) -{ - typedef typename detail::promoted_numerical_type::type T; - // fall back to std implementation of multiplication - return ::cuda::std::complex(x) * ::cuda::std::complex(y); -} - -/* --- Division Operator --- */ - -template ::value>::type> -__host__ __device__ complex::type> -operator/(const complex &x, const complex &y) -{ - typedef typename detail::promoted_numerical_type::type T; - // fall back to std implementation of division - return ::cuda::std::complex(x) / ::cuda::std::complex(y); -} - -template ::value>::type, - typename = typename detail::enable_if::value>::type> -__host__ __device__ complex::type> -operator/(const complex &x, const T1 &y) -{ - typedef typename detail::promoted_numerical_type::type T; - // fall back to std implementation of division - return ::cuda::std::complex(x) / ::cuda::std::complex(y); -} - -template ::value>::type, - typename = typename detail::enable_if::value>::type> -__host__ __device__ complex::type> -operator/(const T0 &x, const complex &y) -{ - typedef typename detail::promoted_numerical_type::type T; - // fall back to std implementation of division - return ::cuda::std::complex(x) / ::cuda::std::complex(y); -} - -// The using declarations allows imports all necessary functions for thurst::complex. -// However, they also lead to thrust::abs(1.0F) being valid code after include . -// We are importing those for the plain value taking overloads and specialize for those taking -// or returning a `thrust::complex` below -using ::cuda::std::abs; -using ::cuda::std::arg; -using ::cuda::std::conj; -using ::cuda::std::norm; -// polar only takes a T but returns a complex so we cannot pull that one in. -// using ::cuda::std::polar; -using ::cuda::std::proj; - -using ::cuda::std::exp; -using ::cuda::std::log; -using ::cuda::std::log10; -// pow always returns a complex. -// using ::cuda::std::pow; -using ::cuda::std::sqrt; - -using ::cuda::std::acos; -using ::cuda::std::acosh; -using ::cuda::std::asin; -using ::cuda::std::asinh; -using ::cuda::std::atan; -using ::cuda::std::atanh; -using ::cuda::std::cos; -using ::cuda::std::cosh; -using ::cuda::std::sin; -using ::cuda::std::sinh; -using ::cuda::std::tan; -using ::cuda::std::tanh; - -// Those functions return `cuda::std::complex` so we must provide an explicit overload that returns `thrust::complex` -template -__host__ __device__ complex conj(const complex& c) { - return static_cast>(::cuda::std::conj(c)); -} -template -__host__ __device__ complex polar(const T& rho, const T& theta = T{}) { - return static_cast>(::cuda::std::polar(rho, theta)); -} -template -__host__ __device__ complex proj(const complex& c) { - return static_cast>(::cuda::std::proj(c)); -} - -template -__host__ __device__ complex exp(const complex& c) { - return static_cast>(::cuda::std::exp(c)); -} -template -__host__ __device__ complex log(const complex& c) { - return static_cast>(::cuda::std::log(c)); -} -template -__host__ __device__ complex log10(const complex& c) { - return static_cast>(::cuda::std::log10(c)); -} -template -__host__ __device__ complex::type> -pow(const complex& x, const complex& y) { - return static_cast::type>>(::cuda::std::pow(x, y)); -} -template::value, int> = 0> -__host__ __device__ complex::type> -pow(const complex& x, const T1& y) { - return static_cast::type>>(::cuda::std::pow(x, y)); -} -template::value, int> = 0> -__host__ __device__ complex::type> -pow(const T0& x, const complex& y) { - return static_cast::type>>(::cuda::std::pow(x, y)); -} -template -__host__ __device__ complex sqrt(const complex& c) { - return static_cast>(::cuda::std::sqrt(c)); -} -template -__host__ __device__ complex acos(const complex& c) { - return static_cast>(::cuda::std::acos(c)); -} -template -__host__ __device__ complex acosh(const complex& c) { - return static_cast>(::cuda::std::acosh(c)); -} -template -__host__ __device__ complex asin(const complex& c) { - return static_cast>(::cuda::std::asin(c)); -} -template -__host__ __device__ complex asinh(const complex& c) { - return static_cast>(::cuda::std::asinh(c)); -} -template -__host__ __device__ complex atan(const complex& c) { - return static_cast>(::cuda::std::atan(c)); -} -template -__host__ __device__ complex atanh(const complex& c) { - return static_cast>(::cuda::std::atanh(c)); -} -template -__host__ __device__ complex cos(const complex& c) { - return static_cast>(::cuda::std::cos(c)); -} -template -__host__ __device__ complex cosh(const complex& c) { - return static_cast>(::cuda::std::cosh(c)); -} -template -__host__ __device__ complex sin(const complex& c) { - return static_cast>(::cuda::std::sin(c)); -} -template -__host__ __device__ complex sinh(const complex& c) { - return static_cast>(::cuda::std::sinh(c)); -} -template -__host__ __device__ complex tan(const complex& c) { - return static_cast>(::cuda::std::tan(c)); -} -template -__host__ __device__ complex tanh(const complex& c) { - return static_cast>(::cuda::std::tanh(c)); -} +/* --- Trigonometric Functions --- */ + +/*! Returns the complex cosine of a \p complex number. + * + * \param z The \p complex argument. + */ +template +__host__ __device__ +complex cos(const complex& z); + +/*! Returns the complex sine of a \p complex number. + * + * \param z The \p complex argument. + */ +template +__host__ __device__ +complex sin(const complex& z); + +/*! Returns the complex tangent of a \p complex number. + * + * \param z The \p complex argument. + */ +template +__host__ __device__ +complex tan(const complex& z); + + + +/* --- Hyperbolic Functions --- */ + +/*! Returns the complex hyperbolic cosine of a \p complex number. + * + * \param z The \p complex argument. + */ +template +__host__ __device__ +complex cosh(const complex& z); + +/*! Returns the complex hyperbolic sine of a \p complex number. + * + * \param z The \p complex argument. + */ +template +__host__ __device__ +complex sinh(const complex& z); + +/*! Returns the complex hyperbolic tangent of a \p complex number. + * + * \param z The \p complex argument. + */ +template +__host__ __device__ +complex tanh(const complex& z); + + +/* --- Inverse Trigonometric Functions --- */ + +/*! Returns the complex arc cosine of a \p complex number. + * + * The range of the real part of the result is [0, Pi] and + * the range of the imaginary part is [-inf, +inf] + * + * \param z The \p complex argument. + */ template -struct proclaim_trivially_relocatable> : thrust::true_type -{}; +__host__ __device__ +complex acos(const complex& z); -/*! Is of `true_type` when \c T is either a thrust::complex, std::complex, or cuda::std::complex. +/*! Returns the complex arc sine of a \p complex number. + * + * The range of the real part of the result is [-Pi/2, Pi/2] and + * the range of the imaginary part is [-inf, +inf] + * + * \param z The \p complex argument. */ template -struct is_complex : public thrust::false_type -{}; +__host__ __device__ +complex asin(const complex& z); + +/*! Returns the complex arc tangent of a \p complex number. + * + * The range of the real part of the result is [-Pi/2, Pi/2] and + * the range of the imaginary part is [-inf, +inf] + * + * \param z The \p complex argument. + */ template -struct is_complex> : public thrust::true_type -{}; +__host__ __device__ +complex atan(const complex& z); + + + +/* --- Inverse Hyperbolic Functions --- */ + +/*! Returns the complex inverse hyperbolic cosine of a \p complex number. + * + * The range of the real part of the result is [0, +inf] and + * the range of the imaginary part is [-Pi, Pi] + * + * \param z The \p complex argument. + */ +template +__host__ __device__ +complex acosh(const complex& z); + +/*! Returns the complex inverse hyperbolic sine of a \p complex number. + * + * The range of the real part of the result is [-inf, +inf] and + * the range of the imaginary part is [-Pi/2, Pi/2] + * + * \param z The \p complex argument. + */ template -struct is_complex<::cuda::std::complex> : public thrust::true_type -{}; +__host__ __device__ +complex asinh(const complex& z); + +/*! Returns the complex inverse hyperbolic tangent of a \p complex number. + * + * The range of the real part of the result is [-inf, +inf] and + * the range of the imaginary part is [-Pi/2, Pi/2] + * + * \param z The \p complex argument. + */ template -struct is_complex<::std::complex> : public thrust::true_type -{}; +__host__ __device__ +complex atanh(const complex& z); + + + +/* --- Stream Operators --- */ + +/*! Writes to an output stream a \p complex number in the form (real, imaginary). + * + * \param os The output stream. + * \param z The \p complex number to output. + */ +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const complex& z); + +/*! Reads a \p complex number from an input stream. + * + * The recognized formats are: + * - real + * - (real) + * - (real, imaginary) + * + * The values read must be convertible to the \p complex's \c value_type + * + * \param is The input stream. + * \param z The \p complex number to set. + */ +template +__host__ +std::basic_istream& +operator>>(std::basic_istream& is, complex& z); + + + +/* --- Equality Operators --- */ + +/*! Returns true if two \p complex numbers are equal and false otherwise. + * + * \param x The first \p complex. + * \param y The second \p complex. + */ +template +__host__ __device__ +bool operator==(const complex& x, const complex& y); + +/*! Returns true if two \p complex numbers are equal and false otherwise. + * + * \param x The first \p complex. + * \param y The second \p complex. + */ +template +__host__ THRUST_STD_COMPLEX_DEVICE +bool operator==(const complex& x, const std::complex& y); + +/*! Returns true if two \p complex numbers are equal and false otherwise. + * + * \param x The first \p complex. + * \param y The second \p complex. + */ +template +__host__ THRUST_STD_COMPLEX_DEVICE +bool operator==(const std::complex& x, const complex& y); + +/*! Returns true if the imaginary part of the \p complex number is zero and + * the real part is equal to the scalar. Returns false otherwise. + * + * \param x The scalar. + * \param y The \p complex. + */ +template +__host__ __device__ +bool operator==(const T0& x, const complex& y); + +/*! Returns true if the imaginary part of the \p complex number is zero and + * the real part is equal to the scalar. Returns false otherwise. + * + * \param x The \p complex. + * \param y The scalar. + */ +template +__host__ __device__ +bool operator==(const complex& x, const T1& y); + +/*! Returns true if two \p complex numbers are different and false otherwise. + * + * \param x The first \p complex. + * \param y The second \p complex. + */ +template +__host__ __device__ +bool operator!=(const complex& x, const complex& y); + +/*! Returns true if two \p complex numbers are different and false otherwise. + * + * \param x The first \p complex. + * \param y The second \p complex. + */ +template +__host__ THRUST_STD_COMPLEX_DEVICE +bool operator!=(const complex& x, const std::complex& y); + +/*! Returns true if two \p complex numbers are different and false otherwise. + * + * \param x The first \p complex. + * \param y The second \p complex. + */ +template +__host__ THRUST_STD_COMPLEX_DEVICE +bool operator!=(const std::complex& x, const complex& y); + +/*! Returns true if the imaginary part of the \p complex number is not zero or + * the real part is different from the scalar. Returns false otherwise. + * + * \param x The scalar. + * \param y The \p complex. + */ +template +__host__ __device__ +bool operator!=(const T0& x, const complex& y); + +/*! Returns true if the imaginary part of the \p complex number is not zero or + * the real part is different from the scalar. Returns false otherwise. + * + * \param x The \p complex. + * \param y The scalar. + */ +template +__host__ __device__ +bool operator!=(const complex& x, const T1& y); THRUST_NAMESPACE_END +#include + +#undef THRUST_STD_COMPLEX_REAL +#undef THRUST_STD_COMPLEX_IMAG +#undef THRUST_STD_COMPLEX_DEVICE + /*! \} // complex_numbers */ /*! \} // numerics */ + diff --git a/thrust/thrust/detail/complex/arithmetic.h b/thrust/thrust/detail/complex/arithmetic.h new file mode 100644 index 00000000000..f82d4040cdd --- /dev/null +++ b/thrust/thrust/detail/complex/arithmetic.h @@ -0,0 +1,311 @@ +/* + * Copyright 2008-2021 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include + +THRUST_NAMESPACE_BEGIN + + /* --- Binary Arithmetic Operators --- */ + +template +__host__ __device__ +complex::type> +operator+(const complex& x, const complex& y) +{ + typedef typename detail::promoted_numerical_type::type T; + return complex(x.real() + y.real(), x.imag() + y.imag()); +} + +template +__host__ __device__ +complex::type> +operator+(const complex& x, const T1& y) +{ + typedef typename detail::promoted_numerical_type::type T; + return complex(x.real() + y, x.imag()); +} + +template +__host__ __device__ +complex::type> +operator+(const T0& x, const complex& y) +{ + typedef typename detail::promoted_numerical_type::type T; + return complex(x + y.real(), y.imag()); +} + + +template +__host__ __device__ +complex::type> +operator-(const complex& x, const complex& y) +{ + typedef typename detail::promoted_numerical_type::type T; + return complex(x.real() - y.real(), x.imag() - y.imag()); +} + +template +__host__ __device__ +complex::type> +operator-(const complex& x, const T1& y) +{ + typedef typename detail::promoted_numerical_type::type T; + return complex(x.real() - y, x.imag()); +} + +template +__host__ __device__ +complex::type> +operator-(const T0& x, const complex& y) +{ + typedef typename detail::promoted_numerical_type::type T; + return complex(x - y.real(), -y.imag()); +} + + +template +__host__ __device__ +complex::type> +operator*(const complex& x, const complex& y) +{ + typedef typename detail::promoted_numerical_type::type T; + return complex( x.real() * y.real() - x.imag() * y.imag() + , x.real() * y.imag() + x.imag() * y.real()); +} + +template +__host__ __device__ +complex::type> +operator*(const complex& x, const T1& y) +{ + typedef typename detail::promoted_numerical_type::type T; + return complex(x.real() * y, x.imag() * y); +} + +template +__host__ __device__ +complex::type> +operator*(const T0& x, const complex& y) +{ + typedef typename detail::promoted_numerical_type::type T; + return complex(x * y.real(), x * y.imag()); +} + + +template +__host__ __device__ +complex::type> +operator/(const complex& x, const complex& y) +{ + typedef typename detail::promoted_numerical_type::type T; + + // Find `abs` by ADL. + using std::abs; + + T s = abs(y.real()) + abs(y.imag()); + + T oos = T(1.0) / s; + + T ars = x.real() * oos; + T ais = x.imag() * oos; + T brs = y.real() * oos; + T bis = y.imag() * oos; + + s = (brs * brs) + (bis * bis); + + oos = T(1.0) / s; + + complex quot( ((ars * brs) + (ais * bis)) * oos + , ((ais * brs) - (ars * bis)) * oos); + return quot; +} + +template +__host__ __device__ +complex::type> +operator/(const complex& x, const T1& y) +{ + typedef typename detail::promoted_numerical_type::type T; + return complex(x.real() / y, x.imag() / y); +} + +template +__host__ __device__ +complex::type> +operator/(const T0& x, const complex& y) +{ + typedef typename detail::promoted_numerical_type::type T; + return complex(x) / y; +} + + + +/* --- Unary Arithmetic Operators --- */ + +template +__host__ __device__ +complex operator+(const complex& y) +{ + return y; +} + +template +__host__ __device__ +complex operator-(const complex& y) +{ + return y * -T(1); +} + + +/* --- Other Basic Arithmetic Functions --- */ + +// As std::hypot is only C++11 we have to use the C interface +template +__host__ __device__ +T abs(const complex& z) +{ + return hypot(z.real(), z.imag()); +} + +// XXX Why are we specializing here? +namespace detail { +namespace complex { + +__host__ __device__ +inline float abs(const thrust::complex& z) +{ + return hypotf(z.real(),z.imag()); +} + +__host__ __device__ +inline double abs(const thrust::complex& z) +{ + return hypot(z.real(),z.imag()); +} + +} // end namespace complex +} // end namespace detail + +template <> +__host__ __device__ +inline float abs(const complex& z) +{ + return detail::complex::abs(z); +} + +template <> +__host__ __device__ +inline double abs(const complex& z) +{ + return detail::complex::abs(z); +} + + +template +__host__ __device__ +T arg(const complex& z) +{ + // Find `atan2` by ADL. + using std::atan2; + return atan2(z.imag(), z.real()); +} + + +template +__host__ __device__ +complex conj(const complex& z) +{ + return complex(z.real(), -z.imag()); +} + + +template +__host__ __device__ +T norm(const complex& z) +{ + return z.real() * z.real() + z.imag() * z.imag(); +} + +// XXX Why specialize these, we could just rely on ADL. +template <> +__host__ __device__ +inline float norm(const complex& z) +{ + // Find `abs` and `sqrt` by ADL. + using std::abs; + using std::sqrt; + + if (abs(z.real()) < sqrt(FLT_MIN) && abs(z.imag()) < sqrt(FLT_MIN)) + { + float a = z.real() * 4.0f; + float b = z.imag() * 4.0f; + return (a * a + b * b) / 16.0f; + } + + return z.real() * z.real() + z.imag() * z.imag(); +} + +template <> +__host__ __device__ +inline double norm(const complex& z) +{ + // Find `abs` and `sqrt` by ADL. + using std::abs; + using std::sqrt; + + if (abs(z.real()) < sqrt(DBL_MIN) && abs(z.imag()) < sqrt(DBL_MIN)) + { + double a = z.real() * 4.0; + double b = z.imag() * 4.0; + return (a * a + b * b) / 16.0; + } + + return z.real() * z.real() + z.imag() * z.imag(); +} + + +template +__host__ __device__ +complex::type> +polar(const T0& m, const T1& theta) +{ + typedef typename detail::promoted_numerical_type::type T; + + // Find `cos` and `sin` by ADL. + using std::cos; + using std::sin; + + return complex(m * cos(theta), m * sin(theta)); +} + +THRUST_NAMESPACE_END + diff --git a/thrust/thrust/detail/complex/c99math.h b/thrust/thrust/detail/complex/c99math.h new file mode 100644 index 00000000000..16233e1b2f6 --- /dev/null +++ b/thrust/thrust/detail/complex/c99math.h @@ -0,0 +1,205 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail +{ +namespace complex +{ + +// Define basic arithmetic functions so we can use them without explicit scope +// keeping the code as close as possible to FreeBSDs for ease of maintenance. +// It also provides an easy way to support compilers with missing C99 functions. +// When possible, just use the names in the global scope. +// Some platforms define these as macros, others as free functions. +// Avoid using the std:: form of these as nvcc may treat std::foo() as __host__ functions. + +using ::log; +using ::acos; +using ::asin; +using ::sqrt; +using ::sinh; +using ::tan; +using ::cos; +using ::sin; +using ::exp; +using ::cosh; +using ::atan; + +template +inline __host__ __device__ T infinity(); + +template <> +inline __host__ __device__ float infinity() +{ + float res; + set_float_word(res, 0x7f800000); + return res; +} + + +template <> +inline __host__ __device__ double infinity() +{ + double res; + insert_words(res, 0x7ff00000,0); + return res; +} + +#if defined _MSC_VER +__host__ __device__ inline int isinf(float x){ + return std::abs(x) == infinity(); +} + +__host__ __device__ inline int isinf(double x){ + return std::abs(x) == infinity(); +} + +__host__ __device__ inline int isnan(float x){ + return x != x; +} + +__host__ __device__ inline int isnan(double x){ + return x != x; +} + +__host__ __device__ inline int signbit(float x){ + return ((*((uint32_t *)&x)) & 0x80000000) != 0 ? 1 : 0; +} + +__host__ __device__ inline int signbit(double x){ + return ((*((uint64_t *)&x)) & 0x8000000000000000) != 0ull ? 1 : 0; +} + +__host__ __device__ inline int isfinite(float x){ + return !isnan(x) && !isinf(x); +} + +__host__ __device__ inline int isfinite(double x){ + return !isnan(x) && !isinf(x); +} + +#else + +# if defined(__CUDACC__) && !(defined(__CUDA__) && defined(__clang__)) && !defined(_NVHPC_CUDA) +// NVCC implements at least some signature of these as functions not macros. +using ::isinf; +using ::isnan; +using ::signbit; +using ::isfinite; +# else +// Some compilers do not provide these in the global scope, because they are +// supposed to be macros. The versions in `std` are supposed to be functions. +// Since we're not compiling with nvcc, it's safe to use the functions in std:: +using std::isinf; +using std::isnan; +using std::signbit; +using std::isfinite; +# endif // __CUDACC__ +#endif // _MSC_VER + +using ::atanh; + +#if defined _MSC_VER + +__host__ __device__ inline double copysign(double x, double y){ + uint32_t hx,hy; + get_high_word(hx,x); + get_high_word(hy,y); + set_high_word(x,(hx&0x7fffffff)|(hy&0x80000000)); + return x; +} + +__host__ __device__ inline float copysignf(float x, float y){ + uint32_t ix,iy; + get_float_word(ix,x); + get_float_word(iy,y); + set_float_word(x,(ix&0x7fffffff)|(iy&0x80000000)); + return x; +} + + + +#if !defined(__CUDACC__) && !defined(_NVHPC_CUDA) + +// Simple approximation to log1p as Visual Studio is lacking one +inline double log1p(double x){ + double u = 1.0+x; + if(u == 1.0){ + return x; + }else{ + if(u > 2.0){ + // Use normal log for large arguments + return log(u); + }else{ + return log(u)*(x/(u-1.0)); + } + } +} + +inline float log1pf(float x){ + float u = 1.0f+x; + if(u == 1.0f){ + return x; + }else{ + if(u > 2.0f){ + // Use normal log for large arguments + return logf(u); + }else{ + return logf(u)*(x/(u-1.0f)); + } + } +} + +#if _MSV_VER <= 1500 +#include + +inline float hypotf(float x, float y){ + return abs(std::complex(x,y)); +} + +inline double hypot(double x, double y){ + return _hypot(x,y); +} + +#endif // _MSC_VER <= 1500 + +#endif // __CUDACC__ + +#endif // _MSC_VER + +} // namespace complex + +} // namespace detail + +THRUST_NAMESPACE_END + diff --git a/thrust/thrust/detail/complex/catrig.h b/thrust/thrust/detail/complex/catrig.h new file mode 100644 index 00000000000..b841ddb266d --- /dev/null +++ b/thrust/thrust/detail/complex/catrig.h @@ -0,0 +1,794 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2012 Stephen Montgomery-Smith + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Adapted from FreeBSD by Filipe Maia : + * freebsd/lib/msun/src/catrig.c + */ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +using thrust::complex; + +__host__ __device__ +inline void raise_inexact(){ + const volatile float tiny = 7.888609052210118054117286e-31; /* 0x1p-100; */ + // needs the volatile to prevent compiler from ignoring it + volatile float junk = 1 + tiny; + (void)junk; +} + +__host__ __device__ inline complex clog_for_large_values(complex z); + +/* + * Testing indicates that all these functions are accurate up to 4 ULP. + * The functions casin(h) and cacos(h) are about 2.5 times slower than asinh. + * The functions catan(h) are a little under 2 times slower than atanh. + * + * The code for casinh, casin, cacos, and cacosh comes first. The code is + * rather complicated, and the four functions are highly interdependent. + * + * The code for catanh and catan comes at the end. It is much simpler than + * the other functions, and the code for these can be disconnected from the + * rest of the code. + */ + +/* + * ================================ + * | casinh, casin, cacos, cacosh | + * ================================ + */ + +/* + * The algorithm is very close to that in "Implementing the complex arcsine + * and arccosine functions using exception handling" by T. E. Hull, Thomas F. + * Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on + * Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335, + * http://dl.acm.org/citation.cfm?id=275324. + * + * Throughout we use the convention z = x + I*y. + * + * casinh(z) = sign(x)*log(A+sqrt(A*A-1)) + I*asin(B) + * where + * A = (|z+I| + |z-I|) / 2 + * B = (|z+I| - |z-I|) / 2 = y/A + * + * These formulas become numerically unstable: + * (a) for Re(casinh(z)) when z is close to the line segment [-I, I] (that + * is, Re(casinh(z)) is close to 0); + * (b) for Im(casinh(z)) when z is close to either of the intervals + * [I, I*infinity) or (-I*infinity, -I] (that is, |Im(casinh(z))| is + * close to PI/2). + * + * These numerical problems are overcome by defining + * f(a, b) = (hypot(a, b) - b) / 2 = a*a / (hypot(a, b) + b) / 2 + * Then if A < A_crossover, we use + * log(A + sqrt(A*A-1)) = log1p((A-1) + sqrt((A-1)*(A+1))) + * A-1 = f(x, 1+y) + f(x, 1-y) + * and if B > B_crossover, we use + * asin(B) = atan2(y, sqrt(A*A - y*y)) = atan2(y, sqrt((A+y)*(A-y))) + * A-y = f(x, y+1) + f(x, y-1) + * where without loss of generality we have assumed that x and y are + * non-negative. + * + * Much of the difficulty comes because the intermediate computations may + * produce overflows or underflows. This is dealt with in the paper by Hull + * et al by using exception handling. We do this by detecting when + * computations risk underflow or overflow. The hardest part is handling the + * underflows when computing f(a, b). + * + * Note that the function f(a, b) does not appear explicitly in the paper by + * Hull et al, but the idea may be found on pages 308 and 309. Introducing the + * function f(a, b) allows us to concentrate many of the clever tricks in this + * paper into one function. + */ + +/* + * Function f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2. + * Pass hypot(a, b) as the third argument. + */ +__host__ __device__ +inline double +f(double a, double b, double hypot_a_b) +{ + if (b < 0) + return ((hypot_a_b - b) / 2); + if (b == 0) + return (a / 2); + return (a * a / (hypot_a_b + b) / 2); +} + +/* + * All the hard work is contained in this function. + * x and y are assumed positive or zero, and less than RECIP_EPSILON. + * Upon return: + * rx = Re(casinh(z)) = -Im(cacos(y + I*x)). + * B_is_usable is set to 1 if the value of B is usable. + * If B_is_usable is set to 0, sqrt_A2my2 = sqrt(A*A - y*y), and new_y = y. + * If returning sqrt_A2my2 has potential to result in an underflow, it is + * rescaled, and new_y is similarly rescaled. + */ +__host__ __device__ +inline void +do_hard_work(double x, double y, double *rx, int *B_is_usable, double *B, + double *sqrt_A2my2, double *new_y) +{ + double R, S, A; /* A, B, R, and S are as in Hull et al. */ + double Am1, Amy; /* A-1, A-y. */ + const double A_crossover = 10; /* Hull et al suggest 1.5, but 10 works better */ + const double FOUR_SQRT_MIN = 5.966672584960165394632772e-154; /* =0x1p-509; >= 4 * sqrt(DBL_MIN) */ + const double B_crossover = 0.6417; /* suggested by Hull et al */ + + R = hypot(x, y + 1); /* |z+I| */ + S = hypot(x, y - 1); /* |z-I| */ + + /* A = (|z+I| + |z-I|) / 2 */ + A = (R + S) / 2; + /* + * Mathematically A >= 1. There is a small chance that this will not + * be so because of rounding errors. So we will make certain it is + * so. + */ + if (A < 1) + A = 1; + + if (A < A_crossover) { + /* + * Am1 = fp + fm, where fp = f(x, 1+y), and fm = f(x, 1-y). + * rx = log1p(Am1 + sqrt(Am1*(A+1))) + */ + if (y == 1 && x < DBL_EPSILON * DBL_EPSILON / 128) { + /* + * fp is of order x^2, and fm = x/2. + * A = 1 (inexactly). + */ + *rx = sqrt(x); + } else if (x >= DBL_EPSILON * fabs(y - 1)) { + /* + * Underflow will not occur because + * x >= DBL_EPSILON^2/128 >= FOUR_SQRT_MIN + */ + Am1 = f(x, 1 + y, R) + f(x, 1 - y, S); + *rx = log1p(Am1 + sqrt(Am1 * (A + 1))); + } else if (y < 1) { + /* + * fp = x*x/(1+y)/4, fm = x*x/(1-y)/4, and + * A = 1 (inexactly). + */ + *rx = x / sqrt((1 - y) * (1 + y)); + } else { /* if (y > 1) */ + /* + * A-1 = y-1 (inexactly). + */ + *rx = log1p((y - 1) + sqrt((y - 1) * (y + 1))); + } + } else { + *rx = log(A + sqrt(A * A - 1)); + } + + *new_y = y; + + if (y < FOUR_SQRT_MIN) { + /* + * Avoid a possible underflow caused by y/A. For casinh this + * would be legitimate, but will be picked up by invoking atan2 + * later on. For cacos this would not be legitimate. + */ + *B_is_usable = 0; + *sqrt_A2my2 = A * (2 / DBL_EPSILON); + *new_y = y * (2 / DBL_EPSILON); + return; + } + + /* B = (|z+I| - |z-I|) / 2 = y/A */ + *B = y / A; + *B_is_usable = 1; + + if (*B > B_crossover) { + *B_is_usable = 0; + /* + * Amy = fp + fm, where fp = f(x, y+1), and fm = f(x, y-1). + * sqrt_A2my2 = sqrt(Amy*(A+y)) + */ + if (y == 1 && x < DBL_EPSILON / 128) { + /* + * fp is of order x^2, and fm = x/2. + * A = 1 (inexactly). + */ + *sqrt_A2my2 = sqrt(x) * sqrt((A + y) / 2); + } else if (x >= DBL_EPSILON * fabs(y - 1)) { + /* + * Underflow will not occur because + * x >= DBL_EPSILON/128 >= FOUR_SQRT_MIN + * and + * x >= DBL_EPSILON^2 >= FOUR_SQRT_MIN + */ + Amy = f(x, y + 1, R) + f(x, y - 1, S); + *sqrt_A2my2 = sqrt(Amy * (A + y)); + } else if (y > 1) { + /* + * fp = x*x/(y+1)/4, fm = x*x/(y-1)/4, and + * A = y (inexactly). + * + * y < RECIP_EPSILON. So the following + * scaling should avoid any underflow problems. + */ + *sqrt_A2my2 = x * (4 / DBL_EPSILON / DBL_EPSILON) * y / + sqrt((y + 1) * (y - 1)); + *new_y = y * (4 / DBL_EPSILON / DBL_EPSILON); + } else { /* if (y < 1) */ + /* + * fm = 1-y >= DBL_EPSILON, fp is of order x^2, and + * A = 1 (inexactly). + */ + *sqrt_A2my2 = sqrt((1 - y) * (1 + y)); + } + } +} + +/* + * casinh(z) = z + O(z^3) as z -> 0 + * + * casinh(z) = sign(x)*clog(sign(x)*z) + O(1/z^2) as z -> infinity + * The above formula works for the imaginary part as well, because + * Im(casinh(z)) = sign(x)*atan2(sign(x)*y, fabs(x)) + O(y/z^3) + * as z -> infinity, uniformly in y + */ +__host__ __device__ inline +complex casinh(complex z) +{ + double x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y; + int B_is_usable; + complex w; + const double RECIP_EPSILON = 1.0 / DBL_EPSILON; + const double m_ln2 = 6.9314718055994531e-1; /* 0x162e42fefa39ef.0p-53 */ + x = z.real(); + y = z.imag(); + ax = fabs(x); + ay = fabs(y); + + if (isnan(x) || isnan(y)) { + /* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */ + if (isinf(x)) + return (complex(x, y + y)); + /* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */ + if (isinf(y)) + return (complex(y, x + x)); + /* casinh(NaN + I*0) = NaN + I*0 */ + if (y == 0) + return (complex(x + x, y)); + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return (complex(x + 0.0 + (y + 0.0), x + 0.0 + (y + 0.0))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + /* clog...() will raise inexact unless x or y is infinite. */ + if (signbit(x) == 0) + w = clog_for_large_values(z) + m_ln2; + else + w = clog_for_large_values(-z) + m_ln2; + return (complex(copysign(w.real(), x), copysign(w.imag(), y))); + } + + /* Avoid spuriously raising inexact for z = 0. */ + if (x == 0 && y == 0) + return (z); + + /* All remaining cases are inexact. */ + raise_inexact(); + + const double SQRT_6_EPSILON = 3.6500241499888571e-8; /* 0x13988e1409212e.0p-77 */ + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) + return (z); + + do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y); + if (B_is_usable) + ry = asin(B); + else + ry = atan2(new_y, sqrt_A2my2); + return (complex(copysign(rx, x), copysign(ry, y))); +} + +/* + * casin(z) = reverse(casinh(reverse(z))) + * where reverse(x + I*y) = y + I*x = I*conj(z). + */ +__host__ __device__ inline +complex casin(complex z) +{ + complex w = casinh(complex(z.imag(), z.real())); + + return (complex(w.imag(), w.real())); +} + +/* + * cacos(z) = PI/2 - casin(z) + * but do the computation carefully so cacos(z) is accurate when z is + * close to 1. + * + * cacos(z) = PI/2 - z + O(z^3) as z -> 0 + * + * cacos(z) = -sign(y)*I*clog(z) + O(1/z^2) as z -> infinity + * The above formula works for the real part as well, because + * Re(cacos(z)) = atan2(fabs(y), x) + O(y/z^3) + * as z -> infinity, uniformly in y + */ +__host__ __device__ inline +complex cacos(complex z) +{ + double x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x; + int sx, sy; + int B_is_usable; + complex w; + const double pio2_hi = 1.5707963267948966e0; /* 0x1921fb54442d18.0p-52 */ + const volatile double pio2_lo = 6.1232339957367659e-17; /* 0x11a62633145c07.0p-106 */ + const double m_ln2 = 6.9314718055994531e-1; /* 0x162e42fefa39ef.0p-53 */ + + x = z.real(); + y = z.imag(); + sx = signbit(x); + sy = signbit(y); + ax = fabs(x); + ay = fabs(y); + + if (isnan(x) || isnan(y)) { + /* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */ + if (isinf(x)) + return (complex(y + y, -infinity())); + /* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */ + if (isinf(y)) + return (complex(x + x, -y)); + /* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */ + if (x == 0) + return (complex(pio2_hi + pio2_lo, y + y)); + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return (complex(x + 0.0 + (y + 0), x + 0.0 + (y + 0))); + } + + const double RECIP_EPSILON = 1.0 / DBL_EPSILON; + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + /* clog...() will raise inexact unless x or y is infinite. */ + w = clog_for_large_values(z); + rx = fabs(w.imag()); + ry = w.real() + m_ln2; + if (sy == 0) + ry = -ry; + return (complex(rx, ry)); + } + + /* Avoid spuriously raising inexact for z = 1. */ + if (x == 1.0 && y == 0.0) + return (complex(0, -y)); + + /* All remaining cases are inexact. */ + raise_inexact(); + + const double SQRT_6_EPSILON = 3.6500241499888571e-8; /* 0x13988e1409212e.0p-77 */ + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) + return (complex(pio2_hi - (x - pio2_lo), -y)); + + do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x); + if (B_is_usable) { + if (sx == 0) + rx = acos(B); + else + rx = acos(-B); + } else { + if (sx == 0) + rx = atan2(sqrt_A2mx2, new_x); + else + rx = atan2(sqrt_A2mx2, -new_x); + } + if (sy == 0) + ry = -ry; + return (complex(rx, ry)); +} + +/* + * cacosh(z) = I*cacos(z) or -I*cacos(z) + * where the sign is chosen so Re(cacosh(z)) >= 0. + */ +__host__ __device__ inline +complex cacosh(complex z) +{ + complex w; + double rx, ry; + + w = cacos(z); + rx = w.real(); + ry = w.imag(); + /* cacosh(NaN + I*NaN) = NaN + I*NaN */ + if (isnan(rx) && isnan(ry)) + return (complex(ry, rx)); + /* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */ + /* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */ + if (isnan(rx)) + return (complex(fabs(ry), rx)); + /* cacosh(0 + I*NaN) = NaN + I*NaN */ + if (isnan(ry)) + return (complex(ry, ry)); + return (complex(fabs(ry), copysign(rx, z.imag()))); +} + +/* + * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON. + */ +__host__ __device__ inline +complex clog_for_large_values(complex z) +{ + double x, y; + double ax, ay, t; + const double m_e = 2.7182818284590452e0; /* 0x15bf0a8b145769.0p-51 */ + + x = z.real(); + y = z.imag(); + ax = fabs(x); + ay = fabs(y); + if (ax < ay) { + t = ax; + ax = ay; + ay = t; + } + + /* + * Avoid overflow in hypot() when x and y are both very large. + * Divide x and y by E, and then add 1 to the logarithm. This depends + * on E being larger than sqrt(2). + * Dividing by E causes an insignificant loss of accuracy; however + * this method is still poor since it is uneccessarily slow. + */ + if (ax > DBL_MAX / 2) + return (complex(log(hypot(x / m_e, y / m_e)) + 1, atan2(y, x))); + + /* + * Avoid overflow when x or y is large. Avoid underflow when x or + * y is small. + */ + const double QUARTER_SQRT_MAX = 5.966672584960165394632772e-154; /* = 0x1p509; <= sqrt(DBL_MAX) / 4 */ + const double SQRT_MIN = 1.491668146240041348658193e-154; /* = 0x1p-511; >= sqrt(DBL_MIN) */ + if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) + return (complex(log(hypot(x, y)), atan2(y, x))); + + return (complex(log(ax * ax + ay * ay) / 2, atan2(y, x))); +} + +/* + * ================= + * | catanh, catan | + * ================= + */ + +/* + * sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow). + * Assumes x*x and y*y will not overflow. + * Assumes x and y are finite. + * Assumes y is non-negative. + * Assumes fabs(x) >= DBL_EPSILON. + */ +__host__ __device__ +inline double sum_squares(double x, double y) +{ + const double SQRT_MIN = 1.491668146240041348658193e-154; /* = 0x1p-511; >= sqrt(DBL_MIN) */ + /* Avoid underflow when y is small. */ + if (y < SQRT_MIN) + return (x * x); + + return (x * x + y * y); +} + +/* + * real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y). + * Assumes x and y are not NaN, and one of x and y is larger than + * RECIP_EPSILON. We avoid unwarranted underflow. It is important to not use + * the code creal(1/z), because the imaginary part may produce an unwanted + * underflow. + * This is only called in a context where inexact is always raised before + * the call, so no effort is made to avoid or force inexact. + */ +__host__ __device__ +inline double real_part_reciprocal(double x, double y) +{ + double scale; + uint32_t hx, hy; + int32_t ix, iy; + + /* + * This code is inspired by the C99 document n1124.pdf, Section G.5.1, + * example 2. + */ + get_high_word(hx, x); + ix = hx & 0x7ff00000; + get_high_word(hy, y); + iy = hy & 0x7ff00000; + //#define BIAS (DBL_MAX_EXP - 1) + const int BIAS = DBL_MAX_EXP - 1; + /* XXX more guard digits are useful iff there is extra precision. */ + //#define CUTOFF (DBL_MANT_DIG / 2 + 1) /* just half or 1 guard digit */ + const int CUTOFF = (DBL_MANT_DIG / 2 + 1); + if (ix - iy >= CUTOFF << 20 || isinf(x)) + return (1 / x); /* +-Inf -> +-0 is special */ + if (iy - ix >= CUTOFF << 20) + return (x / y / y); /* should avoid double div, but hard */ + if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20) + return (x / (x * x + y * y)); + scale = 1; + set_high_word(scale, 0x7ff00000 - ix); /* 2**(1-ilogb(x)) */ + x *= scale; + y *= scale; + return (x / (x * x + y * y) * scale); +} + + +/* + * catanh(z) = log((1+z)/(1-z)) / 2 + * = log1p(4*x / |z-1|^2) / 4 + * + I * atan2(2*y, (1-x)*(1+x)-y*y) / 2 + * + * catanh(z) = z + O(z^3) as z -> 0 + * + * catanh(z) = 1/z + sign(y)*I*PI/2 + O(1/z^3) as z -> infinity + * The above formula works for the real part as well, because + * Re(catanh(z)) = x/|z|^2 + O(x/z^4) + * as z -> infinity, uniformly in x + */ +#if THRUST_CPP_DIALECT >= 2011 || THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC +__host__ __device__ inline +complex catanh(complex z) +{ + double x, y, ax, ay, rx, ry; + const volatile double pio2_lo = 6.1232339957367659e-17; /* 0x11a62633145c07.0p-106 */ + const double pio2_hi = 1.5707963267948966e0;/* 0x1921fb54442d18.0p-52 */ + + + x = z.real(); + y = z.imag(); + ax = fabs(x); + ay = fabs(y); + + /* This helps handle many cases. */ + if (y == 0 && ax <= 1) + return (complex(atanh(x), y)); + + /* To ensure the same accuracy as atan(), and to filter out z = 0. */ + if (x == 0) + return (complex(x, atan(y))); + + if (isnan(x) || isnan(y)) { + /* catanh(+-Inf + I*NaN) = +-0 + I*NaN */ + if (isinf(x)) + return (complex(copysign(0.0, x), y + y)); + /* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */ + if (isinf(y)) + return (complex(copysign(0.0, x), + copysign(pio2_hi + pio2_lo, y))); + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return (complex(x + 0.0 + (y + 0), x + 0.0 + (y + 0))); + } + + const double RECIP_EPSILON = 1.0 / DBL_EPSILON; + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) + return (complex(real_part_reciprocal(x, y), + copysign(pio2_hi + pio2_lo, y))); + + const double SQRT_3_EPSILON = 2.5809568279517849e-8; /* 0x1bb67ae8584caa.0p-78 */ + if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) { + /* + * z = 0 was filtered out above. All other cases must raise + * inexact, but this is the only only that needs to do it + * explicitly. + */ + raise_inexact(); + return (z); + } + + const double m_ln2 = 6.9314718055994531e-1; /* 0x162e42fefa39ef.0p-53 */ + if (ax == 1 && ay < DBL_EPSILON) + rx = (m_ln2 - log(ay)) / 2; + else + rx = log1p(4 * ax / sum_squares(ax - 1, ay)) / 4; + + if (ax == 1) + ry = atan2(2.0, -ay) / 2; + else if (ay < DBL_EPSILON) + ry = atan2(2 * ay, (1 - ax) * (1 + ax)) / 2; + else + ry = atan2(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; + + return (complex(copysign(rx, x), copysign(ry, y))); +} + +/* + * catan(z) = reverse(catanh(reverse(z))) + * where reverse(x + I*y) = y + I*x = I*conj(z). + */ +__host__ __device__ inline +complexcatan(complex z) +{ + complex w = catanh(complex(z.imag(), z.real())); + return (complex(w.imag(), w.real())); +} + +#endif + +} // namespace complex + +} // namespace detail + + +template +__host__ __device__ +inline complex acos(const complex& z){ + const complex ret = thrust::asin(z); + const ValueType pi = ValueType(3.14159265358979323846); + return complex(pi/2 - ret.real(),-ret.imag()); +} + + +template +__host__ __device__ +inline complex asin(const complex& z){ + const complex i(0,1); + return -i*asinh(i*z); +} + +template +__host__ __device__ +inline complex atan(const complex& z){ + const complex i(0,1); + return -i*thrust::atanh(i*z); +} + + +template +__host__ __device__ +inline complex acosh(const complex& z){ + thrust::complex ret((z.real() - z.imag()) * (z.real() + z.imag()) - ValueType(1.0), + ValueType(2.0) * z.real() * z.imag()); + ret = thrust::sqrt(ret); + if (z.real() < ValueType(0.0)){ + ret = -ret; + } + ret += z; + ret = thrust::log(ret); + if (ret.real() < ValueType(0.0)){ + ret = -ret; + } + return ret; +} + +template +__host__ __device__ +inline complex asinh(const complex& z){ + return thrust::log(thrust::sqrt(z*z+ValueType(1))+z); +} + +template +__host__ __device__ +inline complex atanh(const complex& z){ + ValueType imag2 = z.imag() * z.imag(); + ValueType n = ValueType(1.0) + z.real(); + n = imag2 + n * n; + + ValueType d = ValueType(1.0) - z.real(); + d = imag2 + d * d; + complex ret(ValueType(0.25) * (std::log(n) - std::log(d)),0); + + d = ValueType(1.0) - z.real() * z.real() - imag2; + + ret.imag(ValueType(0.5) * std::atan2(ValueType(2.0) * z.imag(), d)); + return ret; +} + +template <> +__host__ __device__ +inline complex acos(const complex& z){ + return detail::complex::cacos(z); +} + +template <> +__host__ __device__ +inline complex asin(const complex& z){ + return detail::complex::casin(z); +} + +#if THRUST_CPP_DIALECT >= 2011 || THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC +template <> +__host__ __device__ +inline complex atan(const complex& z){ + return detail::complex::catan(z); +} +#endif + +template <> +__host__ __device__ +inline complex acosh(const complex& z){ + return detail::complex::cacosh(z); +} + + +template <> +__host__ __device__ +inline complex asinh(const complex& z){ + return detail::complex::casinh(z); +} + +#if THRUST_CPP_DIALECT >= 2011 || THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC +template <> +__host__ __device__ +inline complex atanh(const complex& z){ + return detail::complex::catanh(z); +} +#endif + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/complex/catrigf.h b/thrust/thrust/detail/complex/catrigf.h new file mode 100644 index 00000000000..1e4166bb0e4 --- /dev/null +++ b/thrust/thrust/detail/complex/catrigf.h @@ -0,0 +1,509 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2012 Stephen Montgomery-Smith + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Adapted from FreeBSD by Filipe Maia : + * freebsd/lib/msun/src/catrig.c + */ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +using thrust::complex; + +__host__ __device__ inline + complex clog_for_large_values(complex z); + +/* + * The algorithm is very close to that in "Implementing the complex arcsine + * and arccosine functions using exception handling" by T. E. Hull, Thomas F. + * Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on + * Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335, + * http://dl.acm.org/citation.cfm?id=275324. + * + * See catrig.c for complete comments. + * + * XXX comments were removed automatically, and even short ones on the right + * of statements were removed (all of them), contrary to normal style. Only + * a few comments on the right of declarations remain. + */ + +__host__ __device__ +inline float +f(float a, float b, float hypot_a_b) +{ + if (b < 0.0f) + return ((hypot_a_b - b) / 2.0f); + if (b == 0.0f) + return (a / 2.0f); + return (a * a / (hypot_a_b + b) / 2.0f); +} + +/* + * All the hard work is contained in this function. + * x and y are assumed positive or zero, and less than RECIP_EPSILON. + * Upon return: + * rx = Re(casinh(z)) = -Im(cacos(y + I*x)). + * B_is_usable is set to 1 if the value of B is usable. + * If B_is_usable is set to 0, sqrt_A2my2 = sqrt(A*A - y*y), and new_y = y. + * If returning sqrt_A2my2 has potential to result in an underflow, it is + * rescaled, and new_y is similarly rescaled. + */ +__host__ __device__ +inline void +do_hard_work(float x, float y, float *rx, int *B_is_usable, float *B, + float *sqrt_A2my2, float *new_y) +{ + float R, S, A; /* A, B, R, and S are as in Hull et al. */ + float Am1, Amy; /* A-1, A-y. */ + const float A_crossover = 10; /* Hull et al suggest 1.5, but 10 works better */ + const float FOUR_SQRT_MIN = 4.336808689942017736029811e-19f;; /* =0x1p-61; >= 4 * sqrt(FLT_MIN) */ + const float B_crossover = 0.6417f; /* suggested by Hull et al */ + R = hypotf(x, y + 1); + S = hypotf(x, y - 1); + + A = (R + S) / 2; + if (A < 1) + A = 1; + + if (A < A_crossover) { + if (y == 1 && x < FLT_EPSILON * FLT_EPSILON / 128) { + *rx = sqrtf(x); + } else if (x >= FLT_EPSILON * fabsf(y - 1)) { + Am1 = f(x, 1 + y, R) + f(x, 1 - y, S); + *rx = log1pf(Am1 + sqrtf(Am1 * (A + 1))); + } else if (y < 1) { + *rx = x / sqrtf((1 - y) * (1 + y)); + } else { + *rx = log1pf((y - 1) + sqrtf((y - 1) * (y + 1))); + } + } else { + *rx = logf(A + sqrtf(A * A - 1)); + } + + *new_y = y; + + if (y < FOUR_SQRT_MIN) { + *B_is_usable = 0; + *sqrt_A2my2 = A * (2 / FLT_EPSILON); + *new_y = y * (2 / FLT_EPSILON); + return; + } + + *B = y / A; + *B_is_usable = 1; + + if (*B > B_crossover) { + *B_is_usable = 0; + if (y == 1 && x < FLT_EPSILON / 128) { + *sqrt_A2my2 = sqrtf(x) * sqrtf((A + y) / 2); + } else if (x >= FLT_EPSILON * fabsf(y - 1)) { + Amy = f(x, y + 1, R) + f(x, y - 1, S); + *sqrt_A2my2 = sqrtf(Amy * (A + y)); + } else if (y > 1) { + *sqrt_A2my2 = x * (4 / FLT_EPSILON / FLT_EPSILON) * y / + sqrtf((y + 1) * (y - 1)); + *new_y = y * (4 / FLT_EPSILON / FLT_EPSILON); + } else { + *sqrt_A2my2 = sqrtf((1 - y) * (1 + y)); + } + } + +} + +__host__ __device__ inline +complex +casinhf(complex z) +{ + float x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y; + int B_is_usable; + complex w; + const float RECIP_EPSILON = 1.0f / FLT_EPSILON; + const float m_ln2 = 6.9314718055994531e-1f; /* 0x162e42fefa39ef.0p-53 */ + x = z.real(); + y = z.imag(); + ax = fabsf(x); + ay = fabsf(y); + + if (isnan(x) || isnan(y)) { + if (isinf(x)) + return (complex(x, y + y)); + if (isinf(y)) + return (complex(y, x + x)); + if (y == 0) + return (complex(x + x, y)); + return (complex(x + 0.0f + (y + 0), x + 0.0f + (y + 0))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + if (signbit(x) == 0) + w = clog_for_large_values(z) + m_ln2; + else + w = clog_for_large_values(-z) + m_ln2; + return (complex(copysignf(w.real(), x), + copysignf(w.imag(), y))); + } + + if (x == 0 && y == 0) + return (z); + + raise_inexact(); + + const float SQRT_6_EPSILON = 8.4572793338e-4f; /* 0xddb3d7.0p-34 */ + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) + return (z); + + do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y); + if (B_is_usable) + ry = asinf(B); + else + ry = atan2f(new_y, sqrt_A2my2); + return (complex(copysignf(rx, x), copysignf(ry, y))); +} + +__host__ __device__ inline +complex casinf(complex z) +{ + complex w = casinhf(complex(z.imag(), z.real())); + + return (complex(w.imag(), w.real())); +} + +__host__ __device__ inline +complex cacosf(complex z) +{ + float x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x; + int sx, sy; + int B_is_usable; + complex w; + const float pio2_hi = 1.5707963267948966e0f; /* 0x1921fb54442d18.0p-52 */ + const volatile float pio2_lo = 6.1232339957367659e-17f; /* 0x11a62633145c07.0p-106 */ + const float m_ln2 = 6.9314718055994531e-1f; /* 0x162e42fefa39ef.0p-53 */ + + x = z.real(); + y = z.imag(); + sx = signbit(x); + sy = signbit(y); + ax = fabsf(x); + ay = fabsf(y); + + if (isnan(x) || isnan(y)) { + if (isinf(x)) + return (complex(y + y, -infinity())); + if (isinf(y)) + return (complex(x + x, -y)); + if (x == 0) + return (complex(pio2_hi + pio2_lo, y + y)); + return (complex(x + 0.0f + (y + 0), x + 0.0f + (y + 0))); + } + + const float RECIP_EPSILON = 1.0f / FLT_EPSILON; + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + w = clog_for_large_values(z); + rx = fabsf(w.imag()); + ry = w.real() + m_ln2; + if (sy == 0) + ry = -ry; + return (complex(rx, ry)); + } + + if (x == 1 && y == 0) + return (complex(0, -y)); + + raise_inexact(); + + const float SQRT_6_EPSILON = 8.4572793338e-4f; /* 0xddb3d7.0p-34 */ + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) + return (complex(pio2_hi - (x - pio2_lo), -y)); + + do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x); + if (B_is_usable) { + if (sx == 0) + rx = acosf(B); + else + rx = acosf(-B); + } else { + if (sx == 0) + rx = atan2f(sqrt_A2mx2, new_x); + else + rx = atan2f(sqrt_A2mx2, -new_x); + } + if (sy == 0) + ry = -ry; + return (complex(rx, ry)); +} + +__host__ __device__ inline +complex cacoshf(complex z) +{ + complex w; + float rx, ry; + + w = cacosf(z); + rx = w.real(); + ry = w.imag(); + /* cacosh(NaN + I*NaN) = NaN + I*NaN */ + if (isnan(rx) && isnan(ry)) + return (complex(ry, rx)); + /* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */ + /* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */ + if (isnan(rx)) + return (complex(fabsf(ry), rx)); + /* cacosh(0 + I*NaN) = NaN + I*NaN */ + if (isnan(ry)) + return (complex(ry, ry)); + return (complex(fabsf(ry), copysignf(rx, z.imag()))); +} + + /* + * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON. + */ +__host__ __device__ inline +complex clog_for_large_values(complex z) +{ + float x, y; + float ax, ay, t; + const float m_e = 2.7182818284590452e0f; /* 0x15bf0a8b145769.0p-51 */ + + x = z.real(); + y = z.imag(); + ax = fabsf(x); + ay = fabsf(y); + if (ax < ay) { + t = ax; + ax = ay; + ay = t; + } + + if (ax > FLT_MAX / 2) + return (complex(logf(hypotf(x / m_e, y / m_e)) + 1, + atan2f(y, x))); + + const float QUARTER_SQRT_MAX = 2.3058430092136939520000000e+18f; /* = 0x1p61; <= sqrt(FLT_MAX) / 4 */ + const float SQRT_MIN = 1.084202172485504434007453e-19f; /* 0x1p-63; >= sqrt(FLT_MIN) */ + if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) + return (complex(logf(hypotf(x, y)), atan2f(y, x))); + + return (complex(logf(ax * ax + ay * ay) / 2, atan2f(y, x))); +} + +/* + * ================= + * | catanh, catan | + * ================= + */ + +/* + * sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow). + * Assumes x*x and y*y will not overflow. + * Assumes x and y are finite. + * Assumes y is non-negative. + * Assumes fabsf(x) >= FLT_EPSILON. + */ +__host__ __device__ +inline float sum_squares(float x, float y) +{ + const float SQRT_MIN = 1.084202172485504434007453e-19f; /* 0x1p-63; >= sqrt(FLT_MIN) */ + /* Avoid underflow when y is small. */ + if (y < SQRT_MIN) + return (x * x); + + return (x * x + y * y); +} + +__host__ __device__ +inline float real_part_reciprocal(float x, float y) +{ + float scale; + uint32_t hx, hy; + int32_t ix, iy; + + get_float_word(hx, x); + ix = hx & 0x7f800000; + get_float_word(hy, y); + iy = hy & 0x7f800000; + //#define BIAS (FLT_MAX_EXP - 1) + const int BIAS = FLT_MAX_EXP - 1; + //#define CUTOFF (FLT_MANT_DIG / 2 + 1) + const int CUTOFF = (FLT_MANT_DIG / 2 + 1); + if (ix - iy >= CUTOFF << 23 || isinf(x)) + return (1 / x); + if (iy - ix >= CUTOFF << 23) + return (x / y / y); + if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23) + return (x / (x * x + y * y)); + set_float_word(scale, 0x7f800000 - ix); + x *= scale; + y *= scale; + return (x / (x * x + y * y) * scale); +} + +#if THRUST_CPP_DIALECT >= 2011 || THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC +__host__ __device__ inline +complex catanhf(complex z) +{ + float x, y, ax, ay, rx, ry; + const volatile float pio2_lo = 6.1232339957367659e-17f; /* 0x11a62633145c07.0p-106 */ + const float pio2_hi = 1.5707963267948966e0f;/* 0x1921fb54442d18.0p-52 */ + + + x = z.real(); + y = z.imag(); + ax = fabsf(x); + ay = fabsf(y); + + + if (y == 0 && ax <= 1) + return (complex(atanhf(x), y)); + + if (x == 0) + return (complex(x, atanf(y))); + + if (isnan(x) || isnan(y)) { + if (isinf(x)) + return (complex(copysignf(0, x), y + y)); + if (isinf(y)) + return (complex(copysignf(0, x), + copysignf(pio2_hi + pio2_lo, y))); + return (complex(x + 0.0f + (y + 0.0f), x + 0.0f + (y + 0.0f))); + } + + const float RECIP_EPSILON = 1.0f / FLT_EPSILON; + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) + return (complex(real_part_reciprocal(x, y), + copysignf(pio2_hi + pio2_lo, y))); + + const float SQRT_3_EPSILON = 5.9801995673e-4f; /* 0x9cc471.0p-34 */ + if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) { + raise_inexact(); + return (z); + } + + const float m_ln2 = 6.9314718056e-1f; /* 0xb17218.0p-24 */ + if (ax == 1 && ay < FLT_EPSILON) + rx = (m_ln2 - logf(ay)) / 2; + else + rx = log1pf(4 * ax / sum_squares(ax - 1, ay)) / 4; + + if (ax == 1) + ry = atan2f(2, -ay) / 2; + else if (ay < FLT_EPSILON) + ry = atan2f(2 * ay, (1 - ax) * (1 + ax)) / 2; + else + ry = atan2f(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; + + return (complex(copysignf(rx, x), copysignf(ry, y))); +} + +__host__ __device__ inline +complexcatanf(complex z){ + complex w = catanhf(complex(z.imag(), z.real())); + return (complex(w.imag(), w.real())); +} +#endif + +} // namespace complex + +} // namespace detail + + +template <> +__host__ __device__ +inline complex acos(const complex& z){ + return detail::complex::cacosf(z); +} + +template <> +__host__ __device__ +inline complex asin(const complex& z){ + return detail::complex::casinf(z); +} + +#if THRUST_CPP_DIALECT >= 2011 || THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC +template <> +__host__ __device__ +inline complex atan(const complex& z){ + return detail::complex::catanf(z); +} +#endif + +template <> +__host__ __device__ +inline complex acosh(const complex& z){ + return detail::complex::cacoshf(z); +} + + +template <> +__host__ __device__ +inline complex asinh(const complex& z){ + return detail::complex::casinhf(z); +} + +#if THRUST_CPP_DIALECT >= 2011 || THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC +template <> +__host__ __device__ +inline complex atanh(const complex& z){ + return detail::complex::catanhf(z); +} +#endif + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/complex/ccosh.h b/thrust/thrust/detail/complex/ccosh.h new file mode 100644 index 00000000000..24467ac073b --- /dev/null +++ b/thrust/thrust/detail/complex/ccosh.h @@ -0,0 +1,223 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* adapted from FreeBSD: + * lib/msun/src/s_ccosh.c + */ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +/* + * Hyperbolic cosine of a complex argument z = x + i y. + * + * cosh(z) = cosh(x+iy) + * = cosh(x) cos(y) + i sinh(x) sin(y). + * + * Exceptional values are noted in the comments within the source code. + * These values and the return value were taken from n1124.pdf. + */ + +__host__ __device__ inline +thrust::complex ccosh(const thrust::complex& z){ + + + const double huge = 8.98846567431157953864652595395e+307; // 0x1p1023 + double x, y, h; + uint32_t hx, hy, ix, iy, lx, ly; + + x = z.real(); + y = z.imag(); + + extract_words(hx, lx, x); + extract_words(hy, ly, y); + + ix = 0x7fffffff & hx; + iy = 0x7fffffff & hy; + + /* Handle the nearly-non-exceptional cases where x and y are finite. */ + if (ix < 0x7ff00000 && iy < 0x7ff00000) { + if ((iy | ly) == 0) + return (thrust::complex(::cosh(x), x * y)); + if (ix < 0x40360000) /* small x: normal case */ + return (thrust::complex(::cosh(x) * ::cos(y), ::sinh(x) * ::sin(y))); + + /* |x| >= 22, so cosh(x) ~= exp(|x|) */ + if (ix < 0x40862e42) { + /* x < 710: exp(|x|) won't overflow */ + h = ::exp(::fabs(x)) * 0.5; + return (thrust::complex(h * cos(y), copysign(h, x) * sin(y))); + } else if (ix < 0x4096bbaa) { + /* x < 1455: scale to avoid overflow */ + thrust::complex z_; + z_ = ldexp_cexp(thrust::complex(fabs(x), y), -1); + return (thrust::complex(z_.real(), z_.imag() * copysign(1.0, x))); + } else { + /* x >= 1455: the result always overflows */ + h = huge * x; + return (thrust::complex(h * h * cos(y), h * sin(y))); + } + } + + /* + * cosh(+-0 +- I Inf) = dNaN + I sign(d(+-0, dNaN))0. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as dNaN. Raise the invalid floating-point exception. + * + * cosh(+-0 +- I NaN) = d(NaN) + I sign(d(+-0, NaN))0. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as d(NaN). + */ + if ((ix | lx) == 0 && iy >= 0x7ff00000) + return (thrust::complex(y - y, copysign(0.0, x * (y - y)))); + + /* + * cosh(+-Inf +- I 0) = +Inf + I (+-)(+-)0. + * + * cosh(NaN +- I 0) = d(NaN) + I sign(d(NaN, +-0))0. + * The sign of 0 in the result is unspecified. + */ + if ((iy | ly) == 0 && ix >= 0x7ff00000) { + if (((hx & 0xfffff) | lx) == 0) + return (thrust::complex(x * x, copysign(0.0, x) * y)); + return (thrust::complex(x * x, copysign(0.0, (x + x) * y))); + } + + /* + * cosh(x +- I Inf) = dNaN + I dNaN. + * Raise the invalid floating-point exception for finite nonzero x. + * + * cosh(x + I NaN) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero x. Choice = don't raise (except for signaling NaNs). + */ + if (ix < 0x7ff00000 && iy >= 0x7ff00000) + return (thrust::complex(y - y, x * (y - y))); + + /* + * cosh(+-Inf + I NaN) = +Inf + I d(NaN). + * + * cosh(+-Inf +- I Inf) = +Inf + I dNaN. + * The sign of Inf in the result is unspecified. Choice = always +. + * Raise the invalid floating-point exception. + * + * cosh(+-Inf + I y) = +Inf cos(y) +- I Inf sin(y) + */ + if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) { + if (iy >= 0x7ff00000) + return (thrust::complex(x * x, x * (y - y))); + return (thrust::complex((x * x) * cos(y), x * sin(y))); + } + + /* + * cosh(NaN + I NaN) = d(NaN) + I d(NaN). + * + * cosh(NaN +- I Inf) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception. + * Choice = raise. + * + * cosh(NaN + I y) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero y. Choice = don't raise (except for signaling NaNs). + */ + return (thrust::complex((x * x) * (y - y), (x + x) * (y - y))); +} + + +__host__ __device__ inline +thrust::complex ccos(const thrust::complex& z){ + /* ccos(z) = ccosh(I * z) */ + return (ccosh(thrust::complex(-z.imag(), z.real()))); +} + +} // namespace complex + +} // namespace detail + +template +__host__ __device__ +inline complex cos(const complex& z){ + const ValueType re = z.real(); + const ValueType im = z.imag(); + return complex(std::cos(re) * std::cosh(im), + -std::sin(re) * std::sinh(im)); +} + +template +__host__ __device__ +inline complex cosh(const complex& z){ + const ValueType re = z.real(); + const ValueType im = z.imag(); + return complex(std::cosh(re) * std::cos(im), + std::sinh(re) * std::sin(im)); +} + +template <> +__host__ __device__ +inline thrust::complex cos(const thrust::complex& z){ + return detail::complex::ccos(z); +} + +template <> +__host__ __device__ +inline thrust::complex cosh(const thrust::complex& z){ + return detail::complex::ccosh(z); +} + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/complex/ccoshf.h b/thrust/thrust/detail/complex/ccoshf.h new file mode 100644 index 00000000000..8c91c26a21b --- /dev/null +++ b/thrust/thrust/detail/complex/ccoshf.h @@ -0,0 +1,151 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* adapted from FreeBSD: + * lib/msun/src/s_ccoshf.c + */ + + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +using thrust::complex; + +__host__ __device__ inline +complex ccoshf(const complex& z){ + float x, y, h; + uint32_t hx, hy, ix, iy; + const float huge = 1.70141183460469231731687303716e+38; //0x1p127; + + + x = z.real(); + y = z.imag(); + + get_float_word(hx, x); + get_float_word(hy, y); + + ix = 0x7fffffff & hx; + iy = 0x7fffffff & hy; + if (ix < 0x7f800000 && iy < 0x7f800000) { + if (iy == 0){ + return (complex(coshf(x), x * y)); + } + if (ix < 0x41100000){ /* small x: normal case */ + return (complex(coshf(x) * cosf(y), sinhf(x) * sinf(y))); + } + /* |x| >= 9, so cosh(x) ~= exp(|x|) */ + if (ix < 0x42b17218) { + /* x < 88.7: expf(|x|) won't overflow */ + h = expf(fabsf(x)) * 0.5f; + return (complex(h * cosf(y), copysignf(h, x) * sinf(y))); + } else if (ix < 0x4340b1e7) { + /* x < 192.7: scale to avoid overflow */ + thrust::complex z_; + z_ = ldexp_cexpf(complex(fabsf(x), y), -1); + return (complex(z_.real(), z_.imag() * copysignf(1.0f, x))); + } else { + /* x >= 192.7: the result always overflows */ + h = huge * x; + return (complex(h * h * cosf(y), h * sinf(y))); + } + } + + if (ix == 0 && iy >= 0x7f800000){ + return (complex(y - y, copysignf(0.0f, x * (y - y)))); + } + if (iy == 0 && ix >= 0x7f800000) { + if ((hx & 0x7fffff) == 0) + return (complex(x * x, copysignf(0.0f, x) * y)); + return (complex(x * x, copysignf(0.0f, (x + x) * y))); + } + + if (ix < 0x7f800000 && iy >= 0x7f800000){ + return (complex(y - y, x * (y - y))); + } + + if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) { + if (iy >= 0x7f800000) + return (complex(x * x, x * (y - y))); + return (complex((x * x) * cosf(y), x * sinf(y))); + } + return (complex((x * x) * (y - y), (x + x) * (y - y))); +} + +__host__ __device__ inline +complex ccosf(const complex& z){ + return (ccoshf(complex(-z.imag(), z.real()))); +} + +} // namespace complex + +} // namespace detail + +template <> +__host__ __device__ +inline complex cos(const complex& z){ + return detail::complex::ccosf(z); +} + +template <> +__host__ __device__ +inline complex cosh(const complex& z){ + return detail::complex::ccoshf(z); +} + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/complex/cexp.h b/thrust/thrust/detail/complex/cexp.h new file mode 100644 index 00000000000..3377d083fc4 --- /dev/null +++ b/thrust/thrust/detail/complex/cexp.h @@ -0,0 +1,193 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* adapted from FreeBSD: + * lib/msun/src/s_cexp.c + * lib/msun/src/k_exp.c + * + */ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ +/* + * Compute exp(x), scaled to avoid spurious overflow. An exponent is + * returned separately in 'expt'. + * + * Input: ln(DBL_MAX) <= x < ln(2 * DBL_MAX / DBL_MIN_DENORM) ~= 1454.91 + * Output: 2**1023 <= y < 2**1024 + */ +__host__ __device__ inline + double frexp_exp(double x, int *expt){ + const uint32_t k = 1799; /* constant for reduction */ + const double kln2 = 1246.97177782734161156; /* k * ln2 */ + + double exp_x; + uint32_t hx; + + /* + * We use exp(x) = exp(x - kln2) * 2**k, carefully chosen to + * minimize |exp(kln2) - 2**k|. We also scale the exponent of + * exp_x to MAX_EXP so that the result can be multiplied by + * a tiny number without losing accuracy due to denormalization. + */ + exp_x = exp(x - kln2); + get_high_word(hx, exp_x); + *expt = (hx >> 20) - (0x3ff + 1023) + k; + set_high_word(exp_x, (hx & 0xfffff) | ((0x3ff + 1023) << 20)); + return (exp_x); +} + + +__host__ __device__ inline +complex ldexp_cexp(complex z, int expt){ + double x, y, exp_x, scale1, scale2; + int ex_expt, half_expt; + + x = z.real(); + y = z.imag(); + exp_x = frexp_exp(x, &ex_expt); + expt += ex_expt; + + /* + * Arrange so that scale1 * scale2 == 2**expt. We use this to + * compensate for scalbn being horrendously slow. + */ + half_expt = expt / 2; + insert_words(scale1, (0x3ff + half_expt) << 20, 0); + half_expt = expt - half_expt; + insert_words(scale2, (0x3ff + half_expt) << 20, 0); + + return (complex(cos(y) * exp_x * scale1 * scale2, + sin(y) * exp_x * scale1 * scale2)); +} + + +__host__ __device__ inline +complex cexp(const complex& z){ + double x, y, exp_x; + uint32_t hx, hy, lx, ly; + + const uint32_t + exp_ovfl = 0x40862e42, /* high bits of MAX_EXP * ln2 ~= 710 */ + cexp_ovfl = 0x4096b8e4; /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */ + + + x = z.real(); + y = z.imag(); + + extract_words(hy, ly, y); + hy &= 0x7fffffff; + + /* cexp(x + I 0) = exp(x) + I 0 */ + if ((hy | ly) == 0) + return (complex(exp(x), y)); + extract_words(hx, lx, x); + /* cexp(0 + I y) = cos(y) + I sin(y) */ + if (((hx & 0x7fffffff) | lx) == 0) + return (complex(cos(y), sin(y))); + + if (hy >= 0x7ff00000) { + if (lx != 0 || (hx & 0x7fffffff) != 0x7ff00000) { + /* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */ + return (complex(y - y, y - y)); + } else if (hx & 0x80000000) { + /* cexp(-Inf +- I Inf|NaN) = 0 + I 0 */ + return (complex(0.0, 0.0)); + } else { + /* cexp(+Inf +- I Inf|NaN) = Inf + I NaN */ + return (complex(x, y - y)); + } + } + + if (hx >= exp_ovfl && hx <= cexp_ovfl) { + /* + * x is between 709.7 and 1454.3, so we must scale to avoid + * overflow in exp(x). + */ + return (ldexp_cexp(z, 0)); + } else { + /* + * Cases covered here: + * - x < exp_ovfl and exp(x) won't overflow (common case) + * - x > cexp_ovfl, so exp(x) * s overflows for all s > 0 + * - x = +-Inf (generated by exp()) + * - x = NaN (spurious inexact exception from y) + */ + exp_x = std::exp(x); + return (complex(exp_x * cos(y), exp_x * sin(y))); + } +} + +} // namespace complex + +} // namespace detail + +template +__host__ __device__ +inline complex exp(const complex& z){ + return polar(std::exp(z.real()),z.imag()); +} + +template <> +__host__ __device__ +inline complex exp(const complex& z){ + return detail::complex::cexp(z); +} + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/complex/cexpf.h b/thrust/thrust/detail/complex/cexpf.h new file mode 100644 index 00000000000..a666de61340 --- /dev/null +++ b/thrust/thrust/detail/complex/cexpf.h @@ -0,0 +1,171 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* adapted from FreeBSD: + * lib/msun/src/s_cexpf.c + * lib/msun/src/k_exp.c + * + */ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +__host__ __device__ inline +float frexp_expf(float x, int *expt){ + const uint32_t k = 235; /* constant for reduction */ + const float kln2 = 162.88958740F; /* k * ln2 */ + + // should this be a double instead? + float exp_x; + uint32_t hx; + + exp_x = expf(x - kln2); + get_float_word(hx, exp_x); + *expt = (hx >> 23) - (0x7f + 127) + k; + set_float_word(exp_x, (hx & 0x7fffff) | ((0x7f + 127) << 23)); + return (exp_x); +} + +__host__ __device__ inline +complex +ldexp_cexpf(complex z, int expt) +{ + float x, y, exp_x, scale1, scale2; + int ex_expt, half_expt; + + x = z.real(); + y = z.imag(); + exp_x = frexp_expf(x, &ex_expt); + expt += ex_expt; + + half_expt = expt / 2; + set_float_word(scale1, (0x7f + half_expt) << 23); + half_expt = expt - half_expt; + set_float_word(scale2, (0x7f + half_expt) << 23); + + return (complex(std::cos(y) * exp_x * scale1 * scale2, + std::sin(y) * exp_x * scale1 * scale2)); +} + +__host__ __device__ inline +complex cexpf(const complex& z){ + float x, y, exp_x; + uint32_t hx, hy; + + const uint32_t + exp_ovfl = 0x42b17218, /* MAX_EXP * ln2 ~= 88.722839355 */ + cexp_ovfl = 0x43400074; /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */ + + x = z.real(); + y = z.imag(); + + get_float_word(hy, y); + hy &= 0x7fffffff; + + /* cexp(x + I 0) = exp(x) + I 0 */ + if (hy == 0) + return (complex(std::exp(x), y)); + get_float_word(hx, x); + /* cexp(0 + I y) = cos(y) + I sin(y) */ + if ((hx & 0x7fffffff) == 0){ + return (complex(std::cos(y), std::sin(y))); + } + if (hy >= 0x7f800000) { + if ((hx & 0x7fffffff) != 0x7f800000) { + /* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */ + return (complex(y - y, y - y)); + } else if (hx & 0x80000000) { + /* cexp(-Inf +- I Inf|NaN) = 0 + I 0 */ + return (complex(0.0, 0.0)); + } else { + /* cexp(+Inf +- I Inf|NaN) = Inf + I NaN */ + return (complex(x, y - y)); + } + } + + if (hx >= exp_ovfl && hx <= cexp_ovfl) { + /* + * x is between 88.7 and 192, so we must scale to avoid + * overflow in expf(x). + */ + return (ldexp_cexpf(z, 0)); + } else { + /* + * Cases covered here: + * - x < exp_ovfl and exp(x) won't overflow (common case) + * - x > cexp_ovfl, so exp(x) * s overflows for all s > 0 + * - x = +-Inf (generated by exp()) + * - x = NaN (spurious inexact exception from y) + */ + exp_x = std::exp(x); + return (complex(exp_x * std::cos(y), exp_x * std::sin(y))); + } +} + +} // namespace complex + +} // namespace detail + +template <> +__host__ __device__ +inline complex exp(const complex& z){ + return detail::complex::cexpf(z); +} + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/complex/clog.h b/thrust/thrust/detail/complex/clog.h new file mode 100644 index 00000000000..b74419fe404 --- /dev/null +++ b/thrust/thrust/detail/complex/clog.h @@ -0,0 +1,222 @@ +/* + * Copyright 2008-2021 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2012 Stephen Montgomery-Smith + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* adapted from FreeBSDs msun:*/ + + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +using thrust::complex; + +/* round down to 18 = 54/3 bits */ +__host__ __device__ inline +double trim(double x){ + uint32_t hi; + get_high_word(hi, x); + insert_words(x, hi &0xfffffff8, 0); + return x; +} + + +__host__ __device__ inline +complex clog(const complex& z){ + + // Adapted from FreeBSDs msun + double x, y; + double ax, ay; + double x0, y0, x1, y1, x2, y2, t, hm1; + double val[12]; + int i, sorted; + const double e = 2.7182818284590452354; + + x = z.real(); + y = z.imag(); + + /* Handle NaNs using the general formula to mix them right. */ + if (x != x || y != y){ + return (complex(std::log(norm(z)), std::atan2(y, x))); + } + + ax = std::abs(x); + ay = std::abs(y); + if (ax < ay) { + t = ax; + ax = ay; + ay = t; + } + + /* + * To avoid unnecessary overflow, if x and y are very large, divide x + * and y by M_E, and then add 1 to the logarithm. This depends on + * M_E being larger than sqrt(2). + * There is a potential loss of accuracy caused by dividing by M_E, + * but this case should happen extremely rarely. + */ + // if (ay > 5e307){ + // For high values of ay -> hypotf(DBL_MAX,ay) = inf + // We expect that for values at or below ay = 5e307 this should not happen + if (ay > 5e307){ + return (complex(std::log(hypot(x / e, y / e)) + 1.0, std::atan2(y, x))); + } + if (ax == 1.) { + if (ay < 1e-150){ + return (complex((ay * 0.5) * ay, std::atan2(y, x))); + } + return (complex(log1p(ay * ay) * 0.5, std::atan2(y, x))); + } + + /* + * Because atan2 and hypot conform to C99, this also covers all the + * edge cases when x or y are 0 or infinite. + */ + if (ax < 1e-50 || ay < 1e-50 || ax > 1e50 || ay > 1e50){ + return (complex(std::log(hypot(x, y)), std::atan2(y, x))); + } + + /* + * From this point on, we don't need to worry about underflow or + * overflow in calculating ax*ax or ay*ay. + */ + + /* Some easy cases. */ + + if (ax >= 1.0){ + return (complex(log1p((ax-1)*(ax+1) + ay*ay) * 0.5, atan2(y, x))); + } + + if (ax*ax + ay*ay <= 0.7){ + return (complex(std::log(ax*ax + ay*ay) * 0.5, std::atan2(y, x))); + } + + /* + * Take extra care so that ULP of real part is small if hypot(x,y) is + * moderately close to 1. + */ + + + x0 = trim(ax); + ax = ax-x0; + x1 = trim(ax); + x2 = ax-x1; + y0 = trim(ay); + ay = ay-y0; + y1 = trim(ay); + y2 = ay-y1; + + val[0] = x0*x0; + val[1] = y0*y0; + val[2] = 2*x0*x1; + val[3] = 2*y0*y1; + val[4] = x1*x1; + val[5] = y1*y1; + val[6] = 2*x0*x2; + val[7] = 2*y0*y2; + val[8] = 2*x1*x2; + val[9] = 2*y1*y2; + val[10] = x2*x2; + val[11] = y2*y2; + + /* Bubble sort. */ + + do { + sorted = 1; + for (i=0;i<11;i++) { + if (val[i] < val[i+1]) { + sorted = 0; + t = val[i]; + val[i] = val[i+1]; + val[i+1] = t; + } + } + } while (!sorted); + + hm1 = -1; + for (i=0;i<12;i++){ + hm1 += val[i]; + } + return (complex(0.5 * log1p(hm1), atan2(y, x))); +} + +} // namespace complex + +} // namespace detail + +template +__host__ __device__ +inline complex log(const complex& z){ + return complex(std::log(thrust::abs(z)),thrust::arg(z)); +} + +template <> +__host__ __device__ +inline complex log(const complex& z){ + return detail::complex::clog(z); +} + +template +__host__ __device__ +inline complex log10(const complex& z){ + // Using the explicit literal prevents compile time warnings in + // devices that don't support doubles + return thrust::log(z)/ValueType(2.30258509299404568402); +} + +THRUST_NAMESPACE_END + diff --git a/thrust/thrust/detail/complex/clogf.h b/thrust/thrust/detail/complex/clogf.h new file mode 100644 index 00000000000..7d9ef0c7ada --- /dev/null +++ b/thrust/thrust/detail/complex/clogf.h @@ -0,0 +1,208 @@ +/* + * Copyright 2008-2021 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2012 Stephen Montgomery-Smith + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* adapted from FreeBSDs msun:*/ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +using thrust::complex; + +/* round down to 8 = 24/3 bits */ +__host__ __device__ inline +float trim(float x){ + uint32_t hx; + get_float_word(hx, x); + hx &= 0xffff0000; + float ret; + set_float_word(ret,hx); + return ret; +} + + +__host__ __device__ inline +complex clogf(const complex& z){ + + // Adapted from FreeBSDs msun + float x, y; + float ax, ay; + float x0, y0, x1, y1, x2, y2, t, hm1; + float val[12]; + int i, sorted; + const float e = 2.7182818284590452354f; + + x = z.real(); + y = z.imag(); + + /* Handle NaNs using the general formula to mix them right. */ + if (x != x || y != y){ + return (complex(std::log(norm(z)), std::atan2(y, x))); + } + + ax = std::abs(x); + ay = std::abs(y); + if (ax < ay) { + t = ax; + ax = ay; + ay = t; + } + + /* + * To avoid unnecessary overflow, if x and y are very large, divide x + * and y by M_E, and then add 1 to the logarithm. This depends on + * M_E being larger than sqrt(2). + * There is a potential loss of accuracy caused by dividing by M_E, + * but this case should happen extremely rarely. + */ + // For high values of ay -> hypotf(FLT_MAX,ay) = inf + // We expect that for values at or below ay = 1e34f this should not happen + if (ay > 1e34f){ + return (complex(std::log(hypotf(x / e, y / e)) + 1.0f, std::atan2(y, x))); + } + if (ax == 1.f) { + if (ay < 1e-19f){ + return (complex((ay * 0.5f) * ay, std::atan2(y, x))); + } + return (complex(log1pf(ay * ay) * 0.5f, std::atan2(y, x))); + } + + /* + * Because atan2 and hypot conform to C99, this also covers all the + * edge cases when x or y are 0 or infinite. + */ + if (ax < 1e-6f || ay < 1e-6f || ax > 1e6f || ay > 1e6f){ + return (complex(std::log(hypotf(x, y)), std::atan2(y, x))); + } + + /* + * From this point on, we don't need to worry about underflow or + * overflow in calculating ax*ax or ay*ay. + */ + + /* Some easy cases. */ + + if (ax >= 1.0f){ + return (complex(log1pf((ax-1.f)*(ax+1.f) + ay*ay) * 0.5f, atan2(y, x))); + } + + if (ax*ax + ay*ay <= 0.7f){ + return (complex(std::log(ax*ax + ay*ay) * 0.5f, std::atan2(y, x))); + } + + /* + * Take extra care so that ULP of real part is small if hypot(x,y) is + * moderately close to 1. + */ + + + x0 = trim(ax); + ax = ax-x0; + x1 = trim(ax); + x2 = ax-x1; + y0 = trim(ay); + ay = ay-y0; + y1 = trim(ay); + y2 = ay-y1; + + val[0] = x0*x0; + val[1] = y0*y0; + val[2] = 2*x0*x1; + val[3] = 2*y0*y1; + val[4] = x1*x1; + val[5] = y1*y1; + val[6] = 2*x0*x2; + val[7] = 2*y0*y2; + val[8] = 2*x1*x2; + val[9] = 2*y1*y2; + val[10] = x2*x2; + val[11] = y2*y2; + + /* Bubble sort. */ + + do { + sorted = 1; + for (i=0;i<11;i++) { + if (val[i] < val[i+1]) { + sorted = 0; + t = val[i]; + val[i] = val[i+1]; + val[i+1] = t; + } + } + } while (!sorted); + + hm1 = -1; + for (i=0;i<12;i++){ + hm1 += val[i]; + } + return (complex(0.5f * log1pf(hm1), atan2(y, x))); +} + +} // namespace complex + +} // namespace detail + +template <> +__host__ __device__ +inline complex log(const complex& z){ + return detail::complex::clogf(z); +} + +THRUST_NAMESPACE_END + diff --git a/thrust/thrust/detail/complex/complex.inl b/thrust/thrust/detail/complex/complex.inl new file mode 100644 index 00000000000..d288a58f8fd --- /dev/null +++ b/thrust/thrust/detail/complex/complex.inl @@ -0,0 +1,363 @@ +/* + * Copyright 2008-2021 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +THRUST_NAMESPACE_BEGIN + +/* --- Constructors --- */ + +#if THRUST_CPP_DIALECT < 2011 +template +__host__ __device__ +complex::complex() +{ + real(T()); + imag(T()); +} +#endif + +template +__host__ __device__ +complex::complex(const T& re) +#if THRUST_CPP_DIALECT >= 2011 + // Initialize the storage in the member initializer list using C++ unicorn + // initialization. This allows `complex` to work. + : data{re, T()} +{} +#else +{ + real(re); + imag(T()); +} +#endif + + +template +__host__ __device__ +complex::complex(const T& re, const T& im) +#if THRUST_CPP_DIALECT >= 2011 + // Initialize the storage in the member initializer list using C++ unicorn + // initialization. This allows `complex` to work. + : data{re, im} +{} +#else +{ + real(re); + imag(im); +} +#endif + +#if THRUST_CPP_DIALECT < 2011 +template +__host__ __device__ +complex::complex(const complex& z) +{ + real(z.real()); + imag(z.imag()); +} +#endif + +template +template +__host__ __device__ +complex::complex(const complex& z) +#if THRUST_CPP_DIALECT >= 2011 + // Initialize the storage in the member initializer list using C++ unicorn + // initialization. This allows `complex` to work. + // We do a functional-style cast here to suppress conversion warnings. + : data{T(z.real()), T(z.imag())} +{} +#else +{ + real(T(z.real())); + imag(T(z.imag())); +} +#endif + +template +__host__ THRUST_STD_COMPLEX_DEVICE +complex::complex(const std::complex& z) +#if THRUST_CPP_DIALECT >= 2011 + // Initialize the storage in the member initializer list using C++ unicorn + // initialization. This allows `complex` to work. + : data{THRUST_STD_COMPLEX_REAL(z), THRUST_STD_COMPLEX_IMAG(z)} +{} +#else +{ + real(THRUST_STD_COMPLEX_REAL(z)); + imag(THRUST_STD_COMPLEX_IMAG(z)); +} +#endif + +template +template +__host__ THRUST_STD_COMPLEX_DEVICE +complex::complex(const std::complex& z) +#if THRUST_CPP_DIALECT >= 2011 + // Initialize the storage in the member initializer list using C++ unicorn + // initialization. This allows `complex` to work. + // We do a functional-style cast here to suppress conversion warnings. + : data{T(THRUST_STD_COMPLEX_REAL(z)), T(THRUST_STD_COMPLEX_IMAG(z))} +{} +#else +{ + real(T(THRUST_STD_COMPLEX_REAL(z))); + imag(T(THRUST_STD_COMPLEX_IMAG(z))); +} +#endif + + + +/* --- Assignment Operators --- */ + +template +__host__ __device__ +complex& complex::operator=(const T& re) +{ + real(re); + imag(T()); + return *this; +} + +#if THRUST_CPP_DIALECT < 2011 +template +__host__ __device__ +complex& complex::operator=(const complex& z) +{ + real(z.real()); + imag(z.imag()); + return *this; +} +#endif + +template +template +__host__ __device__ +complex& complex::operator=(const complex& z) +{ + real(T(z.real())); + imag(T(z.imag())); + return *this; +} + +template +__host__ THRUST_STD_COMPLEX_DEVICE +complex& complex::operator=(const std::complex& z) +{ + real(THRUST_STD_COMPLEX_REAL(z)); + imag(THRUST_STD_COMPLEX_IMAG(z)); + return *this; +} + +template +template +__host__ THRUST_STD_COMPLEX_DEVICE +complex& complex::operator=(const std::complex& z) +{ + real(T(THRUST_STD_COMPLEX_REAL(z))); + imag(T(THRUST_STD_COMPLEX_IMAG(z))); + return *this; +} + + + +/* --- Compound Assignment Operators --- */ + +template +template +__host__ __device__ +complex& complex::operator+=(const complex& z) +{ + *this = *this + z; + return *this; +} + +template +template +__host__ __device__ +complex& complex::operator-=(const complex& z) +{ + *this = *this - z; + return *this; +} + +template +template +__host__ __device__ +complex& complex::operator*=(const complex& z) +{ + *this = *this * z; + return *this; +} + +template +template +__host__ __device__ +complex& complex::operator/=(const complex& z) +{ + *this = *this / z; + return *this; +} + +template +template +__host__ __device__ +complex& complex::operator+=(const U& z) +{ + *this = *this + z; + return *this; +} + +template +template +__host__ __device__ +complex& complex::operator-=(const U& z) +{ + *this = *this - z; + return *this; +} + +template +template +__host__ __device__ +complex& complex::operator*=(const U& z) +{ + *this = *this * z; + return *this; +} + +template +template +__host__ __device__ +complex& complex::operator/=(const U& z) +{ + *this = *this / z; + return *this; +} + + + +/* --- Equality Operators --- */ + +template +__host__ __device__ +bool operator==(const complex& x, const complex& y) +{ + return x.real() == y.real() && x.imag() == y.imag(); +} + +template +__host__ THRUST_STD_COMPLEX_DEVICE +bool operator==(const complex& x, const std::complex& y) +{ + return x.real() == THRUST_STD_COMPLEX_REAL(y) && x.imag() == THRUST_STD_COMPLEX_IMAG(y); +} + +template +__host__ THRUST_STD_COMPLEX_DEVICE +bool operator==(const std::complex& x, const complex& y) +{ + return THRUST_STD_COMPLEX_REAL(x) == y.real() && THRUST_STD_COMPLEX_IMAG(x) == y.imag(); +} + +template +__host__ __device__ +bool operator==(const T0& x, const complex& y) +{ + return x == y.real() && y.imag() == T1(); +} + +template +__host__ __device__ +bool operator==(const complex& x, const T1& y) +{ + return x.real() == y && x.imag() == T1(); +} + +template +__host__ __device__ +bool operator!=(const complex& x, const complex& y) +{ + return !(x == y); +} + +template +__host__ THRUST_STD_COMPLEX_DEVICE +bool operator!=(const complex& x, const std::complex& y) +{ + return !(x == y); +} + +template +__host__ THRUST_STD_COMPLEX_DEVICE +bool operator!=(const std::complex& x, const complex& y) +{ + return !(x == y); +} + +template +__host__ __device__ +bool operator!=(const T0& x, const complex& y) +{ + return !(x == y); +} + +template +__host__ __device__ +bool operator!=(const complex& x, const T1& y) +{ + return !(x == y); +} + +template +struct proclaim_trivially_relocatable > : thrust::true_type {}; + +THRUST_NAMESPACE_END + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + diff --git a/thrust/thrust/detail/complex/cpow.h b/thrust/thrust/detail/complex/cpow.h new file mode 100644 index 00000000000..cefca13bd17 --- /dev/null +++ b/thrust/thrust/detail/complex/cpow.h @@ -0,0 +1,65 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +THRUST_NAMESPACE_BEGIN + +template +__host__ __device__ +complex::type> +pow(const complex& x, const complex& y) +{ + typedef typename detail::promoted_numerical_type::type T; + return exp(log(complex(x)) * complex(y)); +} + +template +__host__ __device__ +complex::type> +pow(const complex& x, const T1& y) +{ + typedef typename detail::promoted_numerical_type::type T; + return exp(log(complex(x)) * T(y)); +} + +template +__host__ __device__ +complex::type> +pow(const T0& x, const complex& y) +{ + typedef typename detail::promoted_numerical_type::type T; + // Find `log` by ADL. + using std::log; + return exp(log(T(x)) * complex(y)); +} + +THRUST_NAMESPACE_END + diff --git a/thrust/thrust/detail/complex/cproj.h b/thrust/thrust/detail/complex/cproj.h new file mode 100644 index 00000000000..f9e6a1a632d --- /dev/null +++ b/thrust/thrust/detail/complex/cproj.h @@ -0,0 +1,80 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ +__host__ __device__ +inline complex cprojf(const complex& z){ + if(!isinf(z.real()) && !isinf(z.imag())){ + return z; + }else{ + // std::numeric_limits::infinity() doesn't run on the GPU + return complex(infinity(), copysignf(0.0, z.imag())); + } +} + +__host__ __device__ +inline complex cproj(const complex& z){ + if(!isinf(z.real()) && !isinf(z.imag())){ + return z; + }else{ + // std::numeric_limits::infinity() doesn't run on the GPU + return complex(infinity(), copysign(0.0, z.imag())); + } +} + +} + +} + +template +__host__ __device__ +inline thrust::complex proj(const thrust::complex& z){ + return detail::complex::cproj(z); +} + + +template <> +__host__ __device__ +inline thrust::complex proj(const thrust::complex& z){ + return detail::complex::cproj(z); +} + +template <> +__host__ __device__ +inline thrust::complex proj(const thrust::complex& z){ + return detail::complex::cprojf(z); +} + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/complex/csinh.h b/thrust/thrust/detail/complex/csinh.h new file mode 100644 index 00000000000..2ef8400e740 --- /dev/null +++ b/thrust/thrust/detail/complex/csinh.h @@ -0,0 +1,215 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* adapted from FreeBSD: + * lib/msun/src/s_csinh.c + */ + + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +using thrust::complex; + +__host__ __device__ inline +complex csinh(const complex& z){ + double x, y, h; + uint32_t hx, hy, ix, iy, lx, ly; + const double huge = 8.98846567431157953864652595395e+307; // 0x1p1023; + + x = z.real(); + y = z.imag(); + + extract_words(hx, lx, x); + extract_words(hy, ly, y); + + ix = 0x7fffffff & hx; + iy = 0x7fffffff & hy; + + /* Handle the nearly-non-exceptional cases where x and y are finite. */ + if (ix < 0x7ff00000 && iy < 0x7ff00000) { + if ((iy | ly) == 0) + return (complex(sinh(x), y)); + if (ix < 0x40360000) /* small x: normal case */ + return (complex(sinh(x) * cos(y), cosh(x) * sin(y))); + + /* |x| >= 22, so cosh(x) ~= exp(|x|) */ + if (ix < 0x40862e42) { + /* x < 710: exp(|x|) won't overflow */ + h = exp(fabs(x)) * 0.5; + return (complex(copysign(h, x) * cos(y), h * sin(y))); + } else if (ix < 0x4096bbaa) { + /* x < 1455: scale to avoid overflow */ + complex z_ = ldexp_cexp(complex(fabs(x), y), -1); + return (complex(z_.real() * copysign(1.0, x), z_.imag())); + } else { + /* x >= 1455: the result always overflows */ + h = huge * x; + return (complex(h * cos(y), h * h * sin(y))); + } + } + + /* + * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as dNaN. Raise the invalid floating-point exception. + * + * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN). + * The sign of 0 in the result is unspecified. Choice = normally + * the same as d(NaN). + */ + if ((ix | lx) == 0 && iy >= 0x7ff00000) + return (complex(copysign(0.0, x * (y - y)), y - y)); + + /* + * sinh(+-Inf +- I 0) = +-Inf + I +-0. + * + * sinh(NaN +- I 0) = d(NaN) + I +-0. + */ + if ((iy | ly) == 0 && ix >= 0x7ff00000) { + if (((hx & 0xfffff) | lx) == 0) + return (complex(x, y)); + return (complex(x, copysign(0.0, y))); + } + + /* + * sinh(x +- I Inf) = dNaN + I dNaN. + * Raise the invalid floating-point exception for finite nonzero x. + * + * sinh(x + I NaN) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero x. Choice = don't raise (except for signaling NaNs). + */ + if (ix < 0x7ff00000 && iy >= 0x7ff00000) + return (complex(y - y, x * (y - y))); + + /* + * sinh(+-Inf + I NaN) = +-Inf + I d(NaN). + * The sign of Inf in the result is unspecified. Choice = normally + * the same as d(NaN). + * + * sinh(+-Inf +- I Inf) = +Inf + I dNaN. + * The sign of Inf in the result is unspecified. Choice = always +. + * Raise the invalid floating-point exception. + * + * sinh(+-Inf + I y) = +-Inf cos(y) + I Inf sin(y) + */ + if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) { + if (iy >= 0x7ff00000) + return (complex(x * x, x * (y - y))); + return (complex(x * cos(y), infinity() * sin(y))); + } + + /* + * sinh(NaN + I NaN) = d(NaN) + I d(NaN). + * + * sinh(NaN +- I Inf) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception. + * Choice = raise. + * + * sinh(NaN + I y) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero y. Choice = don't raise (except for signaling NaNs). + */ + return (complex((x * x) * (y - y), (x + x) * (y - y))); +} + +__host__ __device__ inline +complex csin(complex z){ + /* csin(z) = -I * csinh(I * z) */ + z = csinh(complex(-z.imag(), z.real())); + return (complex(z.imag(), -z.real())); +} + +} // namespace complex + +} // namespace detail + +template +__host__ __device__ +inline complex sin(const complex& z){ + const ValueType re = z.real(); + const ValueType im = z.imag(); + return complex(std::sin(re) * std::cosh(im), + std::cos(re) * std::sinh(im)); +} + + +template +__host__ __device__ +inline complex sinh(const complex& z){ + const ValueType re = z.real(); + const ValueType im = z.imag(); + return complex(std::sinh(re) * std::cos(im), + std::cosh(re) * std::sin(im)); +} + +template <> +__host__ __device__ +inline complex sin(const complex& z){ + return detail::complex::csin(z); +} + +template <> +__host__ __device__ +inline complex sinh(const complex& z){ + return detail::complex::csinh(z); +} + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/complex/csinhf.h b/thrust/thrust/detail/complex/csinhf.h new file mode 100644 index 00000000000..44be109cbc8 --- /dev/null +++ b/thrust/thrust/detail/complex/csinhf.h @@ -0,0 +1,152 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* adapted from FreeBSD: + * lib/msun/src/s_csinhf.c + */ + + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +using thrust::complex; + +__host__ __device__ inline +complex csinhf(const complex& z){ + + float x, y, h; + uint32_t hx, hy, ix, iy; + + const float huge = 1.70141183460469231731687303716e+38; //0x1p127; + + x = z.real(); + y = z.imag(); + + get_float_word(hx, x); + get_float_word(hy, y); + + ix = 0x7fffffff & hx; + iy = 0x7fffffff & hy; + + if (ix < 0x7f800000 && iy < 0x7f800000) { + if (iy == 0) + return (complex(sinhf(x), y)); + if (ix < 0x41100000) /* small x: normal case */ + return (complex(sinhf(x) * cosf(y), coshf(x) * sinf(y))); + + /* |x| >= 9, so cosh(x) ~= exp(|x|) */ + if (ix < 0x42b17218) { + /* x < 88.7: expf(|x|) won't overflow */ + h = expf(fabsf(x)) * 0.5f; + return (complex(copysignf(h, x) * cosf(y), h * sinf(y))); + } else if (ix < 0x4340b1e7) { + /* x < 192.7: scale to avoid overflow */ + complex z_ = ldexp_cexpf(complex(fabsf(x), y), -1); + return (complex(z_.real() * copysignf(1.0f, x), z_.imag())); + } else { + /* x >= 192.7: the result always overflows */ + h = huge * x; + return (complex(h * cosf(y), h * h * sinf(y))); + } + } + + if (ix == 0 && iy >= 0x7f800000) + return (complex(copysignf(0, x * (y - y)), y - y)); + + if (iy == 0 && ix >= 0x7f800000) { + if ((hx & 0x7fffff) == 0) + return (complex(x, y)); + return (complex(x, copysignf(0.0f, y))); + } + + if (ix < 0x7f800000 && iy >= 0x7f800000) + return (complex(y - y, x * (y - y))); + + if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) { + if (iy >= 0x7f800000) + return (complex(x * x, x * (y - y))); + return (complex(x * cosf(y), infinity() * sinf(y))); + } + + return (complex((x * x) * (y - y), (x + x) * (y - y))); +} + +__host__ __device__ inline +complex csinf(complex z){ + z = csinhf(complex(-z.imag(), z.real())); + return (complex(z.imag(), -z.real())); +} + +} // namespace complex + +} // namespace detail + +template <> +__host__ __device__ +inline complex sin(const complex& z){ + return detail::complex::csinf(z); +} + +template <> +__host__ __device__ +inline complex sinh(const complex& z){ + return detail::complex::csinhf(z); +} + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/complex/csqrt.h b/thrust/thrust/detail/complex/csqrt.h new file mode 100644 index 00000000000..41eeca29245 --- /dev/null +++ b/thrust/thrust/detail/complex/csqrt.h @@ -0,0 +1,162 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2007 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Adapted from FreeBSD by Filipe Maia : + * freebsd/lib/msun/src/s_csqrt.c + */ + + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +using thrust::complex; + +__host__ __device__ inline +complex csqrt(const complex& z){ + complex result; + double a, b; + double t; + int scale; + + /* We risk spurious overflow for components >= DBL_MAX / (1 + sqrt(2)). */ + const double THRESH = 7.446288774449766337959726e+307; + + a = z.real(); + b = z.imag(); + + /* Handle special cases. */ + if (z == 0.0) + return (complex(0.0, b)); + if (isinf(b)) + return (complex(infinity(), b)); + if (isnan(a)) { + t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ + return (complex(a, t)); /* return NaN + NaN i */ + } + if (isinf(a)) { + /* + * csqrt(inf + NaN i) = inf + NaN i + * csqrt(inf + y i) = inf + 0 i + * csqrt(-inf + NaN i) = NaN +- inf i + * csqrt(-inf + y i) = 0 + inf i + */ + if (signbit(a)) + return (complex(fabs(b - b), copysign(a, b))); + else + return (complex(a, copysign(b - b, b))); + } + /* + * The remaining special case (b is NaN) is handled just fine by + * the normal code path below. + */ + + // DBL_MIN*2 + const double low_thresh = 4.450147717014402766180465e-308; + scale = 0; + + if (fabs(a) >= THRESH || fabs(b) >= THRESH) { + /* Scale to avoid overflow. */ + a *= 0.25; + b *= 0.25; + scale = 1; + }else if (fabs(a) <= low_thresh && fabs(b) <= low_thresh) { + /* Scale to avoid underflow. */ + a *= 4.0; + b *= 4.0; + scale = 2; + } + + + /* Algorithm 312, CACM vol 10, Oct 1967. */ + if (a >= 0.0) { + t = sqrt((a + hypot(a, b)) * 0.5); + result = complex(t, b / (2 * t)); + } else { + t = sqrt((-a + hypot(a, b)) * 0.5); + result = complex(fabs(b) / (2 * t), copysign(t, b)); + } + + /* Rescale. */ + if (scale == 1) + return (result * 2.0); + else if (scale == 2) + return (result * 0.5); + else + return (result); +} + +} // namespace complex + +} // namespace detail + +template +__host__ __device__ +inline complex sqrt(const complex& z){ + return thrust::polar(std::sqrt(thrust::abs(z)),thrust::arg(z)/ValueType(2)); +} + +template <> +__host__ __device__ +inline complex sqrt(const complex& z){ + return detail::complex::csqrt(z); +} + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/complex/csqrtf.h b/thrust/thrust/detail/complex/csqrtf.h new file mode 100644 index 00000000000..766995315f2 --- /dev/null +++ b/thrust/thrust/detail/complex/csqrtf.h @@ -0,0 +1,157 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2007 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Adapted from FreeBSD by Filipe Maia : + * freebsd/lib/msun/src/s_csqrt.c + */ + + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +using thrust::complex; + +__host__ __device__ inline +complex csqrtf(const complex& z){ + float a = z.real(), b = z.imag(); + float t; + int scale; + complex result; + + /* We risk spurious overflow for components >= FLT_MAX / (1 + sqrt(2)). */ + const float THRESH = 1.40949553037932e+38f; + + /* Handle special cases. */ + if (z == 0.0f) + return (complex(0, b)); + if (isinf(b)) + return (complex(infinity(), b)); + if (isnan(a)) { + t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ + return (complex(a, t)); /* return NaN + NaN i */ + } + if (isinf(a)) { + /* + * csqrtf(inf + NaN i) = inf + NaN i + * csqrtf(inf + y i) = inf + 0 i + * csqrtf(-inf + NaN i) = NaN +- inf i + * csqrtf(-inf + y i) = 0 + inf i + */ + if (signbit(a)) + return (complex(fabsf(b - b), copysignf(a, b))); + else + return (complex(a, copysignf(b - b, b))); + } + /* + * The remaining special case (b is NaN) is handled just fine by + * the normal code path below. + */ + + /* + * Unlike in the FreeBSD code we'll avoid using double precision as + * not all hardware supports it. + */ + + // FLT_MIN*2 + const float low_thresh = 2.35098870164458e-38f; + scale = 0; + + if (fabsf(a) >= THRESH || fabsf(b) >= THRESH) { + /* Scale to avoid overflow. */ + a *= 0.25f; + b *= 0.25f; + scale = 1; + }else if (fabsf(a) <= low_thresh && fabsf(b) <= low_thresh) { + /* Scale to avoid underflow. */ + a *= 4.f; + b *= 4.f; + scale = 2; + } + + /* Algorithm 312, CACM vol 10, Oct 1967. */ + if (a >= 0.0f) { + t = sqrtf((a + hypotf(a, b)) * 0.5f); + result = complex(t, b / (2.0f * t)); + } else { + t = sqrtf((-a + hypotf(a, b)) * 0.5f); + result = complex(fabsf(b) / (2.0f * t), copysignf(t, b)); + } + + /* Rescale. */ + if (scale == 1) + return (result * 2.0f); + else if (scale == 2) + return (result * 0.5f); + else + return (result); +} + +} // namespace complex + +} // namespace detail + +template <> +__host__ __device__ +inline complex sqrt(const complex& z){ + return detail::complex::csqrtf(z); +} + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/complex/ctanh.h b/thrust/thrust/detail/complex/ctanh.h new file mode 100644 index 00000000000..d7a4c8b74a6 --- /dev/null +++ b/thrust/thrust/detail/complex/ctanh.h @@ -0,0 +1,210 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Adapted from FreeBSD by Filipe Maia : + * freebsd/lib/msun/src/s_ctanh.c + */ + +/* + * Hyperbolic tangent of a complex argument z = x + i y. + * + * The algorithm is from: + * + * W. Kahan. Branch Cuts for Complex Elementary Functions or Much + * Ado About Nothing's Sign Bit. In The State of the Art in + * Numerical Analysis, pp. 165 ff. Iserles and Powell, eds., 1987. + * + * Method: + * + * Let t = tan(x) + * beta = 1/cos^2(y) + * s = sinh(x) + * rho = cosh(x) + * + * We have: + * + * tanh(z) = sinh(z) / cosh(z) + * + * sinh(x) cos(y) + i cosh(x) sin(y) + * = --------------------------------- + * cosh(x) cos(y) + i sinh(x) sin(y) + * + * cosh(x) sinh(x) / cos^2(y) + i tan(y) + * = ------------------------------------- + * 1 + sinh^2(x) / cos^2(y) + * + * beta rho s + i t + * = ---------------- + * 1 + beta s^2 + * + * Modifications: + * + * I omitted the original algorithm's handling of overflow in tan(x) after + * verifying with nearpi.c that this can't happen in IEEE single or double + * precision. I also handle large x differently. + */ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +using thrust::complex; + +__host__ __device__ inline +complex ctanh(const complex& z){ + double x, y; + double t, beta, s, rho, denom; + uint32_t hx, ix, lx; + + x = z.real(); + y = z.imag(); + + extract_words(hx, lx, x); + ix = hx & 0x7fffffff; + + /* + * ctanh(NaN + i 0) = NaN + i 0 + * + * ctanh(NaN + i y) = NaN + i NaN for y != 0 + * + * The imaginary part has the sign of x*sin(2*y), but there's no + * special effort to get this right. + * + * ctanh(+-Inf +- i Inf) = +-1 +- 0 + * + * ctanh(+-Inf + i y) = +-1 + 0 sin(2y) for y finite + * + * The imaginary part of the sign is unspecified. This special + * case is only needed to avoid a spurious invalid exception when + * y is infinite. + */ + if (ix >= 0x7ff00000) { + if ((ix & 0xfffff) | lx) /* x is NaN */ + return (complex(x, (y == 0 ? y : x * y))); + set_high_word(x, hx - 0x40000000); /* x = copysign(1, x) */ + return (complex(x, copysign(0.0, isinf(y) ? y : sin(y) * cos(y)))); + } + + /* + * ctanh(x + i NAN) = NaN + i NaN + * ctanh(x +- i Inf) = NaN + i NaN + */ + if (!isfinite(y)) + return (complex(y - y, y - y)); + + /* + * ctanh(+-huge + i +-y) ~= +-1 +- i 2sin(2y)/exp(2x), using the + * approximation sinh^2(huge) ~= exp(2*huge) / 4. + * We use a modified formula to avoid spurious overflow. + */ + if (ix >= 0x40360000) { /* x >= 22 */ + double exp_mx = exp(-fabs(x)); + return (complex(copysign(1.0, x), + 4.0 * sin(y) * cos(y) * exp_mx * exp_mx)); + } + + /* Kahan's algorithm */ + t = tan(y); + beta = 1.0 + t * t; /* = 1 / cos^2(y) */ + s = sinh(x); + rho = sqrt(1.0 + s * s); /* = cosh(x) */ + denom = 1.0 + beta * s * s; + return (complex((beta * rho * s) / denom, t / denom)); +} + +__host__ __device__ inline +complex ctan(complex z){ + /* ctan(z) = -I * ctanh(I * z) */ + z = ctanh(complex(-z.imag(), z.real())); + return (complex(z.imag(), -z.real())); +} + +} // namespace complex + +} // namespace detail + + +template +__host__ __device__ +inline complex tan(const complex& z){ + return sin(z)/cos(z); +} + +template +__host__ __device__ +inline complex tanh(const complex& z){ + // This implementation seems better than the simple sin/cos + return (thrust::exp(ValueType(2)*z)-ValueType(1))/ + (thrust::exp(ValueType(2)*z)+ValueType(1)); +} + +template <> +__host__ __device__ +inline complex tan(const complex& z){ + return detail::complex::ctan(z); +} + +template <> +__host__ __device__ +inline complex tanh(const complex& z){ + return detail::complex::ctanh(z); +} + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/complex/ctanhf.h b/thrust/thrust/detail/complex/ctanhf.h new file mode 100644 index 00000000000..a7c8b4331b6 --- /dev/null +++ b/thrust/thrust/detail/complex/ctanhf.h @@ -0,0 +1,134 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- + * Copyright (c) 2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Adapted from FreeBSD by Filipe Maia, filipe.c.maia@gmail.com: + * freebsd/lib/msun/src/s_ctanhf.c + */ + +/* + * Hyperbolic tangent of a complex argument z. See ctanh.c for details. + */ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +using thrust::complex; + +__host__ __device__ inline +complex ctanhf(const complex& z){ + float x, y; + float t, beta, s, rho, denom; + uint32_t hx, ix; + + x = z.real(); + y = z.imag(); + + get_float_word(hx, x); + ix = hx & 0x7fffffff; + + if (ix >= 0x7f800000) { + if (ix & 0x7fffff) + return (complex(x, (y == 0.0f ? y : x * y))); + set_float_word(x, hx - 0x40000000); + return (complex(x, + copysignf(0, isinf(y) ? y : sinf(y) * cosf(y)))); + } + + if (!isfinite(y)) + return (complex(y - y, y - y)); + + if (ix >= 0x41300000) { /* x >= 11 */ + float exp_mx = expf(-fabsf(x)); + return (complex(copysignf(1.0f, x), + 4.0f * sinf(y) * cosf(y) * exp_mx * exp_mx)); + } + + t = tanf(y); + beta = 1.0f + t * t; + s = sinhf(x); + rho = sqrtf(1.0f + s * s); + denom = 1.0f + beta * s * s; + return (complex((beta * rho * s) / denom, t / denom)); +} + + __host__ __device__ inline + complex ctanf(complex z){ + z = ctanhf(complex(-z.imag(), z.real())); + return (complex(z.imag(), -z.real())); + } + +} // namespace complex + +} // namespace detail + +template <> +__host__ __device__ +inline complex tan(const complex& z){ + return detail::complex::ctanf(z); +} + +template <> +__host__ __device__ +inline complex tanh(const complex& z){ + return detail::complex::ctanhf(z); +} + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/complex/math_private.h b/thrust/thrust/detail/complex/math_private.h new file mode 100644 index 00000000000..35c239dd46a --- /dev/null +++ b/thrust/thrust/detail/complex/math_private.h @@ -0,0 +1,145 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +/* adapted from FreeBSD: + * lib/msun/src/math_private.h + */ +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +THRUST_NAMESPACE_BEGIN +namespace detail{ +namespace complex{ + +using thrust::complex; + +typedef union +{ + float value; + uint32_t word; +} ieee_float_shape_type; + +__host__ __device__ +inline void get_float_word(uint32_t & i, float d){ + ieee_float_shape_type gf_u; + gf_u.value = (d); + (i) = gf_u.word; +} + +__host__ __device__ +inline void get_float_word(int32_t & i, float d){ + ieee_float_shape_type gf_u; + gf_u.value = (d); + (i) = gf_u.word; +} + +__host__ __device__ +inline void set_float_word(float & d, uint32_t i){ + ieee_float_shape_type sf_u; + sf_u.word = (i); + (d) = sf_u.value; +} + +// Assumes little endian ordering +typedef union +{ + double value; + struct + { + uint32_t lsw; + uint32_t msw; + } parts; + struct + { + uint64_t w; + } xparts; +} ieee_double_shape_type; + +__host__ __device__ inline +void get_high_word(uint32_t & i,double d){ + ieee_double_shape_type gh_u; + gh_u.value = (d); + (i) = gh_u.parts.msw; +} + +/* Set the more significant 32 bits of a double from an int. */ +__host__ __device__ inline +void set_high_word(double & d, uint32_t v){ + ieee_double_shape_type sh_u; + sh_u.value = (d); + sh_u.parts.msw = (v); + (d) = sh_u.value; +} + + +__host__ __device__ inline +void insert_words(double & d, uint32_t ix0, uint32_t ix1){ + ieee_double_shape_type iw_u; + iw_u.parts.msw = (ix0); + iw_u.parts.lsw = (ix1); + (d) = iw_u.value; +} + +/* Get two 32 bit ints from a double. */ +__host__ __device__ inline +void extract_words(uint32_t & ix0,uint32_t & ix1, double d){ + ieee_double_shape_type ew_u; + ew_u.value = (d); + (ix0) = ew_u.parts.msw; + (ix1) = ew_u.parts.lsw; +} + +/* Get two 32 bit ints from a double. */ +__host__ __device__ inline +void extract_words(int32_t & ix0,int32_t & ix1, double d){ + ieee_double_shape_type ew_u; + ew_u.value = (d); + (ix0) = ew_u.parts.msw; + (ix1) = ew_u.parts.lsw; +} + +} // namespace complex + +} // namespace detail + +THRUST_NAMESPACE_END + + +#include diff --git a/thrust/thrust/detail/complex/stream.h b/thrust/thrust/detail/complex/stream.h new file mode 100644 index 00000000000..2889118faf2 --- /dev/null +++ b/thrust/thrust/detail/complex/stream.h @@ -0,0 +1,82 @@ +/* + * Copyright 2008-2021 NVIDIA Corporation + * Copyright 2013 Filipe RNC Maia + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include + +THRUST_NAMESPACE_BEGIN +template +std::basic_ostream& operator<<(std::basic_ostream& os, const complex& z) +{ + os << '(' << z.real() << ',' << z.imag() << ')'; + return os; +} + +template +std::basic_istream& +operator>>(std::basic_istream& is, complex& z) +{ + ValueType re, im; + + charT ch; + is >> ch; + + if(ch == '(') + { + is >> re >> ch; + if (ch == ',') + { + is >> im >> ch; + if (ch == ')') + { + z = complex(re, im); + } + else + { + is.setstate(std::ios_base::failbit); + } + } + else if (ch == ')') + { + z = re; + } + else + { + is.setstate(std::ios_base::failbit); + } + } + else + { + is.putback(ch); + is >> re; + z = re; + } + return is; +} + +THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/type_traits.h b/thrust/thrust/detail/type_traits.h index 008cf0d1828..31598bc479c 100644 --- a/thrust/thrust/detail/type_traits.h +++ b/thrust/thrust/detail/type_traits.h @@ -660,13 +660,6 @@ template typedef T1 type; }; -template -struct has_promoted_numerical_type : public false_type {}; - -template -struct has_promoted_numerical_type::type>> - : public true_type {}; - template struct is_empty_helper : public T { diff --git a/thrust/thrust/system/detail/generic/sequence.inl b/thrust/thrust/system/detail/generic/sequence.inl index 966b01c94a6..c7a812e5113 100644 --- a/thrust/thrust/system/detail/generic/sequence.inl +++ b/thrust/thrust/system/detail/generic/sequence.inl @@ -25,12 +25,11 @@ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) # pragma system_header #endif // no system header + #include #include #include -#include - THRUST_NAMESPACE_BEGIN namespace system { @@ -90,20 +89,6 @@ struct compute_sequence_value:: return init + step * static_cast(i); } }; - -template -struct compute_sequence_value<::cuda::std::complex, ::cuda::std::__enable_if_t<::cuda::std::is_arithmetic::value>> -{ - ::cuda::std::complex init; - ::cuda::std::complex step; - - __thrust_exec_check_disable__ - __host__ __device__ - ::cuda::std::complex operator()(std::size_t i) const - { - return init + step * static_cast(i); - } -}; } template