From 5af9da9812e61587e7835d90343d92aef88dd263 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Sat, 25 May 2024 17:00:11 -0600 Subject: [PATCH] Backup --- lapack/src/KokkosLapack_geqrf.hpp | 44 +-- .../tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp | 4 +- lapack/unit_test/Test_Lapack_geqrf.hpp | 370 ++++++++++++++---- 3 files changed, 311 insertions(+), 107 deletions(-) diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index 4c920e9a74..a81ae2a436 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -34,7 +34,7 @@ namespace KokkosLapack { /// /// \tparam ExecutionSpace The space where the kernel will run. /// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. -/// \tparam TArray Type of array Tau, as a 1-D Kokkos::View. +/// \tparam TauArray Type of array Tau, as a 1-D Kokkos::View. /// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View. /// /// \param space [in] Execution space instance used to specified how to execute @@ -48,10 +48,9 @@ namespace KokkosLapack { /// is represented as a product of elementary reflectors /// Q = H(1) H(2) . . . H(k), where k = min(M,N). /// Each H(i) has the form -/// H(i) = I - Tau * v * v**H -/// where tau is a complex scalar, and v is a complex vector -/// with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on -/// exit in A(i+1:M,i), and tau in Tau(i). +/// H(i) = I - Tau(i) * v * v**H, +/// where v is a vector with v(1:i-1) = 0 and v(i) = 1; +/// v(i+1:M) is stored on exit in A(i+1:M,i). /// \param Tau [out] One-dimensional array of size min(M,N) that contains the /// scalar factors of the elementary reflectors. /// \param Info [out] One-dimensional array of integers and of size 1: @@ -59,8 +58,8 @@ namespace KokkosLapack { /// Info[0] < 0: if equal to '-i', the i-th argument had an /// illegal value /// -template -void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, +template +void geqrf(const ExecutionSpace& space, const AMatrix& A, const TauArray& Tau, const InfoArray& Info) { // NOTE: Currently, KokkosLapack::geqrf only supports LAPACK, MAGMA and // rocSOLVER TPLs. @@ -73,21 +72,21 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, typename AMatrix::memory_space>::accessible); static_assert( Kokkos::SpaceAccessibility::accessible); + typename TauArray::memory_space>::accessible); static_assert( Kokkos::SpaceAccessibility::accessible); static_assert(Kokkos::is_view::value, "KokkosLapack::geqrf: A must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, + static_assert(Kokkos::is_view::value, "KokkosLapack::geqrf: Tau must be Kokkos::View."); static_assert(Kokkos::is_view::value, "KokkosLapack::geqrf: Info must be Kokkos::View."); static_assert(static_cast(AMatrix::rank) == 2, "KokkosLapack::geqrf: A must have rank 2."); - static_assert(static_cast(TArray::rank) == 1, + static_assert(static_cast(TauArray::rank) == 1, "KokkosLapack::geqrf: Tau must have rank 1."); static_assert(static_cast(InfoArray::rank) == 1, "KokkosLapack::geqrf: Info must have rank 1."); @@ -118,9 +117,9 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, using AMatrix_Internal = Kokkos::View< typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, typename AMatrix::device_type, Kokkos::MemoryTraits>; - using TArray_Internal = - Kokkos::View>; using InfoArray_Internal = Kokkos::View>; - AMatrix_Internal A_i = A; - TArray_Internal Tau_i = Tau; + AMatrix_Internal A_i = A; + TauArray_Internal Tau_i = Tau; InfoArray_Internal Info_i = Info; - KokkosLapack::Impl::GEQRF::geqrf(space, A_i, Tau_i, Info_i); } @@ -140,7 +139,7 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, /// \brief Computes a QR factorization of a matrix A /// /// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. -/// \tparam TArray Type of array Tau, as a 1-D Kokkos::View. +/// \tparam TauArray Type of array Tau, as a 1-D Kokkos::View. /// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View. /// /// \param A [in,out] On entry, the M-by-N matrix to be factorized. @@ -152,10 +151,9 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, /// is represented as a product of elementary reflectors /// Q = H(1) H(2) . . . H(k), where k = min(M,N). /// Each H(i) has the form -/// H(i) = I - Tau * v * v**H -/// where tau is a complex scalar, and v is a complex vector -/// with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on -/// exit in A(i+1:M,i), and tau in Tau(i). +/// H(i) = I - Tau(i) * v * v**H, +/// where v is a vector with v(1:i-1) = 0 and v(i) = 1; +/// v(i+1:M) is stored on exit in A(i+1:M,i). /// \param Tau [out] One-dimensional array of size min(M,N) that contains the /// scalar factors of the elementary reflectors. /// \param Info [out] One-dimensional array of integers and of size 1: @@ -163,8 +161,8 @@ void geqrf(const ExecutionSpace& space, const AMatrix& A, const TArray& Tau, /// Info[0] < 0: if equal to '-i', the i-th argument had an /// illegal value /// -template -void geqrf(const AMatrix& A, const TArray& Tau, const InfoArray& Info) { +template +void geqrf(const AMatrix& A, const TauArray& Tau, const InfoArray& Info) { typename AMatrix::execution_space space{}; geqrf(space, A, Tau, Info); } diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index d9f88549aa..c7630cc783 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -172,7 +172,7 @@ KOKKOSLAPACK_GEQRF_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK -#if 0 // AquiEEP +#if 0 // TO DO // MAGMA #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA @@ -270,7 +270,7 @@ KOKKOSLAPACK_GEQRF_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA -#endif // AquiEEP +#endif // TO DO // CUSOLVER #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index f619c8fba3..2a4533b8bc 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -24,63 +24,127 @@ (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \ defined(TEST_THREADS_LAPACK_CPP))) -// AquiEEP - #include #include #include +#include +#include #include -//#include -//#include #include namespace Test { template void getQR(int const m, int const n, - typename ViewTypeA::HostMirror const& // h_A - , - typename ViewTypeTau::HostMirror const& // h_tau - , - typename ViewTypeA::HostMirror& // h_Q - , + typename ViewTypeA::HostMirror const& h_A, + typename ViewTypeTau::HostMirror const& h_tau, + typename ViewTypeA::HostMirror& h_Q, typename ViewTypeA::HostMirror& h_R, - typename ViewTypeA::HostMirror& // h_QR + typename ViewTypeA::HostMirror& h_QR ) { using ScalarA = typename ViewTypeA::value_type; + // Populate h_R for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - if constexpr (Kokkos::ArithTraits::is_complex) { - h_R(i, j).real() = 0.; - h_R(i, j).imag() = 0.; - } else { - h_R(i, j) = 0.; + if ((i <= j) && (i < n)) { + h_R(i,j) = h_A(i,j); + } + else { + h_R(i,j) = Kokkos::ArithTraits::zero(); } } } + // Instantiate the identity matrix ViewTypeA I("I", m, m); typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I); + Kokkos::deep_copy(h_I,Kokkos::ArithTraits::zero()); for (int i(0); i < m; ++i) { - for (int j(0); j < m; ++j) { - if constexpr (Kokkos::ArithTraits::is_complex) { - if (i == j) { - h_I(i, j).real() = 1.; - } else { - h_I(i, j).real() = 0.; - } - h_I(i, j).imag() = 0.; - } else { - if (i == j) { - h_I(i, j) = 1.; - } else { - h_I(i, j) = 0.; - } - } + if constexpr (Kokkos::ArithTraits::is_complex) { + h_I(i,i).real() = 1.; + } else { + h_I(i,i) = 1.; } } + + // Populate h_Q + int minMN(std::min(m, n)); + ViewTypeTau v("v", m); + typename ViewTypeTau::HostMirror h_v = Kokkos::create_mirror_view(v); + + ViewTypeA Qk("Qk", m, m); + typename ViewTypeA::HostMirror h_Qk = Kokkos::create_mirror_view(Qk); + + ViewTypeA auxM("auxM", m, m); + typename ViewTypeA::HostMirror h_auxM = Kokkos::create_mirror_view(auxM); + + // Q = H(0) H(1) . . . H(min(M,N)-1), where for k=0,1,...,min(m,n)-1: + // H(k) = I - Tau(k) * v * v**H, and + // v is a vector of size m with: + // v(0:k-1) = 0, + // v(k) = 1, + // v(k+1:m-1) = A(k+1:m-1,k). + for (int k(0); k < minMN; ++k) { + Kokkos::deep_copy(h_v,Kokkos::ArithTraits::zero()); + h_v[k] = 1.; + for (int index(k+1); index < minMN; ++index) { + h_v[index] = h_A(index,k); + } + + // Rank-1 update of a general matrix: A = A + alpha * x * y^{T,H}. + // void ger( const char trans[] + // , const typename AViewType::const_value_type & alpha + // , const XViewType & x + // , const YViewType & y + // , const AViewType & A + // ); + Kokkos::deep_copy(h_Qk, h_I); + KokkosBlas::ger( "H" + , -h_tau[k] + , h_v + , h_v + , h_Qk + ); + + // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B). + // void gemm( const char transA[] + // , const char transB[] + // , typename AViewType::const_value_type & alpha + // , const AViewType & A + // , const BViewType & B + // , typename CViewType::const_value_type & beta + // , const CViewType & C + // ); + if (k == 0) { + Kokkos::deep_copy(h_Q, h_Qk); + } + else { + Kokkos::deep_copy(h_auxM, Kokkos::ArithTraits::zero()); + KokkosBlas::gemm( "N" + , "N" + , 1. + , h_Q + , h_Qk + , 0. + , h_auxM + ); + Kokkos::deep_copy(h_Q, h_auxM); + } + } // for k + + Kokkos::deep_copy(h_QR, Kokkos::ArithTraits::zero()); + KokkosBlas::gemm( "N" + , "N" + , 1. + , h_Q + , h_R + , 0. + , h_QR + ); + + // AquiEEP: test Q^H Q = I } template @@ -88,7 +152,7 @@ void impl_test_geqrf(int m, int n) { using ViewTypeInfo = Kokkos::View; using execution_space = typename Device::execution_space; using ScalarA = typename ViewTypeA::value_type; - // using ats = Kokkos::ArithTraits; + using ats = Kokkos::ArithTraits; execution_space space{}; @@ -97,13 +161,14 @@ void impl_test_geqrf(int m, int n) { int minMN(std::min(m, n)); // Create device views - ViewTypeA A("A", m, n); - ViewTypeTau Tau("Tau", minMN); - ViewTypeInfo Info("Info", 1); + ViewTypeA A ("A", m, n); + ViewTypeA Aorig("Aorig", m, n); + ViewTypeTau Tau ("Tau", minMN); + ViewTypeInfo Info ("Info", 1); // Create host mirrors of device views. typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); - typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(A); + typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(Aorig); typename ViewTypeTau::HostMirror h_tau = Kokkos::create_mirror_view(Tau); typename ViewTypeInfo::HostMirror h_info = Kokkos::create_mirror_view(Info); @@ -124,7 +189,7 @@ void impl_test_geqrf(int m, int n) { for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - h_A(i, j).imag() = 0.; + h_A(i,j).imag() = 0.; } } } else { @@ -151,10 +216,10 @@ void impl_test_geqrf(int m, int n) { Kokkos::deep_copy(h_Aorig, h_A); -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#ifdef HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - std::cout << "A(" << i << "," << j << ") = " << h_A(i, j) << std::endl; + std::cout << "Aorig(" << i << "," << j << ") = " << h_A(i,j) << std::endl; } } #endif @@ -180,18 +245,86 @@ void impl_test_geqrf(int m, int n) { Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_tau, Tau); -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "info[0] = " << h_info[0] << std::endl; for (int i(0); i < minMN; ++i) { for (int j(0); j < n; ++j) { - std::cout << "R(" << i << "," << j << ") = " << h_A(i, j) << std::endl; + std::cout << "Aoutput(" << i << "," << j << ") = " << std::setprecision(16) << h_A(i,j) << std::endl; } } for (int i(0); i < minMN; ++i) { - std::cout << "tau(" << i << ") = " << h_tau[i] << std::endl; + std::cout << "tau(" << i << ") = " << h_tau[i] << std::setprecision(16) << std::endl; } #endif + const typename Kokkos::ArithTraits::mag_type absTol(1.e-8); + + if ((m == 3) && (n == 3)) { + std::vector> refMatrix(m); + for (int i(0); i < m; ++i) { + refMatrix[i].resize(n,Kokkos::ArithTraits::zero()); + } + + std::vector refTau(m,Kokkos::ArithTraits::zero()); + + if constexpr (Kokkos::ArithTraits::is_complex) { + refMatrix[0][0].real() = -14.; + refMatrix[0][1].real() = -21.; + refMatrix[0][2].real() = 14.; + + refMatrix[1][0].real() = 0.2307692307692308; + refMatrix[1][1].real() = -175.; + refMatrix[1][2].real() = 70.; + + refMatrix[2][0].real() = -0.1538461538461539; + refMatrix[2][1].real() = 1./18.; + refMatrix[2][2].real() = -35.; + + refTau[0].real() = 1.857142857142857; + refTau[1].real() = 1.993846153846154; + refTau[2].real() = 0.; + } + else { + refMatrix[0][0] = -14.; + refMatrix[0][1] = -21.; + refMatrix[0][2] = 14.; + + refMatrix[1][0] = 0.2307692307692308; + refMatrix[1][1] = -175.; + refMatrix[1][2] = 70.; + + refMatrix[2][0] = -0.1538461538461539; + refMatrix[2][1] = 1./18.; + refMatrix[2][2] = -35.; + + refTau[0] = 1.857142857142857; + refTau[1] = 1.993846153846154; + refTau[2] = 0.; + } + + { + bool test_flag_A = true; + for (int i(0); (i < m) && test_flag_A; ++i) { + for (int j(0); (j < n) && test_flag_A; ++j) { + if (ats::abs(h_A(i,j) - refMatrix[i][j]) > absTol) { + test_flag_A = false; + } + } + } + ASSERT_EQ(test_flag_A, true); + } + + { + bool test_flag_tau = true; + for (int i(0); (i < m) && test_flag_tau; ++i) { + if (ats::abs(h_tau[i] - refTau[i]) > absTol) { + test_flag_tau = false; + } + } + ASSERT_EQ(test_flag_tau, true); + } + } + ViewTypeA Q("Q", m, m); ViewTypeA R("R", m, n); ViewTypeA QR("QR", m, n); @@ -202,65 +335,135 @@ void impl_test_geqrf(int m, int n) { getQR(m, n, h_A, h_tau, h_Q, h_R, h_QR); -#if 1 // def HAVE_KOKKOSKERNELS_DEBUG +#ifdef HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < m; ++j) { - std::cout << "Q(" << i << "," << j << ") = " << h_Q(i, j) << std::endl; + std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl; } } for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - std::cout << "R(" << i << "," << j << ") = " << h_R(i, j) << std::endl; + std::cout << "R(" << i << "," << j << ") = " << h_R(i,j) << std::endl; } } for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - std::cout << "QR(" << i << "," << j << ") = " << h_QR(i, j) << std::endl; + std::cout << "QR(" << i << "," << j << ") = " << h_QR(i,j) << std::endl; } } #endif if ((m == 3) && (n == 3)) { - } + std::vector> refQ(m); + for (int i(0); i < m; ++i) { + refQ[i].resize(n,Kokkos::ArithTraits::zero()); + } - // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B). - // void gemm( const execution_space & space - // , const char transA[] - // , const char transB[] - // , typename AViewType::const_value_type & alpha - // , const AViewType & A - // , const BViewType & B - // , typename CViewType::const_value_type & beta - // , const CViewType & C - // ); - - // Rank-1 update of a general matrix: A = A + alpha * x * y^{T,H}. - // void ger( const ExecutionSpace & space - // , const char trans[] - // , const typename AViewType::const_value_type & alpha - // , const XViewType & x - // , const YViewType & y - // , const AViewType & A - // ); - - // Checking vs ref on CPU, this eps is about 10^-9 - // typedef typename ats::mag_type mag_type; - // const mag_type eps = 3.0e7 * ats::epsilon(); - bool test_flag = true; - for (int i = 0; i < n; i++) { -#if 0 - if (ats::abs(h_B(i) - h_X0(i)) > eps) { - test_flag = false; - printf( - " Error %d, pivot %c, padding %c: result( %.15lf ) !=" - "solution( %.15lf ) at (%d), error=%.15e, eps=%.15e\n", - N, mode[0], padding[0], ats::abs(h_B(i)), ats::abs(h_X0(i)), int(i), - ats::abs(h_B(i) - h_X0(i)), eps); - break; + std::vector> refR(m); + for (int i(0); i < m; ++i) { + refR[i].resize(n,Kokkos::ArithTraits::zero()); } + +#if 0 + Q = [ -6/7 69/175 58/175 + -3/7 -158/175 -6/175 + 2/7 -6/35 33/35 ] + + R = [ -14 -21 14 + 0 -175 70 + 0 0 -35 ] #endif + + if constexpr (Kokkos::ArithTraits::is_complex) { + refQ[0][0].real() = -6./7.; + refQ[0][1].real() = 69./175.; + refQ[0][2].real() = 58./175.; + + refQ[1][0].real() = -3./7.; + refQ[1][1].real() = -158./175.; + refQ[1][2].real() = -6./175.; + + refQ[2][0].real() = 2./7.; + refQ[2][1].real() = -6./35.; + refQ[2][2].real() = 33./35.; + + refR[0][0].real() = -14.; + refR[0][1].real() = -21.; + refR[0][2].real() = 14.; + + refR[1][1].real() = -175.; + refR[1][2].real() = 70.; + + refR[2][2].real() = -35.; + } + else { + refQ[0][0] = -6./7.; + refQ[0][1] = 69./175.; + refQ[0][2] = 58./175.; + + refQ[1][0] = -3./7.; + refQ[1][1] = -158./175.; + refQ[1][2] = -6./175.; + + refQ[2][0] = 2./7.; + refQ[2][1] = -6./35.; + refQ[2][2] = 33./35.; + + refR[0][0] = -14.; + refR[0][1] = -21.; + refR[0][2] = 14.; + + refR[1][1] = -175.; + refR[1][2] = 70.; + + refR[2][2] = -35.; + } + + { + bool test_flag_Q = true; + for (int i(0); (i < m) && test_flag_Q; ++i) { + for (int j(0); (j < n) && test_flag_Q; ++j) { + if (ats::abs(h_Q(i,j) - refQ[i][j]) > absTol) { + test_flag_Q = false; + } + } + } + ASSERT_EQ(test_flag_Q, true); + } + + { + bool test_flag_R = true; + for (int i(0); (i < m) && test_flag_R; ++i) { + for (int j(0); (j < n) && test_flag_R; ++j) { + if (ats::abs(h_R(i,j) - refR[i][j]) > absTol) { + test_flag_R = false; + } + } + } + ASSERT_EQ(test_flag_R, true); + } + } + + { + bool test_flag_QR = true; + for (int i(0); (i < m) && test_flag_QR; ++i) { + for (int j(0); (j < n) && test_flag_QR; ++j) { + if (ats::abs(h_QR(i,j) - h_Aorig(i,j)) > absTol) { + std::cout << "m = " << m + << ", n = " << n + << ", i = " << i + << ", j = " << j + << ", h_Aorig(i,j) = " << std::setprecision(16) << h_Aorig(i,j) + << ", h_QR(i,j) = " << std::setprecision(16) << h_QR(i,j) + << ", |diff| = " << std::setprecision(16) << ats::abs(h_QR(i,j) - h_Aorig(i,j)) + << ", absTol = " << std::setprecision(16) << absTol + << std::endl; + test_flag_QR = false; + } + } + } + ASSERT_EQ(test_flag_QR, true); } - ASSERT_EQ(test_flag, true); } } // namespace Test @@ -274,6 +477,9 @@ void test_geqrf() { using view_type_tau_ll = Kokkos::View; Test::impl_test_geqrf(3, 3); + Test::impl_test_geqrf(100, 100); + //Test::impl_test_geqrf(100, 70); // AquiEEP + Test::impl_test_geqrf(70, 100); #endif }