From 51bbcb3f2df53b3bbb475213329cf44f155ff286 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Wed, 10 May 2023 16:56:23 -0700 Subject: [PATCH 01/41] WIP: Refactor operator and vector linear algebra, adding ComplexVector, ComplexOperator, and the parallel (unassembled) ParOperator and ComplexParOperator This is a first step towards removing a lot of the dependency on PETSc for complex-valued linear algebra and streamlining the interface to support partial assembly. Includes the necessary changes for SpaceOperator, CurlCurlOperator, and LaplaceOperator in the matrix and vector construction. Also updates operator construction from driver classes. Next up, linear solvers. --- palace/drivers/drivensolver.cpp | 263 +++--- palace/drivers/eigensolver.cpp | 363 +++++---- palace/drivers/electrostaticsolver.cpp | 146 ++-- palace/drivers/electrostaticsolver.hpp | 4 +- palace/drivers/magnetostaticsolver.cpp | 136 ++-- palace/drivers/magnetostaticsolver.hpp | 4 +- palace/drivers/transientsolver.cpp | 6 +- palace/fem/freqdomain.hpp | 74 -- palace/fem/interpolator.hpp | 129 +++ palace/fem/lumpedelement.hpp | 2 +- palace/fem/multigrid.hpp | 44 +- palace/fem/operator.hpp | 105 --- palace/linalg/CMakeLists.txt | 4 + palace/linalg/ams.cpp | 3 + palace/linalg/arpack.hpp | 2 + palace/linalg/chebyshev.cpp | 282 ++++--- palace/linalg/chebyshev.hpp | 37 +- palace/linalg/complex.cpp | 899 +++++++++++++++++++++ palace/linalg/complex.hpp | 490 +++++++++++ palace/linalg/curlcurl.cpp | 19 +- palace/linalg/curlcurl.hpp | 10 +- palace/linalg/divfree.cpp | 8 +- palace/linalg/feast.cpp | 4 + palace/linalg/feast.hpp | 4 + palace/linalg/gmg.cpp | 2 + palace/linalg/gmg.hpp | 2 + palace/linalg/hypre.hpp | 2 + palace/linalg/jacobi.cpp | 28 + palace/linalg/jacobi.hpp | 35 + palace/linalg/operator.cpp | 444 ++++++++++ palace/linalg/operator.hpp | 235 ++++++ palace/linalg/petsc.cpp | 3 + palace/linalg/petsc.hpp | 4 +- palace/linalg/slepc.cpp | 3 + palace/linalg/vector.cpp | 83 ++ 
palace/linalg/vector.hpp | 50 ++ palace/main.cpp | 2 +- palace/models/curlcurloperator.cpp | 98 ++- palace/models/curlcurloperator.hpp | 30 +- palace/models/domainpostoperator.cpp | 19 +- palace/models/farfieldboundaryoperator.cpp | 12 +- palace/models/laplaceoperator.cpp | 104 ++- palace/models/laplaceoperator.hpp | 32 +- palace/models/lumpedportoperator.cpp | 10 +- palace/models/postoperator.cpp | 57 +- palace/models/postoperator.hpp | 30 +- palace/models/romoperator.cpp | 15 +- palace/models/romoperator.hpp | 6 +- palace/models/spaceoperator.cpp | 712 ++++++++-------- palace/models/spaceoperator.hpp | 178 ++-- palace/models/surfacepostoperator.cpp | 17 +- palace/models/timeoperator.cpp | 302 ++++--- palace/models/timeoperator.hpp | 12 +- palace/models/waveportoperator.cpp | 78 +- palace/models/waveportoperator.hpp | 2 +- palace/utils/configfile.cpp | 27 +- palace/utils/configfile.hpp | 7 + palace/utils/geodata.cpp | 16 +- 58 files changed, 4069 insertions(+), 1626 deletions(-) delete mode 100644 palace/fem/freqdomain.hpp create mode 100644 palace/fem/interpolator.hpp delete mode 100644 palace/fem/operator.hpp create mode 100644 palace/linalg/complex.cpp create mode 100644 palace/linalg/complex.hpp create mode 100644 palace/linalg/jacobi.cpp create mode 100644 palace/linalg/jacobi.hpp create mode 100644 palace/linalg/operator.cpp create mode 100644 palace/linalg/operator.hpp create mode 100644 palace/linalg/vector.cpp create mode 100644 palace/linalg/vector.hpp diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp index 680a38fab..2d55986c2 100644 --- a/palace/drivers/drivensolver.cpp +++ b/palace/drivers/drivensolver.cpp @@ -5,7 +5,9 @@ #include #include +#include "linalg/complex.hpp" #include "linalg/ksp.hpp" +#include "linalg/operator.hpp" #include "linalg/pc.hpp" #include "linalg/petsc.hpp" #include "models/lumpedportoperator.hpp" @@ -22,6 +24,8 @@ namespace palace { +using namespace std::complex_literals; + void 
DrivenSolver::Solve(std::vector> &mesh, Timer &timer) const { @@ -112,30 +116,43 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in // simply by setting diagonal entries of the system matrix for the corresponding dofs. // Because the Dirichlet BC is always homogenous, no special elimination is required on // the RHS. Assemble the linear system for the initial frequency (so we can call - // KspSolver:: SetOperators). Compute everything at the first frequency step. - std::unique_ptr A = spaceop.GetSystemMatrixPetsc( - SpaceOperator::OperatorType::COMPLETE, omega0, mfem::Operator::DIAG_ONE); - std::unique_ptr NegCurl = spaceop.GetNegCurlMatrixPetsc(); + // KspSolver::SetOperators). Compute everything at the first frequency step. + + // XX TODO WIP + // std::unique_ptr A = spaceop.GetSystemMatrixPetsc( + // SpaceOperator::OperatorType::COMPLETE, omega0, mfem::Operator::DIAG_ONE); + + std::unique_ptr K = spaceop.GetComplexSystemMatrix( + SpaceOperator::OperatorType::STIFFNESS, Operator::DIAG_ONE); + std::unique_ptr M = spaceop.GetComplexSystemMatrix( + SpaceOperator::OperatorType::MASS, Operator::DIAG_ZERO); + std::unique_ptr C = spaceop.GetComplexSystemMatrix( + SpaceOperator::OperatorType::DAMPING, Operator::DIAG_ZERO); + std::unique_ptr A2 = spaceop.GetComplexSystemMatrix( + SpaceOperator::OperatorType::EXTRA, omega0, Operator::DIAG_ZERO); + std::unique_ptr Curl = spaceop.GetComplexCurlMatrix(); + std::unique_ptr A = spaceop.GetComplexSystemMatrix( + 1.0, 1i * omega0, -omega0 * omega0, K.get(), C.get(), M.get(), A2.get()); // Set up the linear solver and set operators for the first frequency step. The // preconditioner for the complex linear system is constructed from a real approximation // to the complex system matrix. 
- std::vector> P, AuxP; - spaceop.GetPreconditionerMatrix(omega0, P, AuxP); + std::vector> P, AuxP; + spaceop.GetPreconditionerMatrix(1.0, omega0, -omega0 * omega0, omega0, P, AuxP); - KspPreconditioner pc(iodata, spaceop.GetDbcMarker(), spaceop.GetNDSpaces(), - &spaceop.GetH1Spaces()); - pc.SetOperator(P, &AuxP); + // KspPreconditioner pc(iodata, spaceop.GetDbcMarker(), spaceop.GetNDSpaces(), + // &spaceop.GetH1Spaces()); + // pc.SetOperator(P, &AuxP); KspSolver ksp(A->GetComm(), iodata, "ksp_"); - ksp.SetPreconditioner(pc); - ksp.SetOperator(*A); + // ksp.SetPreconditioner(pc); //XX TODO! + // ksp.SetOperator(*A); // Set up RHS vector for the incident field at port boundaries, and the vector for the // first frequency step. - petsc::PetscParVector RHS(*NegCurl), E(*NegCurl), B(*NegCurl, true); - E.SetZero(); - B.SetZero(); + ComplexVector RHS(Curl->Width()), E(Curl->Width()), B(Curl->Height()); + E = 0.0; + B = 0.0; timer.construct_time += timer.Lap(); // Main frequency sweep loop. @@ -144,33 +161,35 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in auto t0 = timer.Now(); while (step < nstep) { - const double freq = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega); - Mpi::Print("\nIt {:d}/{:d}: ω/2π = {:.3e} GHz (elapsed time = {:.2e} s)\n", step + 1, - nstep, freq, Timer::Duration(timer.Now() - t0).count()); - - // Assemble the linear system and solve. - if (step > step0) - { - // Update frequency-dependent excitation and operators. 
- A = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::COMPLETE, omega, - mfem::Operator::DIAG_ONE, false); - spaceop.GetPreconditionerMatrix(omega, P, AuxP, false); - pc.SetOperator(P, &AuxP); - ksp.SetOperator(*A); - } - spaceop.GetFreqDomainExcitationVector(omega, RHS); - timer.construct_time += timer.Lap(); - - Mpi::Print("\n"); - ksp.Mult(RHS, E); - timer.solve_time += timer.Lap(); - + // const double freq = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega); + // Mpi::Print("\nIt {:d}/{:d}: ω/2π = {:.3e} GHz (elapsed time = {:.2e} s)\n", step + 1, + // nstep, freq, Timer::Duration(timer.Now() - t0).count()); + + // // Assemble the linear system and solve. + // if (step > step0) + // { + // // Update frequency-dependent excitation and operators. + // A = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::COMPLETE, omega, + // mfem::Operator::DIAG_ONE, false); + // spaceop.GetPreconditionerMatrix(omega, P, AuxP, false); + // pc.SetOperator(P, &AuxP); + // ksp.SetOperator(*A); + // } + // spaceop.GetFreqDomainExcitationVector(omega, RHS); + // timer.construct_time += timer.Lap(); + + // Mpi::Print("\n"); + // ksp.Mult(RHS, E); + // timer.solve_time += timer.Lap(); + + // Compute B = -1/(iω) ∇ x E on the true dofs, and set the internal GridFunctions in + // PostOperator for all postprocessing operations. double E_elec = 0.0, E_mag = 0.0; - PostOperator::GetBField(omega, *NegCurl, E, B); - postop.SetEGridFunction(E); - postop.SetBGridFunction(B); + Curl->Mult(E, B); + B *= -1.0 / (1i * omega); + // postop.SetEGridFunction(E); //XX TODO petsc::PetscParVector + // postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp(), spaceop.GetWavePortOp(), omega); - // E.Print(); Mpi::Print(" Sol. 
||E|| = {:.6e} (||RHS|| = {:.6e})\n", E.Norml2(), RHS.Norml2()); if (!iodata.solver.driven.only_port_post) { @@ -228,79 +247,79 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i // Allocate negative curl matrix for postprocessing the B-field and vectors for the // high-dimensional field solution. - std::unique_ptr NegCurl = spaceop.GetNegCurlMatrixPetsc(); - petsc::PetscParVector E(*NegCurl), B(*NegCurl, true); - E.SetZero(); - B.SetZero(); - - // Configure the PROM operator which performs the parameter space sampling and basis - // construction during the offline phase as well as the PROM solution during the online - // phase. Initialize the basis with samples from the top and bottom of the frequency - // range of interest. Each call for an HDM solution adds the frequency sample to P_S and - // removes it from P \ P_S. - timer.construct_time += timer.Lap(); - Timer local_timer; - Mpi::Print("\nBeginning PROM construction offline phase:\n" - " {:d} points for frequency sweep over [{:.3e}, {:.3e}] GHz\n", - nstep - step0, - iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega0), - iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, - omega0 + (nstep - step0 - 1) * delta_omega)); - spaceop.GetWavePortOp().SetSuppressOutput(true); // Suppress wave port stuff for offline - RomOperator prom(iodata, spaceop, nmax); - prom.Initialize(nstep - step0, omega0, delta_omega); - local_timer.construct_time += local_timer.Lap(); - - prom.SolveHDM(omega0, E, true); // Print matrix stats at first HDM solve - prom.SolveHDM(omega0 + (nstep - step0 - 1) * delta_omega, E, false); - local_timer.solve_time += local_timer.Lap(); - - // Greedy procedure for basis construction (offline phase). Basis is initialized with - // solutions at frequency sweep endpoints. 
- int iter = static_cast(prom.GetSampleFrequencies().size()), iter0 = iter; - double max_error = 1.0; - while (true) - { - // Compute maximum error in parameter domain with current PROM. - double omega_star; - max_error = prom.ComputeMaxError(ncand, omega_star); - local_timer.construct_time += local_timer.Lap(); - if (max_error < offline_tol || iter == nmax) - { - break; - } - - // Sample HDM and add solution to basis. - Mpi::Print( - "\nGreedy iteration {:d} (n = {:d}): ω* = {:.3e} GHz ({:.3e}), error = {:.3e}\n", - iter - iter0 + 1, prom.GetReducedDimension(), - iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega_star), omega_star, - max_error); - prom.SolveHDM(omega_star, E); - local_timer.solve_time += local_timer.Lap(); - iter++; - } - { - std::vector samples(prom.GetSampleFrequencies()); - // samples.Sort(); - for (auto &sample : samples) - { - sample = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, sample); - } - Mpi::Print("\nAdaptive sampling{} {:d} frequency samples:\n" - " n = {:d}, error = {:.3e}, tol = {:.3e}\n", - (iter == nmax) ? 
" reached maximum" : " converged with", iter, - prom.GetReducedDimension(), max_error, offline_tol); - utils::PrettyPrint(samples, " Sampled frequencies (GHz):"); - } - SaveMetadata(prom.GetTotalKspMult(), prom.GetTotalKspIter()); - const auto local_construction_time = timer.Lap(); - timer.construct_time += local_construction_time; - Mpi::Print(" Total offline phase elapsed time: {:.2e} s\n" - " Parameter space sampling: {:.2e} s, HDM solves: {:.2e} s\n", - Timer::Duration(local_construction_time).count(), - Timer::Duration(local_timer.construct_time).count(), - Timer::Duration(local_timer.solve_time).count()); // Timings on rank 0 + std::unique_ptr Curl = spaceop.GetComplexCurlMatrix(); + ComplexVector E(Curl->Width()), B(Curl->Height()); + E = 0.0; + B = 0.0; + + // // Configure the PROM operator which performs the parameter space sampling and basis + // // construction during the offline phase as well as the PROM solution during the online + // // phase. Initialize the basis with samples from the top and bottom of the frequency + // // range of interest. Each call for an HDM solution adds the frequency sample to P_S + // and + // // removes it from P \ P_S. 
+ // timer.construct_time += timer.Lap(); + // Timer local_timer; + // Mpi::Print("\nBeginning PROM construction offline phase:\n" + // " {:d} points for frequency sweep over [{:.3e}, {:.3e}] GHz\n", + // nstep - step0, + // iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega0), + // iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, + // omega0 + (nstep - step0 - 1) * delta_omega)); + // spaceop.GetWavePortOp().SetSuppressOutput(true); // Suppress wave port stuff for + // offline RomOperator prom(iodata, spaceop, nmax); prom.Initialize(nstep - step0, omega0, + // delta_omega); local_timer.construct_time += local_timer.Lap(); + + // prom.SolveHDM(omega0, E, true); // Print matrix stats at first HDM solve + // prom.SolveHDM(omega0 + (nstep - step0 - 1) * delta_omega, E, false); + // local_timer.solve_time += local_timer.Lap(); + + // // Greedy procedure for basis construction (offline phase). Basis is initialized with + // // solutions at frequency sweep endpoints. + // int iter = static_cast(prom.GetSampleFrequencies().size()), iter0 = iter; + // double max_error = 1.0; + // while (true) + // { + // // Compute maximum error in parameter domain with current PROM. + // double omega_star; + // max_error = prom.ComputeMaxError(ncand, omega_star); + // local_timer.construct_time += local_timer.Lap(); + // if (max_error < offline_tol || iter == nmax) + // { + // break; + // } + + // // Sample HDM and add solution to basis. 
+ // Mpi::Print( + // "\nGreedy iteration {:d} (n = {:d}): ω* = {:.3e} GHz ({:.3e}), error = {:.3e}\n", + // iter - iter0 + 1, prom.GetReducedDimension(), + // iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega_star), omega_star, + // max_error); + // prom.SolveHDM(omega_star, E); + // local_timer.solve_time += local_timer.Lap(); + // iter++; + // } + // { + // std::vector samples(prom.GetSampleFrequencies()); + // // samples.Sort(); + // for (auto &sample : samples) + // { + // sample = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, sample); + // } + // Mpi::Print("\nAdaptive sampling{} {:d} frequency samples:\n" + // " n = {:d}, error = {:.3e}, tol = {:.3e}\n", + // (iter == nmax) ? " reached maximum" : " converged with", iter, + // prom.GetReducedDimension(), max_error, offline_tol); + // utils::PrettyPrint(samples, " Sampled frequencies (GHz):"); + // } + // SaveMetadata(prom.GetTotalKspMult(), prom.GetTotalKspIter()); + // const auto local_construction_time = timer.Lap(); + // timer.construct_time += local_construction_time; + // Mpi::Print(" Total offline phase elapsed time: {:.2e} s\n" + // " Parameter space sampling: {:.2e} s, HDM solves: {:.2e} s\n", + // Timer::Duration(local_construction_time).count(), + // Timer::Duration(local_timer.construct_time).count(), + // Timer::Duration(local_timer.solve_time).count()); // Timings on rank 0 // Main fast frequency sweep loop (online phase). Mpi::Print("\nBeginning fast frequency sweep online phase\n"); @@ -314,20 +333,24 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i Mpi::Print("\nIt {:d}/{:d}: ω/2π = {:.3e} GHz (elapsed time = {:.2e} s)\n", step + 1, nstep, freq, Timer::Duration(timer.Now() - t0).count()); - // Assemble the linear system and solve. - prom.AssemblePROM(omega); - timer.construct_time += timer.Lap(); + // XX TODO PROM - Mpi::Print("\n"); - prom.SolvePROM(E); - timer.solve_time += timer.Lap(); + // // Assemble the linear system and solve. 
+ // prom.AssemblePROM(omega); + // timer.construct_time += timer.Lap(); + + // Mpi::Print("\n"); + // prom.SolvePROM(E); + // timer.solve_time += timer.Lap(); + // Compute B = -1/(iω) ∇ x E on the true dofs, and set the internal GridFunctions in + // PostOperator for all postprocessing operations. double E_elec = 0.0, E_mag = 0.0; - PostOperator::GetBField(omega, *NegCurl, E, B); + Curl->Mult(E, B); + B *= -1.0 / (1i * omega); postop.SetEGridFunction(E); postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp(), spaceop.GetWavePortOp(), omega); - // E.Print(); Mpi::Print(" Sol. ||E|| = {:.6e}\n", E.Norml2()); if (!iodata.solver.driven.only_port_post) { diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp index 11369c158..9173b7216 100644 --- a/palace/drivers/eigensolver.cpp +++ b/palace/drivers/eigensolver.cpp @@ -4,12 +4,12 @@ #include "eigensolver.hpp" #include -#include "fem/freqdomain.hpp" -#include "fem/operator.hpp" #include "linalg/arpack.hpp" +#include "linalg/complex.hpp" #include "linalg/divfree.hpp" #include "linalg/feast.hpp" #include "linalg/ksp.hpp" +#include "linalg/operator.hpp" #include "linalg/pc.hpp" #include "linalg/petsc.hpp" #include "linalg/slepc.hpp" @@ -33,18 +33,18 @@ void EigenSolver::Solve(std::vector> &mesh, // computational range. The damping matrix may be nullptr. 
timer.Lap(); SpaceOperator spaceop(iodata, mesh); - std::unique_ptr K = spaceop.GetSystemMatrixPetsc( - SpaceOperator::OperatorType::STIFFNESS, mfem::Operator::DIAG_ONE); - std::unique_ptr M = spaceop.GetSystemMatrixPetsc( - SpaceOperator::OperatorType::MASS, mfem::Operator::DIAG_ZERO); - std::unique_ptr C = spaceop.GetSystemMatrixPetsc( - SpaceOperator::OperatorType::DAMPING, mfem::Operator::DIAG_ZERO); - std::unique_ptr NegCurl = spaceop.GetNegCurlMatrixPetsc(); + std::unique_ptr K = spaceop.GetComplexSystemMatrix( + SpaceOperator::OperatorType::STIFFNESS, Operator::DIAG_ONE); + std::unique_ptr M = spaceop.GetComplexSystemMatrix( + SpaceOperator::OperatorType::MASS, Operator::DIAG_ZERO); + std::unique_ptr C = spaceop.GetComplexSystemMatrix( + SpaceOperator::OperatorType::DAMPING, Operator::DIAG_ZERO); + std::unique_ptr Curl = spaceop.GetComplexCurlMatrix(); SaveMetadata(spaceop.GetNDSpace()); // Configure objects for postprocessing. PostOperator postop(iodata, spaceop, "eigenmode"); - petsc::PetscParVector E(*NegCurl), B(*NegCurl, true); + ComplexVector E(Curl->Width()), B(Curl->Height()); // Define and configure the eigensolver to solve the eigenvalue problem: // (K + λ C + λ² M) u = 0 or K u = -λ² M u @@ -79,6 +79,9 @@ void EigenSolver::Solve(std::vector> &mesh, #endif if (type == config::EigenSolverData::Type::FEAST) { + +#if 0 // XX TODO DISABLE FEAST EIGENSOLVER FOR NOW + Mpi::Print("\nConfiguring FEAST eigenvalue solver\n"); #if defined(PALACE_WITH_SLEPC) if (C) @@ -93,6 +96,8 @@ void EigenSolver::Solve(std::vector> &mesh, K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, iodata.problem.verbose); } +#endif + #endif } else if (type == config::EigenSolverData::Type::ARPACK) @@ -143,26 +148,32 @@ void EigenSolver::Solve(std::vector> &mesh, EigenSolverBase::ScaleType scale = iodata.solver.eigenmode.scale ? 
EigenSolverBase::ScaleType::NORM_2 : EigenSolverBase::ScaleType::NONE; - if (C) - { - eigen->SetOperators(*K, *C, *M, scale); - } - else - { - eigen->SetOperators(*K, *M, scale); - } - eigen->SetNumModes(iodata.solver.eigenmode.n, iodata.solver.eigenmode.max_size); - eigen->SetTol(iodata.solver.eigenmode.tol); - eigen->SetMaxIter(iodata.solver.eigenmode.max_it); - Mpi::Print(" Scaling γ = {:.3e}, δ = {:.3e}\n", eigen->GetScalingGamma(), - eigen->GetScalingDelta()); - - const double target = iodata.solver.eigenmode.target; - const double f_target = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, target); - std::unique_ptr A; - std::vector> P, AuxP; - std::unique_ptr ksp; - std::unique_ptr pc; + + // XX TODO REVISIT BELOW... + + // if (C) + // { + // eigen->SetOperators(*K, *C, *M, scale); + // } + // else + // { + // eigen->SetOperators(*K, *M, scale); + // } + // eigen->SetNumModes(iodata.solver.eigenmode.n, iodata.solver.eigenmode.max_size); + // eigen->SetTol(iodata.solver.eigenmode.tol); + // eigen->SetMaxIter(iodata.solver.eigenmode.max_it); + // Mpi::Print(" Scaling γ = {:.3e}, δ = {:.3e}\n", eigen->GetScalingGamma(), + // eigen->GetScalingDelta()); + + // const double target = iodata.solver.eigenmode.target; + // const double f_target = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, + // target); std::unique_ptr A; + // std::vector> P, AuxP; + // std::unique_ptr ksp; + // std::unique_ptr pc; + +#if 0 // XX TODO DISABLE FEAST EIGENSOLVER FOR NOW + #if defined(PALACE_WITH_SLEPC) auto *feast = dynamic_cast(eigen.get()); if (feast) @@ -213,140 +224,164 @@ void EigenSolver::Solve(std::vector> &mesh, } else #endif - { - Mpi::Print(" Shift-and-invert σ = {:.3e} GHz ({:.3e})\n", f_target, target); - if (C) - { - // Search for eigenvalues closest to λ = iσ. - eigen->SetShiftInvert(0.0, target); - if (type == config::EigenSolverData::Type::ARPACK) - { - // ARPACK searches based on eigenvalues of the transformed problem. 
The eigenvalue - // 1/(λ-σ) will be a large-magnitude negative imaginary number for an eigenvalue λ - // with frequency close to but not below the target σ. - eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::SMALLEST_IMAGINARY); - } - else - { - eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_IMAGINARY); - } - // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_MAGNITUDE); - } - else - { - // Linear EVP has eigenvalues μ = -λ² = ω². Search for eigenvalues closest to μ = σ². - eigen->SetShiftInvert(target * target, 0.0); - if (type == config::EigenSolverData::Type::ARPACK) - { - // ARPACK searches based on eigenvalues of the transformed problem. 1/(μ-σ²) will be - // a large-magnitude positive real number for an eigenvalue μ with frequency close - // to but below the target σ². - eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::LARGEST_REAL); - } - else - { - eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_REAL); - } - // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_MAGNITUDE); - } - // Set up the linear solver required for solving systems involving the shifted operator - // (K - σ² M) or P(iσ) = (K + iσ C - σ² M) during the eigenvalue solve. The - // preconditioner for complex linear systems is constructed from a real approximation - // to the complex system matrix. - A = utils::GetSystemMatrixShell(target, *K, *M, C.get()); - spaceop.GetPreconditionerMatrix(target, P, AuxP); - - pc = std::make_unique(iodata, spaceop.GetDbcMarker(), - spaceop.GetNDSpaces(), &spaceop.GetH1Spaces()); - pc->SetOperator(P, &AuxP); - - ksp = std::make_unique(A->GetComm(), iodata, "ksp_"); - ksp->SetPreconditioner(*pc); - ksp->SetOperator(*A); - ksp->SetTabLevel(1); - eigen->SetLinearSolver(*ksp); - } - - // If desired, use an M-inner product for orthogonalizing the eigenvalue subspace. The - // constructed matrix just references the real SPD part of the mass matrix (no copy is - // performed). 
- std::unique_ptr Mr; - if (iodata.solver.eigenmode.mass_orthog) - { - // Mpi::Print(" Basis uses M-inner product\n"); - // Mr = std::make_unique( - // mesh.back()->GetComm(), - // std::make_unique(*M->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL))); - - Mpi::Print(" Basis uses (K + M)-inner product\n"); - auto KM = std::make_unique(K->GetNumRows(), K->GetNumCols()); - KM->AddOperator(*K->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL)); - KM->AddOperator(*M->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL)); - Mr = std::make_unique(mesh.back()->GetComm(), std::move(KM)); - - Mr->SetRealSymmetric(); - eigen->SetBMat(*Mr); - } - - // Construct a divergence-free projector so the eigenvalue solve is performed in the space - // orthogonal to the zero eigenvalues of the stiffness matrix. - std::unique_ptr divfree; - if (iodata.solver.linear.divfree_max_it > 0) - { - constexpr int divfree_verbose = 0; - divfree = std::make_unique( - spaceop.GetMaterialOp(), spaceop.GetAuxBdrMarker(), spaceop.GetNDSpace(), - spaceop.GetH1Spaces(), iodata.solver.linear.divfree_tol, - iodata.solver.linear.divfree_max_it, divfree_verbose); - eigen->SetProjector(*divfree); - } +#endif - // Set up the initial space for the eigenvalue solve. Satisfies boundary conditions and is - // projected appropriately. 
- if (iodata.solver.eigenmode.init_v0) - { - petsc::PetscParVector v0(*K); - if (iodata.solver.eigenmode.init_v0_const) - { - Mpi::Print(" Using constant starting vector\n"); - v0 = 1.0; - } - else - { - Mpi::Print(" Using random starting vector\n"); - v0.SetRandom(); - } - v0.ZeroRows(spaceop.GetDbcTDofList()); - if (divfree) - { - divfree->Mult(v0); - } - eigen->SetInitialSpace(v0); // Copies the vector - // { - // std::unique_ptr Grad = spaceop.GetGradMatrixPetsc(); - // petsc::PetscParVector r0(*Grad, false); - // Grad->MultTranspose(v0, r0); - // r0.Print(); - // } - } - timer.construct_time += timer.Lap(); + // { + // Mpi::Print(" Shift-and-invert σ = {:.3e} GHz ({:.3e})\n", f_target, target); + // if (C) + // { + // // Search for eigenvalues closest to λ = iσ. + // eigen->SetShiftInvert(0.0, target); + // if (type == config::EigenSolverData::Type::ARPACK) + // { + // // ARPACK searches based on eigenvalues of the transformed problem. The + // eigenvalue + // // 1/(λ-σ) will be a large-magnitude negative imaginary number for an eigenvalue + // λ + // // with frequency close to but not below the target σ. + // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::SMALLEST_IMAGINARY); + // } + // else + // { + // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_IMAGINARY); + // } + // // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_MAGNITUDE); + // } + // else + // { + // // Linear EVP has eigenvalues μ = -λ² = ω². Search for eigenvalues closest to μ = + // σ². eigen->SetShiftInvert(target * target, 0.0); if (type == + // config::EigenSolverData::Type::ARPACK) + // { + // // ARPACK searches based on eigenvalues of the transformed problem. 1/(μ-σ²) will + // be + // // a large-magnitude positive real number for an eigenvalue μ with frequency + // close + // // to but below the target σ². 
+ // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::LARGEST_REAL); + // } + // else + // { + // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_REAL); + // } + // // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_MAGNITUDE); + // } + + // // Set up the linear solver required for solving systems involving the shifted + // operator + // // (K - σ² M) or P(iσ) = (K + iσ C - σ² M) during the eigenvalue solve. The + // // preconditioner for complex linear systems is constructed from a real approximation + // // to the complex system matrix. + + // XX TODO REPLACE... WITH SUM OPERATOR! + + // A = utils::GetSystemMatrixShell(target, *K, *M, C.get()); + + // // XX TODO MOVE THIS FOR LOG FILE... + // spaceop.GetPreconditionerMatrix(target, P, AuxP); + + // pc = std::make_unique(iodata, spaceop.GetDbcMarker(), + // spaceop.GetNDSpaces(), + // &spaceop.GetH1Spaces()); + // pc->SetOperator(P, &AuxP); + + // ksp = std::make_unique(A->GetComm(), iodata, "ksp_"); + // ksp->SetPreconditioner(*pc); + // ksp->SetOperator(*A); + // ksp->SetTabLevel(1); + // eigen->SetLinearSolver(*ksp); + // } + + // // If desired, use an M-inner product for orthogonalizing the eigenvalue subspace. The + // // constructed matrix just references the real SPD part of the mass matrix (no copy is + // // performed). 
+ // std::unique_ptr Mr; + // if (iodata.solver.eigenmode.mass_orthog) + // { + // // Mpi::Print(" Basis uses M-inner product\n"); + // // Mr = std::make_unique( + // // mesh.back()->GetComm(), + // // + // std::make_unique(*M->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL))); + + // Mpi::Print(" Basis uses (K + M)-inner product\n"); + // auto KM = std::make_unique(K->GetNumRows(), K->GetNumCols()); + // KM->AddOperator(*K->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL)); + // KM->AddOperator(*M->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL)); + // Mr = std::make_unique(mesh.back()->GetComm(), + // std::move(KM)); + + // Mr->SetRealSymmetric(); + // eigen->SetBMat(*Mr); + // } + + // // Construct a divergence-free projector so the eigenvalue solve is performed in the + // space + // // orthogonal to the zero eigenvalues of the stiffness matrix. + // std::unique_ptr divfree; + // if (iodata.solver.linear.divfree_max_it > 0) + // { + // constexpr int divfree_verbose = 0; + // divfree = std::make_unique( + // spaceop.GetMaterialOp(), spaceop.GetAuxBdrMarker(), spaceop.GetNDSpace(), + // spaceop.GetH1Spaces(), iodata.solver.linear.divfree_tol, + // iodata.solver.linear.divfree_max_it, divfree_verbose); + // eigen->SetProjector(*divfree); + // } + + // // Set up the initial space for the eigenvalue solve. Satisfies boundary conditions and + // is + // // projected appropriately. 
+ // if (iodata.solver.eigenmode.init_v0) + // { + // petsc::PetscParVector v0(*K); + // if (iodata.solver.eigenmode.init_v0_const) + // { + // Mpi::Print(" Using constant starting vector\n"); + // v0 = 1.0; + // } + // else + // { + // Mpi::Print(" Using random starting vector\n"); + // v0.SetRandom(); + // } + // v0.ZeroRows(spaceop.GetDbcTDofList()); + // if (divfree) + // { + // divfree->Mult(v0); + // } + // eigen->SetInitialSpace(v0); // Copies the vector + // // { + // // std::unique_ptr Grad = spaceop.GetGradMatrixPetsc(); + // // petsc::PetscParVector r0(*Grad, false); + // // Grad->MultTranspose(v0, r0); + // // r0.Print(); + // // } + // } + // timer.construct_time += timer.Lap(); // Eigenvalue problem solve. Mpi::Print("\n"); int num_conv = 0; - num_conv = eigen->Solve(); -#if defined(PALACE_WITH_SLEPC) - if (!ksp) - { - const auto &feast = dynamic_cast(*eigen); - SaveMetadata(feast.GetTotalKspMult(), feast.GetTotalKspIter()); - } - else -#endif - { - SaveMetadata(ksp->GetTotalNumMult(), ksp->GetTotalNumIter()); - } + // num_conv = eigen->Solve(); + + // #if 0 // XX TODO DISABLE FEAST EIGENSOLVER FOR NOW + + // #if defined(PALACE_WITH_SLEPC) + // if (!ksp) + // { + // const auto &feast = dynamic_cast(*eigen); + // SaveMetadata(feast.GetTotalKspMult(), feast.GetTotalKspIter()); + // } + // else + // #endif + + // #endif + + // { + // SaveMetadata(ksp->GetTotalNumMult(), ksp->GetTotalNumIter()); + // } timer.solve_time += timer.Lap(); // Postprocess the results. @@ -378,11 +413,13 @@ void EigenSolver::Solve(std::vector> &mesh, Mpi::Print("\n"); } - // Set the internal GridFunctions in PostOperator for all postprocessing operations. - eigen->GetEigenvector(i, E); - PostOperator::GetBField(omega, *NegCurl, E, B); - postop.SetEGridFunction(E); - postop.SetBGridFunction(B); + // Compute B = -1/(iω) ∇ x E on the true dofs, and set the internal GridFunctions in + // PostOperator for all postprocessing operations. 
+ // eigen->GetEigenvector(i, E); //XX TODO petsc::PetscParVector + Curl->Mult(E, B); + B *= -1.0 / (1i * omega); + // postop.SetEGridFunction(E); //XX TODO petsc::PetscParVector + // postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp(), omega.real()); // Postprocess the mode. diff --git a/palace/drivers/electrostaticsolver.cpp b/palace/drivers/electrostaticsolver.cpp index 855eab7a0..a8320d967 100644 --- a/palace/drivers/electrostaticsolver.cpp +++ b/palace/drivers/electrostaticsolver.cpp @@ -5,6 +5,7 @@ #include #include "linalg/gmg.hpp" +#include "linalg/operator.hpp" #include "linalg/pc.hpp" #include "models/laplaceoperator.hpp" #include "models/postoperator.hpp" @@ -23,49 +24,51 @@ void ElectrostaticSolver::Solve(std::vector> &mes // dofs. The eliminated matrix is stored in order to construct the RHS vector for nonzero // prescribed BC values. timer.Lap(); - std::vector> K, Ke; + std::vector> K; LaplaceOperator laplaceop(iodata, mesh); - laplaceop.GetStiffnessMatrix(K, Ke); + laplaceop.GetStiffnessMatrix(K); SaveMetadata(laplaceop.GetH1Space()); - // Set up the linear solver. 
- std::unique_ptr pc = - ConfigurePreconditioner(iodata, laplaceop.GetDbcMarker(), laplaceop.GetH1Spaces()); - auto *gmg = dynamic_cast(pc.get()); - if (gmg) - { - gmg->SetOperator(K); - } - else - { - pc->SetOperator(*K.back()); - } - - mfem::IterativeSolver::PrintLevel print = - mfem::IterativeSolver::PrintLevel().Warnings().Errors(); - if (iodata.problem.verbose > 0) - { - print.Summary(); - if (iodata.problem.verbose > 1) - { - print.Iterations(); - if (iodata.problem.verbose > 2) - { - print.All(); - } - } - } - mfem::CGSolver pcg(mesh.back()->GetComm()); - pcg.SetRelTol(iodata.solver.linear.tol); - pcg.SetMaxIter(iodata.solver.linear.max_it); - pcg.SetPrintLevel(print); - pcg.SetOperator(*K.back()); // Call before SetPreconditioner, PC operator set separately - pcg.SetPreconditioner(*pc); - if (iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::DEFAULT && - iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::CG) - { - Mpi::Warning("Electrostatic problem type always uses CG as the Krylov solver!\n"); - } + // XX TODO REVISIT BELOW... + + // // Set up the linear solver. 
+ // std::unique_ptr pc = + // ConfigurePreconditioner(iodata, laplaceop.GetDbcMarker(), laplaceop.GetH1Spaces()); + // auto *gmg = dynamic_cast(pc.get()); + // if (gmg) + // { + // gmg->SetOperator(K); + // } + // else + // { + // pc->SetOperator(*K.back()); + // } + + // mfem::IterativeSolver::PrintLevel print = + // mfem::IterativeSolver::PrintLevel().Warnings().Errors(); + // if (iodata.problem.verbose > 0) + // { + // print.Summary(); + // if (iodata.problem.verbose > 1) + // { + // print.Iterations(); + // if (iodata.problem.verbose > 2) + // { + // print.All(); + // } + // } + // } + // mfem::CGSolver pcg(mesh.back()->GetComm()); + // pcg.SetRelTol(iodata.solver.linear.tol); + // pcg.SetMaxIter(iodata.solver.linear.max_it); + // pcg.SetPrintLevel(print); + // pcg.SetOperator(*K.back()); // Call before SetPreconditioner, PC operator set + // separately pcg.SetPreconditioner(*pc); if (iodata.solver.linear.ksp_type != + // config::LinearSolverData::KspType::DEFAULT && + // iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::CG) + // { + // Mpi::Warning("Electrostatic problem type always uses CG as the Krylov solver!\n"); + // } // Terminal indices are the set of boundaries over which to compute the capacitance // matrix. Terminal boundaries are aliases for ports. @@ -74,8 +77,8 @@ void ElectrostaticSolver::Solve(std::vector> &mes MFEM_VERIFY(nstep > 0, "No terminal boundaries specified for electrostatic simulation!"); // Right-hand side term and solution vector storage. - mfem::Vector RHS(K.back()->Height()); - std::vector V(nstep); + Vector RHS(K.back()->Height()); + std::vector V(nstep); timer.construct_time += timer.Lap(); // Main loop over terminal boundaries. 
@@ -88,27 +91,28 @@ void ElectrostaticSolver::Solve(std::vector> &mes Mpi::Print("\nIt {:d}/{:d}: Index = {:d} (elapsed time = {:.2e} s)\n", step + 1, nstep, idx, Timer::Duration(timer.Now() - t0).count()); - // Form and solve the linear system for a prescribed nonzero voltage on the specified - // terminal. - Mpi::Print("\n"); - V[step].SetSize(RHS.Size()); - laplaceop.GetExcitationVector(idx, *K.back(), *Ke.back(), V[step], RHS); - timer.construct_time += timer.Lap(); - - pcg.Mult(RHS, V[step]); - if (!pcg.GetConverged()) - { - Mpi::Warning("Linear solver did not converge in {:d} iterations!\n", - pcg.GetNumIterations()); - } - ksp_it += pcg.GetNumIterations(); - timer.solve_time += timer.Lap(); - - // V[step]->Print(); - Mpi::Print(" Sol. ||V|| = {:.6e} (||RHS|| = {:.6e})\n", - std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), V[step], V[step])), - std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), RHS, RHS))); - timer.postpro_time += timer.Lap(); + // // Form and solve the linear system for a prescribed nonzero voltage on the + // specified + // // terminal. + // Mpi::Print("\n"); + // V[step].SetSize(RHS.Size()); + // laplaceop.GetExcitationVector(idx, *K.back(), *Ke.back(), V[step], RHS); + // timer.construct_time += timer.Lap(); + + // pcg.Mult(RHS, V[step]); + // if (!pcg.GetConverged()) + // { + // Mpi::Warning("Linear solver did not converge in {:d} iterations!\n", + // pcg.GetNumIterations()); + // } + // ksp_it += pcg.GetNumIterations(); + // timer.solve_time += timer.Lap(); + + // // V[step]->Print(); + // Mpi::Print(" Sol. ||V|| = {:.6e} (||RHS|| = {:.6e})\n", + // std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), V[step], V[step])), + // std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), RHS, RHS))); + // timer.postpro_time += timer.Lap(); // Next terminal. 
step++; @@ -122,8 +126,7 @@ void ElectrostaticSolver::Solve(std::vector> &mes } void ElectrostaticSolver::Postprocess(LaplaceOperator &laplaceop, PostOperator &postop, - const std::vector &V, - Timer &timer) const + const std::vector &V, Timer &timer) const { // Postprocess the Maxwell capacitance matrix. See p. 97 of the COMSOL AC/DC Module manual // for the associated formulas based on the electric field energy based on a unit voltage @@ -131,11 +134,11 @@ void ElectrostaticSolver::Postprocess(LaplaceOperator &laplaceop, PostOperator & // charges from the prescribed voltage to get C directly as: // Q_i = ∫ ρ dV = ∫ ∇ ⋅ (ε E) dV = ∫ (ε E) ⋅ n dS // and C_ij = Q_i/V_j. The energy formulation avoids having to locally integrate E = -∇V. - std::unique_ptr NegGrad = laplaceop.GetNegGradMatrix(); + std::unique_ptr Grad = laplaceop.GetGradMatrix(); const std::map> &terminal_sources = laplaceop.GetSources(); int nstep = static_cast(terminal_sources.size()); mfem::DenseMatrix C(nstep), Cm(nstep); - mfem::Vector E(NegGrad->Height()), Vij(NegGrad->Width()); + Vector E(Grad->Height()), Vij(Grad->Width()); if (iodata.solver.electrostatic.n_post > 0) { Mpi::Print("\n"); @@ -143,8 +146,10 @@ void ElectrostaticSolver::Postprocess(LaplaceOperator &laplaceop, PostOperator & int i = 0; for (const auto &[idx, data] : terminal_sources) { - // Set the internal GridFunctions in PostOperator for all postprocessing operations. - PostOperator::GetEField(*NegGrad, V[i], E); + // Compute E = -∇V on the true dofs, and set the internal GridFunctions in PostOperator + // for all postprocessing operations. 
+ E = 0.0; + Grad->AddMult(V[i], E, -1.0); postop.SetEGridFunction(E); postop.SetVGridFunction(V[i]); double Ue = postop.GetEFieldEnergy(); @@ -179,7 +184,8 @@ void ElectrostaticSolver::Postprocess(LaplaceOperator &laplaceop, PostOperator & else if (j > i) { add(V[i], V[j], Vij); - PostOperator::GetEField(*NegGrad, Vij, E); + E = 0.0; + Grad->AddMult(Vij, E, -1.0); postop.SetEGridFunction(E); double Ue = postop.GetEFieldEnergy(); C(i, j) = Ue - 0.5 * (C(i, i) + C(j, j)); diff --git a/palace/drivers/electrostaticsolver.hpp b/palace/drivers/electrostaticsolver.hpp index 58af068c6..d74a8812d 100644 --- a/palace/drivers/electrostaticsolver.hpp +++ b/palace/drivers/electrostaticsolver.hpp @@ -8,6 +8,7 @@ #include #include #include "drivers/basesolver.hpp" +#include "linalg/vector.hpp" namespace mfem { @@ -16,7 +17,6 @@ template class Array; class DenseMatrix; class ParMesh; -class Vector; } // namespace mfem @@ -35,7 +35,7 @@ class ElectrostaticSolver : public BaseSolver { private: void Postprocess(LaplaceOperator &laplaceop, PostOperator &postop, - const std::vector &V, Timer &timer) const; + const std::vector &V, Timer &timer) const; void PostprocessTerminals(const std::map> &terminal_sources, const mfem::DenseMatrix &C, const mfem::DenseMatrix &Cinv, diff --git a/palace/drivers/magnetostaticsolver.cpp b/palace/drivers/magnetostaticsolver.cpp index b00666ddc..b80c9d82a 100644 --- a/palace/drivers/magnetostaticsolver.cpp +++ b/palace/drivers/magnetostaticsolver.cpp @@ -5,6 +5,7 @@ #include #include "linalg/gmg.hpp" +#include "linalg/operator.hpp" #include "linalg/pc.hpp" #include "models/curlcurloperator.hpp" #include "models/postoperator.hpp" @@ -23,49 +24,52 @@ void MagnetostaticSolver::Solve(std::vector> &mes // handled eliminating the rows and columns of the system matrix for the corresponding // dofs. 
timer.Lap(); - std::vector> K; + std::vector> K; CurlCurlOperator curlcurlop(iodata, mesh); curlcurlop.GetStiffnessMatrix(K); SaveMetadata(curlcurlop.GetNDSpace()); - // Set up the linear solver. - std::unique_ptr pc = - ConfigurePreconditioner(iodata, curlcurlop.GetDbcMarker(), curlcurlop.GetNDSpaces()); - auto *gmg = dynamic_cast(pc.get()); - if (gmg) - { - gmg->SetOperator(K); - } - else - { - pc->SetOperator(*K.back()); - } + // XX TODO REVISIT BELOW... - mfem::IterativeSolver::PrintLevel print = - mfem::IterativeSolver::PrintLevel().Warnings().Errors(); - if (iodata.problem.verbose > 0) - { - print.Summary(); - if (iodata.problem.verbose > 1) - { - print.Iterations(); - if (iodata.problem.verbose > 2) - { - print.All(); - } - } - } - mfem::CGSolver pcg(mesh.back()->GetComm()); - pcg.SetRelTol(iodata.solver.linear.tol); - pcg.SetMaxIter(iodata.solver.linear.max_it); - pcg.SetPrintLevel(print); - pcg.SetOperator(*K.back()); // Call before SetPreconditioner, PC operator set separately - pcg.SetPreconditioner(*pc); - if (iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::DEFAULT && - iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::CG) - { - Mpi::Warning("Magnetostatic problem type always uses CG as the Krylov solver!\n"); - } + // // Set up the linear solver. 
+ // std::unique_ptr pc = + // ConfigurePreconditioner(iodata, curlcurlop.GetDbcMarker(), + // curlcurlop.GetNDSpaces()); + // auto *gmg = dynamic_cast(pc.get()); + // if (gmg) + // { + // gmg->SetOperator(K); + // } + // else + // { + // pc->SetOperator(*K.back()); + // } + + // mfem::IterativeSolver::PrintLevel print = + // mfem::IterativeSolver::PrintLevel().Warnings().Errors(); + // if (iodata.problem.verbose > 0) + // { + // print.Summary(); + // if (iodata.problem.verbose > 1) + // { + // print.Iterations(); + // if (iodata.problem.verbose > 2) + // { + // print.All(); + // } + // } + // } + // mfem::CGSolver pcg(mesh.back()->GetComm()); + // pcg.SetRelTol(iodata.solver.linear.tol); + // pcg.SetMaxIter(iodata.solver.linear.max_it); + // pcg.SetPrintLevel(print); + // pcg.SetOperator(*K.back()); // Call before SetPreconditioner, PC operator set + // separately pcg.SetPreconditioner(*pc); if (iodata.solver.linear.ksp_type != + // config::LinearSolverData::KspType::DEFAULT && + // iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::CG) + // { + // Mpi::Warning("Magnetostatic problem type always uses CG as the Krylov solver!\n"); + // } // Terminal indices are the set of boundaries over which to compute the inductance matrix. PostOperator postop(iodata, curlcurlop, "magnetostatic"); @@ -74,8 +78,8 @@ void MagnetostaticSolver::Solve(std::vector> &mes "No surface current boundaries specified for magnetostatic simulation!"); // Source term and solution vector storage. - mfem::Vector RHS(K.back()->Height()); - std::vector A(nstep); + Vector RHS(K.back()->Height()); + std::vector A(nstep); timer.construct_time += timer.Lap(); // Main loop over current source boundaries. 
@@ -88,27 +92,25 @@ void MagnetostaticSolver::Solve(std::vector> &mes Mpi::Print("\nIt {:d}/{:d}: Index = {:d} (elapsed time = {:.2e} s)\n", step + 1, nstep, idx, Timer::Duration(timer.Now() - t0).count()); - // Form and solve the linear system for a prescribed current on the specified source. - Mpi::Print("\n"); - A[step].SetSize(RHS.Size()); - A[step] = 0.0; - curlcurlop.GetExcitationVector(idx, RHS); - timer.construct_time += timer.Lap(); + // // Form and solve the linear system for a prescribed current on the specified + // source. Mpi::Print("\n"); A[step].SetSize(RHS.Size()); A[step] = 0.0; + // curlcurlop.GetExcitationVector(idx, RHS); + // timer.construct_time += timer.Lap(); - pcg.Mult(RHS, A[step]); - if (!pcg.GetConverged()) - { - Mpi::Warning("Linear solver did not converge in {:d} iterations!\n", - pcg.GetNumIterations()); - } - ksp_it += pcg.GetNumIterations(); - timer.solve_time += timer.Lap(); + // pcg.Mult(RHS, A[step]); + // if (!pcg.GetConverged()) + // { + // Mpi::Warning("Linear solver did not converge in {:d} iterations!\n", + // pcg.GetNumIterations()); + // } + // ksp_it += pcg.GetNumIterations(); + // timer.solve_time += timer.Lap(); - // A[step]->Print(); - Mpi::Print(" Sol. ||A|| = {:.6e} (||RHS|| = {:.6e})\n", - std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), A[step], A[step])), - std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), RHS, RHS))); - timer.postpro_time += timer.Lap(); + // // A[step]->Print(); + // Mpi::Print(" Sol. ||A|| = {:.6e} (||RHS|| = {:.6e})\n", + // std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), A[step], A[step])), + // std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), RHS, RHS))); + // timer.postpro_time += timer.Lap(); // Next source. 
step++; @@ -122,8 +124,7 @@ void MagnetostaticSolver::Solve(std::vector> &mes } void MagnetostaticSolver::Postprocess(CurlCurlOperator &curlcurlop, PostOperator &postop, - const std::vector &A, - Timer &timer) const + const std::vector &A, Timer &timer) const { // Postprocess the Maxwell inductance matrix. See p. 97 of the COMSOL AC/DC Module manual // for the associated formulas based on the magnetic field energy based on a current @@ -132,12 +133,12 @@ void MagnetostaticSolver::Postprocess(CurlCurlOperator &curlcurlop, PostOperator // Φ_i = ∫ B ⋅ n_j dS // and M_ij = Φ_i/I_j. The energy formulation avoids having to locally integrate B = // ∇ x A. - std::unique_ptr Curl = curlcurlop.GetCurlMatrix(); + std::unique_ptr Curl = curlcurlop.GetCurlMatrix(); const SurfaceCurrentOperator &surf_j_op = curlcurlop.GetSurfaceCurrentOp(); int nstep = static_cast(surf_j_op.Size()); mfem::DenseMatrix M(nstep), Mm(nstep); - mfem::Vector B(Curl->Height()), Aij(Curl->Width()); - mfem::Vector Iinc(nstep); + Vector B(Curl->Height()), Aij(Curl->Width()); + Vector Iinc(nstep); if (iodata.solver.magnetostatic.n_post > 0) { Mpi::Print("\n"); @@ -150,8 +151,9 @@ void MagnetostaticSolver::Postprocess(CurlCurlOperator &curlcurlop, PostOperator Iinc(i) = data.GetExcitationCurrent(); MFEM_VERIFY(Iinc(i) > 0.0, "Zero current excitation for magnetostatic solver!"); - // Set the internal GridFunctions in PostOperator for all postprocessing operations. - PostOperator::GetBField(*Curl, A[i], B); + // Compute B = ∇ x A on the true dofs, and set the internal GridFunctions in + // PostOperator for all postprocessing operations. 
+ Curl->Mult(A[i], B); postop.SetBGridFunction(B); postop.SetAGridFunction(A[i]); double Um = postop.GetHFieldEnergy(); @@ -186,7 +188,7 @@ void MagnetostaticSolver::Postprocess(CurlCurlOperator &curlcurlop, PostOperator else if (j > i) { add(A[i], A[j], Aij); - PostOperator::GetBField(*Curl, Aij, B); + Curl->Mult(Aij, B); postop.SetBGridFunction(B); double Um = postop.GetHFieldEnergy(); M(i, j) = Um / (Iinc(i) * Iinc(j)) - diff --git a/palace/drivers/magnetostaticsolver.hpp b/palace/drivers/magnetostaticsolver.hpp index eb0fcd009..aab9e9668 100644 --- a/palace/drivers/magnetostaticsolver.hpp +++ b/palace/drivers/magnetostaticsolver.hpp @@ -7,13 +7,13 @@ #include #include #include "drivers/basesolver.hpp" +#include "linalg/vector.hpp" namespace mfem { class DenseMatrix; class ParMesh; -class Vector; } // namespace mfem @@ -33,7 +33,7 @@ class MagnetostaticSolver : public BaseSolver { private: void Postprocess(CurlCurlOperator &curlcurlop, PostOperator &postop, - const std::vector &A, Timer &timer) const; + const std::vector &A, Timer &timer) const; void PostprocessTerminals(const SurfaceCurrentOperator &surf_j_op, const mfem::DenseMatrix &M, const mfem::DenseMatrix &Minv, diff --git a/palace/drivers/transientsolver.cpp b/palace/drivers/transientsolver.cpp index 22e07f798..29e51d9a3 100644 --- a/palace/drivers/transientsolver.cpp +++ b/palace/drivers/transientsolver.cpp @@ -4,6 +4,7 @@ #include "transientsolver.hpp" #include +#include "linalg/vector.hpp" #include "models/lumpedportoperator.hpp" #include "models/postoperator.hpp" #include "models/spaceoperator.hpp" @@ -98,12 +99,11 @@ void TransientSolver::Solve(std::vector> &mesh, timer.solve_time += timer.Lap(); double E_elec = 0.0, E_mag = 0.0; - const mfem::Vector &E = timeop.GetE(); - const mfem::Vector &B = timeop.GetB(); + const Vector &E = timeop.GetE(); + const Vector &B = timeop.GetB(); postop.SetEGridFunction(E); postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp()); - // E.Print(); 
Mpi::Print(" Sol. ||E|| = {:.6e}, ||B|| = {:.6e}\n", std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), E, E)), std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), B, B))); diff --git a/palace/fem/freqdomain.hpp b/palace/fem/freqdomain.hpp deleted file mode 100644 index 7328a2d88..000000000 --- a/palace/fem/freqdomain.hpp +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_FEM_FREQ_DOMAIN_HPP -#define PALACE_FEM_FREQ_DOMAIN_HPP - -#include -#include "fem/operator.hpp" -#include "linalg/petsc.hpp" - -namespace palace::utils -{ - -// -// Some utility methods for frequency domain problems. -// - -// Convinience method for constructing a the frequency domain matrix-vector product with the -// operator K + iω C - ω² M + A2(ω). -inline std::unique_ptr GetSystemMatrixShell( - double omega, const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, - const petsc::PetscParMatrix *C = nullptr, const petsc::PetscParMatrix *A2 = nullptr) -{ - constexpr auto ExtractReal = petsc::PetscParMatrix::ExtractStructure::REAL; - constexpr auto ExtractImag = petsc::PetscParMatrix::ExtractStructure::IMAGINARY; - auto Ar = std::make_unique(K.GetNumRows(), K.GetNumCols()); - auto Ai = std::make_unique(K.GetNumRows(), K.GetNumCols()); - if (K.HasReal()) - { - Ar->AddOperator(*K.GetOperator(ExtractReal)); - } - if (K.HasImag()) - { - Ai->AddOperator(*K.GetOperator(ExtractImag)); - } - if (M.HasReal()) - { - Ar->AddOperator(*M.GetOperator(ExtractReal), -omega * omega); - } - if (M.HasImag()) - { - Ai->AddOperator(*M.GetOperator(ExtractImag), -omega * omega); - } - if (C) - { - if (C->HasReal()) - { - Ai->AddOperator(*C->GetOperator(ExtractReal), omega); - } - if (C->HasImag()) - { - Ar->AddOperator(*C->GetOperator(ExtractImag), -omega); - } - } - if (A2) - { - if (A2->HasReal()) - { - Ar->AddOperator(*A2->GetOperator(ExtractReal)); - } - if (A2->HasImag()) - { - 
Ai->AddOperator(*A2->GetOperator(ExtractImag)); - } - } - auto A = - std::make_unique(K.GetComm(), std::move(Ar), std::move(Ai)); - A->SetSymmetric(); - return A; -} - -} // namespace palace::utils - -#endif // PALACE_FEM_FREQ_DOMAIN_HPP diff --git a/palace/fem/interpolator.hpp b/palace/fem/interpolator.hpp new file mode 100644 index 000000000..f0a9f7fbb --- /dev/null +++ b/palace/fem/interpolator.hpp @@ -0,0 +1,129 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_FEM_INTERPOLATION_HPP +#define PALACE_FEM_INTERPOLATION_HPP + +#include +#include +#include +#include +#include "utils/communication.hpp" +#include "utils/iodata.hpp" + +namespace palace +{ + +// +// A class which wraps MFEM's GSLIB interface for high-order field interpolation. +// +class InterpolationOperator +{ +private: +#if defined(MFEM_USE_GSLIB) + mfem::FindPointsGSLIB op; +#endif + std::vector op_idx; + +public: +#if defined(MFEM_USE_GSLIB) + InterpolationOperator(const IoData &iodata, mfem::ParMesh &mesh) : op(mesh.GetComm()) +#else + InterpolationOperator(const IoData &iodata, mfem::ParMesh &mesh) +#endif + { +#if defined(MFEM_USE_GSLIB) + // Set up probes interpolation. All processes search for all points. + if (iodata.domains.postpro.probe.empty()) + { + return; + } + const double bb_t = 0.1; // MFEM defaults + const double newton_tol = 1.0e-12; + const int npts = static_cast(iodata.domains.postpro.probe.size()); + MFEM_VERIFY( + mesh.Dimension() == mesh.SpaceDimension(), + "Probe postprocessing functionality requires mesh dimension == space dimension!"); + mfem::Vector xyz(npts * mesh.SpaceDimension()); + op_idx.resize(npts); + int i = 0; + for (const auto &[idx, data] : iodata.domains.postpro.probe) + { + // Use default ordering byNODES. 
+ xyz(i) = data.x; + xyz(npts + i) = data.y; + if (mesh.SpaceDimension() == 3) + { + xyz(2 * npts + i) = data.z; + } + op_idx[i++] = idx; + } + op.Setup(mesh, bb_t, newton_tol, npts); + op.FindPoints(xyz, mfem::Ordering::byNODES); + op.SetDefaultInterpolationValue(0.0); + i = 0; + for (const auto &[idx, data] : iodata.domains.postpro.probe) + { + if (op.GetCode()[i++] == 2) + { + Mpi::Warning("Probe {:d} at ({:.3e}, {:.3e}, {:.3e}) m could not be found!\n" + "Using default value 0.0!\n", + idx, iodata.DimensionalizeValue(IoData::ValueType::LENGTH, data.x), + iodata.DimensionalizeValue(IoData::ValueType::LENGTH, data.y), + iodata.DimensionalizeValue(IoData::ValueType::LENGTH, data.z)); + } + } +#else + MFEM_VERIFY(iodata.domains.postpro.probe.empty(), + "InterpolationOperator class requires MFEM_USE_GSLIB!"); +#endif + } + + std::vector ProbeField(const mfem::ParGridFunction &U) + { +#if defined(MFEM_USE_GSLIB) + // Interpolated vector values are returned from GSLIB interpolator byNODES, which we + // transform to byVDIM for output. 
+ const int npts = op.GetCode().Size(); + const int dim = U.VectorDim(); + std::vector vals(npts * dim); + mfem::Vector v(npts * dim); + op.Interpolate(U, v); + for (int d = 0; d < dim; d++) + { + for (int i = 0; i < npts; i++) + { + vals[i * dim + d] = v(d * npts + i); + } + } + return vals; +#else + MFEM_ABORT("InterpolationOperator class requires MFEM_USE_GSLIB!"); + return {}; +#endif + } + + std::vector> ProbeField(const mfem::ParComplexGridFunction &U, + bool has_imaginary) + { + std::vector vr = ProbeField(U.real()); + if (has_imaginary) + { + std::vector vi = ProbeField(U.imag()); + std::vector> vals(vr.size()); + std::transform(vr.begin(), vr.end(), vi.begin(), vals.begin(), + [](double xr, double xi) { return std::complex(xr, xi); }); + return vals; + } + else + { + return std::vector>(vr.begin(), vr.end()); + } + } + + const auto &GetProbes() const { return op_idx; } +}; + +} // namespace palace + +#endif // PALACE_FEM_INTERPOLATION_HPP diff --git a/palace/fem/lumpedelement.hpp b/palace/fem/lumpedelement.hpp index 4e98a6e81..a6bcb7ca2 100644 --- a/palace/fem/lumpedelement.hpp +++ b/palace/fem/lumpedelement.hpp @@ -33,7 +33,7 @@ class LumpedElementData mfem::ParLinearForm s(&fespace); mfem::ConstantCoefficient one_func(1.0); s.AddBoundaryIntegrator(new BoundaryLFIntegrator(one_func), attr_marker); - s.UseFastAssembly(true); + s.UseFastAssembly(false); s.Assemble(); return s(ones); } diff --git a/palace/fem/multigrid.hpp b/palace/fem/multigrid.hpp index fa361a819..b35b50f7f 100644 --- a/palace/fem/multigrid.hpp +++ b/palace/fem/multigrid.hpp @@ -7,7 +7,7 @@ #include #include #include -#include "fem/operator.hpp" +#include "linalg/operator.hpp" namespace palace::utils { @@ -66,33 +66,51 @@ std::vector> ConstructFECollections(bool pc_pmg, b return fecs; } -// Construct a heirarchy of finite element spaces given a sequence of meshes and +// Construct a hierarchy of finite element spaces given a sequence of meshes and // finite element collections. 
Dirichlet boundary conditions are additionally // marked. template mfem::ParFiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy( const std::vector> &mesh, const std::vector> &fecs, - const mfem::Array &dbc_marker) + const mfem::Array *dbc_marker = nullptr, + std::vector> *dbc_tdof_lists = nullptr) { - MFEM_VERIFY(!mesh.empty() && !fecs.empty(), + MFEM_VERIFY(!mesh.empty() && !fecs.empty() && + (!dbc_tdof_lists || dbc_tdof_lists->empty()), "Empty mesh or FE collection for FE space construction!"); auto *fespace = new mfem::ParFiniteElementSpace(mesh[0].get(), fecs[0].get()); + if (dbc_marker && dbc_tdof_lists) + { + fespace->GetEssentialTrueDofs(*dbc_marker, dbc_tdof_lists->emplace_back()); + } mfem::ParFiniteElementSpaceHierarchy fespaces(mesh[0].get(), fespace, false, true); + // h-refinement for (std::size_t l = 1; l < mesh.size(); l++) { fespace = new mfem::ParFiniteElementSpace(mesh[l].get(), fecs[0].get()); - auto *P = - new ZeroWrapTransferOperator(fespaces.GetFinestFESpace(), *fespace, dbc_marker); + if (dbc_marker && dbc_tdof_lists) + { + fespace->GetEssentialTrueDofs(*dbc_marker, dbc_tdof_lists->emplace_back()); + } + auto *P = new ParOperator( + std::make_unique(fespaces.GetFinestFESpace(), *fespace), + fespaces.GetFinestFESpace(), *fespace); fespaces.AddLevel(mesh[l].get(), fespace, P, false, true, true); } + // p-refinement for (std::size_t l = 1; l < fecs.size(); l++) { fespace = new mfem::ParFiniteElementSpace(mesh.back().get(), fecs[l].get()); - auto *P = - new ZeroWrapTransferOperator(fespaces.GetFinestFESpace(), *fespace, dbc_marker); + if (dbc_marker && dbc_tdof_lists) + { + fespace->GetEssentialTrueDofs(*dbc_marker, dbc_tdof_lists->emplace_back()); + } + auto *P = new ParOperator( + std::make_unique(fespaces.GetFinestFESpace(), *fespace), + fespaces.GetFinestFESpace(), *fespace); fespaces.AddLevel(mesh.back().get(), fespace, P, false, true, true); } return fespaces; @@ -103,12 +121,18 @@ mfem::ParFiniteElementSpaceHierarchy 
ConstructFiniteElementSpaceHierarchy( // conditions as they need not be incorporated in any inter-space projectors. template mfem::ParFiniteElementSpaceHierarchy -ConstructFiniteElementSpaceHierarchy(mfem::ParMesh &mesh, const FECollection &fec) +ConstructFiniteElementSpaceHierarchy(mfem::ParMesh &mesh, const FECollection &fec, + const mfem::Array *dbc_marker = nullptr, + mfem::Array *dbc_tdof_list = nullptr) { auto *fespace = new mfem::ParFiniteElementSpace(&mesh, &fec); + if (dbc_marker && dbc_tdof_list) + { + fespace->GetEssentialTrueDofs(*dbc_marker, *dbc_tdof_list); + } return mfem::ParFiniteElementSpaceHierarchy(&mesh, fespace, false, true); } } // namespace palace::utils -#endif // PALACE_FEM_MULTIGRID_HPP \ No newline at end of file +#endif // PALACE_FEM_MULTIGRID_HPP diff --git a/palace/fem/operator.hpp b/palace/fem/operator.hpp deleted file mode 100644 index 589def462..000000000 --- a/palace/fem/operator.hpp +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_FEM_OPERATOR_HPP -#define PALACE_FEM_OPERATOR_HPP - -#include -#include -#include -#include - -namespace palace -{ - -// -// Derived operator classes extending those already in MFEM. -// - -// Wraps a transfer operator between finite element spaces to account for eliminated -// essential BC. 
-class ZeroWrapTransferOperator : public mfem::Operator -{ -private: - const mfem::TrueTransferOperator P; - mfem::Array coarse_dbc_tdof_list; - -public: - ZeroWrapTransferOperator(mfem::ParFiniteElementSpace &coarse_fespace, - mfem::ParFiniteElementSpace &fine_fespace, - const mfem::Array &dbc_marker) - : P(coarse_fespace, fine_fespace) - { - height = P.Height(); - width = P.Width(); - coarse_fespace.GetEssentialTrueDofs(dbc_marker, coarse_dbc_tdof_list); - } - - void Mult(const mfem::Vector &x, mfem::Vector &y) const override { P.Mult(x, y); } - - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override - { - P.MultTranspose(x, y); - y.SetSubVector(coarse_dbc_tdof_list, 0.0); - } -}; - -// Wraps a reference to an existing, externally owned operator. -class ReferenceOperator : public mfem::Operator -{ -private: - const mfem::Operator &op; - -public: - ReferenceOperator(const mfem::Operator &oper) - : mfem::Operator(oper.Height(), oper.Width()), op(oper) - { - } - - void Mult(const mfem::Vector &x, mfem::Vector &y) const override { op.Mult(x, y); } - - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override - { - op.MultTranspose(x, y); - } -}; - -// Wrap a sequence of operators of the same dimensions and optional coefficients. 
-class SumOperator : public mfem::Operator -{ -private: - std::vector, double>> op; - -public: - SumOperator(int s) : mfem::Operator(s) {} - SumOperator(int h, int w) : mfem::Operator(h, w) {} - - void AddOperator(const mfem::Operator &oper, double c = 1.0) - { - MFEM_VERIFY(oper.Height() == height && oper.Width() == width, - "Invalid Operator dimensions for SumOperator!"); - op.emplace_back(std::cref(oper), c); - } - - void Mult(const mfem::Vector &x, mfem::Vector &y) const override - { - y = 0.0; - for (const auto &[oper, c] : op) - { - oper.get().AddMult(x, y, c); - } - } - - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override - { - y = 0.0; - for (const auto &[oper, c] : op) - { - oper.get().AddMultTranspose(x, y, c); - } - } -}; - -} // namespace palace - -#endif // PALACE_FEM_OPERATOR_HPP diff --git a/palace/linalg/CMakeLists.txt b/palace/linalg/CMakeLists.txt index e2f45b969..4d640f9fb 100644 --- a/palace/linalg/CMakeLists.txt +++ b/palace/linalg/CMakeLists.txt @@ -11,17 +11,21 @@ target_sources(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/ams.cpp ${CMAKE_CURRENT_SOURCE_DIR}/arpack.cpp ${CMAKE_CURRENT_SOURCE_DIR}/chebyshev.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/complex.cpp ${CMAKE_CURRENT_SOURCE_DIR}/curlcurl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/distrelaxation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/divfree.cpp ${CMAKE_CURRENT_SOURCE_DIR}/feast.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gmg.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hypre.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/jacobi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ksp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mumps.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/operator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/petsc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/slepc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/strumpack.cpp ${CMAKE_CURRENT_SOURCE_DIR}/superlu.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/vector.cpp ) diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp index 400abf987..5bd696be6 100644 --- a/palace/linalg/ams.cpp +++ b/palace/linalg/ams.cpp @@ 
-94,6 +94,9 @@ void HypreAmsSolver::ConstructAuxiliaryMatrices(mfem::ParFiniteElementSpace &nd_ mfem::FiniteElementCollection *h1_fec = nullptr; if (!h1_fespace) { + + // XX TODO REUSE FROM INPUT... + h1_fec = new mfem::H1_FECollection(nd_fespace.GetMaxElementOrder(), nd_fespace.GetParMesh()->Dimension()); h1_fespace = new mfem::ParFiniteElementSpace(nd_fespace.GetParMesh(), h1_fec); diff --git a/palace/linalg/arpack.hpp b/palace/linalg/arpack.hpp index aa80d1466..dc818a7d0 100644 --- a/palace/linalg/arpack.hpp +++ b/palace/linalg/arpack.hpp @@ -15,6 +15,8 @@ #include #include "linalg/eigen.hpp" +// XX TODO REMOVE PETSc DEPENDENCE + namespace palace { diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp index 0e9a4f487..565629a1f 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ -5,56 +5,57 @@ #include #include -#include "linalg/pc.hpp" -#include "linalg/petsc.hpp" +#include "linalg/operator.hpp" namespace palace { -namespace -{ - -using mfem::ForallWrap; - -class SymmetricScaledOperator : public mfem::Operator -{ -private: - const mfem::Operator &A; - const mfem::Vector &d; - mutable mfem::Vector z; - -public: - SymmetricScaledOperator(const mfem::Operator &op, const mfem::Vector &v) - : mfem::Operator(op.Height()), A(op), d(v), z(v.Size()) - { - } - - void Mult(const mfem::Vector &x, mfem::Vector &y) const override - { - A.Mult(x, z); - { - const int N = height; - const auto *D = d.Read(); - const auto *Z = z.Read(); - auto *Y = y.Write(); - MFEM_FORALL(i, N, { Y[i] = D[i] * Z[i]; }); - } - } - - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override - { - { - const int N = height; - const auto *D = d.Read(); - const auto *X = x.Read(); - auto *Z = z.Write(); - MFEM_FORALL(i, N, { Z[i] = D[i] * X[i]; }); - } - A.Mult(z, y); - } -}; - -} // namespace +// XX TODO REPLACE CHEBYSHEV ARRAYMULT WITH MULT/MULT TRANSPOSE (NO NEED FOR ARRAY MULT...) 
+ +// namespace +// { + +// using mfem::ForallWrap; // XX TODO NEEDED? + +// class SymmetricScaledOperator : public mfem::Operator +// { +// private: +// const mfem::Operator &A; +// const mfem::Vector &d; +// mutable mfem::Vector z; + +// public: +// SymmetricScaledOperator(const mfem::Operator &op, const mfem::Vector &v) +// : mfem::Operator(op.Height()), A(op), d(v), z(v.Size()) +// { +// } + +// void Mult(const mfem::Vector &x, mfem::Vector &y) const override +// { +// A.Mult(x, z); +// { +// const int N = height; +// const auto *D = d.Read(); +// const auto *Z = z.Read(); +// auto *Y = y.Write(); +// mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { Y[i] = D[i] * Z[i]; }); +// } +// } + +// void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override +// { +// { +// const int N = height; +// const auto *D = d.Read(); +// const auto *X = x.Read(); +// auto *Z = z.Write(); +// mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { Z[i] = D[i] * X[i]; }); +// } +// A.Mult(z, y); +// } +// }; + +// } // namespace ChebyshevSmoother::ChebyshevSmoother(MPI_Comm c, const mfem::Array &tdof_list, int smooth_it, int poly_order) @@ -67,6 +68,11 @@ void ChebyshevSmoother::SetOperator(const mfem::Operator &op) A = &op; height = A->Height(); width = A->Width(); + r.SetSize(height); + d.SetSize(height); + + // XX TODO: AS FOR TIME OPERATOR, TDOF_LIST SHOULD NOT BE NEEDED AS WE HAVE IT IN THE + // OPERATOR DIAGONAL ALREADY?? Can just use AssembleDiagonal and .Reciprocal() // Configure symmetric diagonal scaling. 
const int N = height; @@ -75,98 +81,158 @@ void ChebyshevSmoother::SetOperator(const mfem::Operator &op) A->AssembleDiagonal(diag); const auto *D = diag.Read(); auto *DI = dinv.Write(); - MFEM_FORALL(i, N, { - MFEM_ASSERT_KERNEL(D[i] != 0.0, "Zero diagonal entry in Chebyshev smoother!"); - DI[i] = 1.0 / D[i]; - }); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + MFEM_ASSERT_KERNEL(D[i] != 0.0, + "Zero diagonal entry in Chebyshev smoother!"); + DI[i] = 1.0 / D[i]; + }); const auto *I = dbc_tdof_list.Read(); - MFEM_FORALL(i, dbc_tdof_list.Size(), { - DI[I[i]] = 1.0; // Assumes operator DiagonalPolicy::ONE - }); + mfem::forall(dbc_tdof_list.Size(), + [=] MFEM_HOST_DEVICE(int i) + { + DI[I[i]] = 1.0; // Assumes operator DiagonalPolicy::ONE + }); // Set up Chebyshev coefficients using the computed maximum eigenvalue estimate. See // mfem::OperatorChebyshevSmoother or Adams et al., Parallel multigrid smoothing: // polynomial versus Gauss-Seidel, JCP (2003). - petsc::PetscShellMatrix DinvA(comm, std::make_unique(*A, dinv)); - lambda_max = 1.1 * DinvA.Norm2(); + DiagonalOperator Dinv(dinv); + SymmetricProductOperator DinvA(Dinv, *A); + lambda_max = 1.1 * linalg::SpectralNorm(comm, DinvA, false); } -void ChebyshevSmoother::ArrayMult(const mfem::Array &X, - mfem::Array &Y) const +void ChebyshevSmoother::Mult(const mfem::Vector &x, mfem::Vector &y) const { - // Initialize. - const int nrhs = X.Size(); - mfem::Array R(nrhs), D(nrhs); - std::vector rrefs(nrhs), drefs(nrhs); - if (nrhs * height != r.Size()) - { - r.SetSize(nrhs * height); - d.SetSize(nrhs * height); - } - for (int j = 0; j < nrhs; j++) - { - rrefs[j].MakeRef(r, j * height, height); - drefs[j].MakeRef(d, j * height, height); - R[j] = &rrefs[j]; - D[j] = &drefs[j]; - } - // Apply smoother: y = y + p(A) (x - A y) . 
for (int it = 0; it < pc_it; it++) { if (iterative_mode || it > 0) { - A->ArrayMult(Y, R); - for (int j = 0; j < nrhs; j++) - { - subtract(*X[j], *R[j], *R[j]); - } + A->Mult(y, r); + subtract(x, r, r); } else { - for (int j = 0; j < nrhs; j++) - { - *R[j] = *X[j]; - *Y[j] = 0.0; - } + r = x; + y = 0.0; } - // 4th-kind Chebyshev smoother + // 4th-kind Chebyshev smoother, from Phillips and Fischer or Lottes (with k -> k + 1 + // shift due to 1-based indexing). { + const int N = height; const auto *DI = dinv.Read(); - for (int j = 0; j < nrhs; j++) - { - const auto *RR = R[j]->Read(); - auto *DD = D[j]->ReadWrite(); - MFEM_FORALL(i, height, { DD[i] = 4.0 / (3.0 * lambda_max) * DI[i] * RR[i]; }); - } + const auto *R = r.Read(); + auto *D = d.ReadWrite(); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) + { D[i] = 4.0 / (3.0 * lambda_max) * DI[i] * R[i]; }); } for (int k = 1; k < order; k++) { - A->ArrayAddMult(D, R, -1.0); + A->AddMult(d, r, -1.0); { - // From Phillips and Fischer or Lottes (with k -> k + 1 shift due to 1-based - // indexing) + const int N = height; const double sd = (2.0 * k - 1.0) / (2.0 * k + 3.0); const double sr = (8.0 * k + 4.0) / ((2.0 * k + 3.0) * lambda_max); const auto *DI = dinv.Read(); - for (int j = 0; j < nrhs; j++) - { - const auto *RR = R[j]->Read(); - auto *YY = Y[j]->ReadWrite(); - auto *DD = D[j]->ReadWrite(); - MFEM_FORALL(i, height, { - YY[i] += DD[i]; - DD[i] = sd * DD[i] + sr * DI[i] * RR[i]; - }); - } + const auto *R = r.Read(); + auto *Y = y.ReadWrite(); + auto *D = d.ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + Y[i] += D[i]; + D[i] = sd * D[i] + sr * DI[i] * R[i]; + }); } } - for (int j = 0; j < nrhs; j++) - { - *Y[j] += *D[j]; - } + y += d; } } +// void ChebyshevSmoother::ArrayMult(const mfem::Array &X, +// mfem::Array &Y) const +// { +// // Initialize. 
+// const int nrhs = X.Size(); +// const int N = height; +// mfem::Array R(nrhs), D(nrhs); +// std::vector rrefs(nrhs), drefs(nrhs); +// if (nrhs * N != r.Size()) +// { +// r.SetSize(nrhs * N); +// d.SetSize(nrhs * N); +// } +// for (int j = 0; j < nrhs; j++) +// { +// rrefs[j].MakeRef(r, j * N, N); +// drefs[j].MakeRef(d, j * N, N); +// R[j] = &rrefs[j]; +// D[j] = &drefs[j]; +// } + +// // Apply smoother: y = y + p(A) (x - A y) . +// for (int it = 0; it < pc_it; it++) +// { +// if (iterative_mode || it > 0) +// { +// A->ArrayMult(Y, R); +// for (int j = 0; j < nrhs; j++) +// { +// subtract(*X[j], *R[j], *R[j]); +// } +// } +// else +// { +// for (int j = 0; j < nrhs; j++) +// { +// *R[j] = *X[j]; +// *Y[j] = 0.0; +// } +// } + +// // 4th-kind Chebyshev smoother +// { +// const auto *DI = dinv.Read(); +// for (int j = 0; j < nrhs; j++) +// { +// const auto *RR = R[j]->Read(); +// auto *DD = D[j]->ReadWrite(); + +// mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) +// { DD[i] = 4.0 / (3.0 * lambda_max) * DI[i] * RR[i]; }); +// } +// } +// for (int k = 1; k < order; k++) +// { +// A->ArrayAddMult(D, R, -1.0); +// { +// // From Phillips and Fischer or Lottes (with k -> k + 1 shift due to 1-based +// // indexing) +// const double sd = (2.0 * k - 1.0) / (2.0 * k + 3.0); +// const double sr = (8.0 * k + 4.0) / ((2.0 * k + 3.0) * lambda_max); +// const auto *DI = dinv.Read(); +// for (int j = 0; j < nrhs; j++) +// { +// const auto *RR = R[j]->Read(); +// auto *YY = Y[j]->ReadWrite(); +// auto *DD = D[j]->ReadWrite(); +// mfem::forall(N, +// [=] MFEM_HOST_DEVICE(int i) +// { +// YY[i] += DD[i]; +// DD[i] = sd * DD[i] + sr * DI[i] * RR[i]; +// }); +// } +// } +// } +// for (int j = 0; j < nrhs; j++) +// { +// *Y[j] += *D[j]; +// } +// } +// } + } // namespace palace diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp index 202e607b0..dbf066809 100644 --- a/palace/linalg/chebyshev.hpp +++ b/palace/linalg/chebyshev.hpp @@ -27,7 +27,7 @@ class ChebyshevSmoother 
: public mfem::Solver // Number of smoother iterations and polynomial order. const int pc_it, order; - // Diagonal scaling of the operator. + // Inverse diagonal scaling of the operator. mfem::Vector dinv; // Maximum operator eigenvalue for Chebyshev polynomial smoothing. @@ -42,28 +42,31 @@ class ChebyshevSmoother : public mfem::Solver void SetOperator(const mfem::Operator &op) override; - void Mult(const mfem::Vector &x, mfem::Vector &y) const override - { - mfem::Array X(1); - mfem::Array Y(1); - X[0] = &x; - Y[0] = &y; - ArrayMult(X, Y); - } - - void ArrayMult(const mfem::Array &X, - mfem::Array &Y) const override; + void Mult(const mfem::Vector &x, mfem::Vector &y) const override; void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override { Mult(x, y); // Assumes operator symmetry } - void ArrayMultTranspose(const mfem::Array &X, - mfem::Array &Y) const override - { - ArrayMult(X, Y); // Assumes operator symmetry - } + // XX TODO REMOVE... + // void Mult(const mfem::Vector &x, mfem::Vector &y) const override + // { + // mfem::Array X(1); + // mfem::Array Y(1); + // X[0] = &x; + // Y[0] = &y; + // ArrayMult(X, Y); + // } + + // void ArrayMult(const mfem::Array &X, + // mfem::Array &Y) const override; + + // void ArrayMultTranspose(const mfem::Array &X, + // mfem::Array &Y) const override + // { + // ArrayMult(X, Y); // Assumes operator symmetry + // } }; } // namespace palace diff --git a/palace/linalg/complex.cpp b/palace/linalg/complex.cpp new file mode 100644 index 000000000..95593abcf --- /dev/null +++ b/palace/linalg/complex.cpp @@ -0,0 +1,899 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#include "complex.hpp" + +#include + +namespace palace +{ + +ComplexVector::ComplexVector(int n) : Vector(2 * n) +{ + xr_.MakeRef(*this, 0, n); + xi_.MakeRef(*this, n, n); +} + +ComplexVector::ComplexVector(const ComplexVector &x) : Vector(2 * x.Size()) +{ + xr_.MakeRef(*this, 0, x.Size()); + xi_.MakeRef(*this, x.Size(), x.Size()); + Set(x.Real(), x.Imag()); +} + +ComplexVector::ComplexVector(const Vector &xr, const Vector &xi) : Vector(2 * xr.Size()) +{ + MFEM_VERIFY(xr.Size() == xi.Size(), + "Mismatch in dimension of real and imaginary matrix parts!"); + xr_.MakeRef(*this, 0, xr.Size()); + xi_.MakeRef(*this, xr.Size(), xr.Size()); + Set(xr, xi); +} + +int ComplexVector::Size() const +{ + return Vector::Size() / 2; +} + +void ComplexVector::SetSize(int n) +{ + Vector::SetSize(2 * n); + xr_.MakeRef(*this, 0, n); + xi_.MakeRef(*this, n, n); +} + +ComplexVector &ComplexVector::operator=(const ComplexVector &y) +{ + Set(y.Real(), y.Imag()); + return *this; +} + +void ComplexVector::Set(const Vector &yr, const Vector &yi) +{ + MFEM_VERIFY(yr.Size() == yi.Size() && yr.Size() == Size(), + "Mismatch in dimension of real and imaginary matrix parts!"); + Real() = yr; + Imag() = yi; + RestoreReal(); + RestoreImag(); +} + +void ComplexVector::Conj() +{ + Imag() *= -1.0; + RestoreImag(); +} + +ComplexVector &ComplexVector::operator*=(std::complex s) +{ + if (s.imag() != 0.0) + { + const int N = Size(); + const double sr = s.real(); + const double si = s.imag(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = si * XR[i] + sr * XI[i]; + XR[i] = sr * XR[i] - si * XI[i]; + XI[i] = t; + }); + } + else if (s.real() != 0.0) + { + Real() *= s.real(); + Imag() *= s.real(); + } + RestoreReal(); + RestoreImag(); + return *this; +} + +std::complex ComplexVector::Dot(const ComplexVector &y) const +{ + return {mfem::InnerProduct(Real(), y.Real()) + 
mfem::InnerProduct(Imag(), y.Imag()), + mfem::InnerProduct(Imag(), y.Real()) - mfem::InnerProduct(Real(), y.Imag())}; +} + +std::complex ComplexVector::TransposeDot(const ComplexVector &y) const +{ + return {mfem::InnerProduct(Real(), y.Real()) - mfem::InnerProduct(Imag(), y.Imag()), + mfem::InnerProduct(Imag(), y.Real()) + mfem::InnerProduct(Real(), y.Imag())}; +} + +void ComplexVector::AXPY(std::complex alpha, const ComplexVector &y) +{ + const int N = Size(); + const double ar = alpha.real(); + const double ai = alpha.imag(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); + const auto *YR = y.Real().Read(); + const auto *YI = y.Imag().Read(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + XR[i] += ar * YR[i] - ai * YI[i]; + XI[i] += ai * YR[i] + ar * YI[i]; + }); + RestoreReal(); + RestoreImag(); +} + +void ComplexVector::AXPBY(std::complex alpha, const ComplexVector &y, + std::complex beta) +{ + const int N = Size(); + const double ar = alpha.real(); + const double ai = alpha.imag(); + const double br = beta.real(); + const double bi = beta.imag(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); + const auto *YR = y.Real().Read(); + const auto *YI = y.Imag().Read(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = bi * XR[i] + br * XI[i]; + XR[i] = ar * YR[i] - ai * YI[i] + br * XR[i] - bi * XI[i]; + XI[i] = ai * YR[i] + ar * YI[i] + t; + }); + RestoreReal(); + RestoreImag(); +} + +void ComplexVector::AXPBYPCZ(std::complex alpha, const ComplexVector &y, + std::complex beta, const ComplexVector &z, + std::complex gamma) +{ + const int N = Size(); + const double ar = alpha.real(); + const double ai = alpha.imag(); + const double br = beta.real(); + const double bi = beta.imag(); + const double gr = gamma.real(); + const double gi = gamma.imag(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); + const auto *YR = y.Real().Read(); + const auto *YI = y.Imag().Read(); + const auto *ZR 
= z.Real().Read(); + const auto *ZI = z.Imag().Read(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = gi * XR[i] + gr * XI[i]; + XR[i] = ar * YR[i] - ai * YI[i] + br * ZR[i] - bi * ZI[i] + gr * XR[i] - + gi * XI[i]; + XI[i] = ai * YR[i] + ar * YI[i] + bi * ZR[i] + br * ZI[i] + t; + }); + RestoreReal(); + RestoreImag(); +} + +void ComplexOperator::Mult(const Vector &x, Vector &y) const +{ + MFEM_ASSERT(x.Size() == 2 * width && y.Size() == 2 * height, + "Incompatible dimensions for ComplexOperator::Mult!"); + Vector xr, xi, yr, yi; + xr.MakeRef(const_cast(x), 0, width / 2); + xi.MakeRef(const_cast(x), width / 2, width / 2); + yr.MakeRef(y, 0, height / 2); + yi.MakeRef(y, height / 2, height / 2); + Mult(xr, xi, yr, yi); + yr.SyncAliasMemory(y); + yi.SyncAliasMemory(y); +} + +void ComplexOperator::MultTranspose(const Vector &x, Vector &y) const +{ + MFEM_ASSERT(x.Size() == 2 * height && y.Size() == 2 * width, + "Incompatible dimensions for ComplexOperator::MultTranspose!"); + Vector xr, xi, yr, yi; + xr.MakeRef(const_cast(x), 0, height / 2); + xi.MakeRef(const_cast(x), height / 2, height / 2); + yr.MakeRef(y, 0, width / 2); + yi.MakeRef(y, width / 2, width / 2); + MultTranspose(xr, xi, yr, yi); + yr.SyncAliasMemory(y); + yi.SyncAliasMemory(y); +} + +void ComplexOperator::MultHermitianTranspose(const Vector &x, Vector &y) const +{ + MFEM_ASSERT(x.Size() == 2 * height && y.Size() == 2 * width, + "Incompatible dimensions for ComplexOperator::MultHermitianTranspose!"); + Vector xr, xi, yr, yi; + xr.MakeRef(const_cast(x), 0, height / 2); + xi.MakeRef(const_cast(x), height / 2, height / 2); + yr.MakeRef(y, 0, width / 2); + yi.MakeRef(y, width / 2, width / 2); + MultHermitianTranspose(xr, xi, yr, yi); + yr.SyncAliasMemory(y); + yi.SyncAliasMemory(y); +} + +void ComplexOperator::AddMult(const Vector &x, Vector &y, const double a) const +{ + MFEM_ASSERT(x.Size() == 2 * width && y.Size() == 2 * height, + "Incompatible dimensions for 
ComplexOperator::AddMult!"); + Vector xr, xi, yr, yi; + xr.MakeRef(const_cast(x), 0, width / 2); + xi.MakeRef(const_cast(x), width / 2, width / 2); + yr.MakeRef(y, 0, height / 2); + yi.MakeRef(y, height / 2, height / 2); + AddMult(xr, xi, yr, yi, a); + yr.SyncAliasMemory(y); + yi.SyncAliasMemory(y); +} + +void ComplexOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) const +{ + MFEM_ASSERT(x.Size() == 2 * height && y.Size() == 2 * width, + "Incompatible dimensions for ComplexOperator::AddMultTranspose!"); + Vector xr, xi, yr, yi; + xr.MakeRef(const_cast(x), 0, height / 2); + xi.MakeRef(const_cast(x), height / 2, height / 2); + yr.MakeRef(y, 0, width / 2); + yi.MakeRef(y, width / 2, width / 2); + AddMultTranspose(xr, xi, yr, yi, a); + yr.SyncAliasMemory(y); + yi.SyncAliasMemory(y); +} + +void ComplexOperator::AddMultHermitianTranspose(const Vector &x, Vector &y, + const double a) const +{ + MFEM_ASSERT(x.Size() == 2 * height && y.Size() == 2 * width, + "Incompatible dimensions for ComplexOperator::AddMultHermitianTranspose!"); + Vector xr, xi, yr, yi; + xr.MakeRef(const_cast(x), 0, height / 2); + xi.MakeRef(const_cast(x), height / 2, height / 2); + yr.MakeRef(y, 0, width / 2); + yi.MakeRef(y, width / 2, width / 2); + AddMultHermitianTranspose(xr, xi, yr, yi, a); + yr.SyncAliasMemory(y); + yi.SyncAliasMemory(y); +} + +ComplexParOperator::ComplexParOperator(std::unique_ptr &&A, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, + bool test_restrict) + : ComplexOperator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), + A_(std::move(A)), trial_fespace_(trial_fespace), test_fespace_(test_fespace), + use_R_(test_restrict), trial_dbc_tdof_list_(nullptr), test_dbc_tdof_list_(nullptr), + diag_policy_(DiagonalPolicy::DIAG_ONE) +{ + MFEM_VERIFY(A_, "Cannot construct ComplexParOperator from an empty matrix!"); + lxr_.SetSize(A_->Width()); + lxi_.SetSize(A_->Width()); + 
lyr_.SetSize(A_->Height()); + lyi_.SetSize(A_->Height()); + txr_.SetSize(width); + txi_.SetSize(width); + if (height != width) + { + tyr_.SetSize(height); + tyi_.SetSize(height); + } + else + { + tyr_.MakeRef(txr_, 0, height); + tyi_.MakeRef(txi_, 0, height); + } +} + +void ComplexParOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a, bool zero_real, + bool zero_imag) const +{ + MFEM_ASSERT(xr.Size() == width && xi.Size() == width && yr.Size() == height && + yi.Size() == height, + "Incompatible dimensions for ComplexParOperator::AddMult!"); + if (trial_dbc_tdof_list_) + { + txr_ = xr; + txi_ = xi; + txr_.SetSubVector(*trial_dbc_tdof_list_, 0.0); + txi_.SetSubVector(*trial_dbc_tdof_list_, 0.0); + } + if (!zero_real) + { + trial_fespace_.GetProlongationMatrix()->Mult(trial_dbc_tdof_list_ ? txr_ : xr, lxr_); + } + if (!zero_imag) + { + trial_fespace_.GetProlongationMatrix()->Mult(trial_dbc_tdof_list_ ? txi_ : xi, lxi_); + } + + // Apply the operator on the L-vector. 
+ lyr_ = 0.0; + lyi_ = 0.0; + A_->AddMult(lxr_, lxi_, lyr_, lyi_, a, zero_real, zero_imag); + + if (test_dbc_tdof_list_) + { + if (!use_R_) + { + test_fespace_.GetProlongationMatrix()->MultTranspose(lyr_, tyr_); + test_fespace_.GetProlongationMatrix()->MultTranspose(lyi_, tyi_); + } + else + { + test_fespace_.GetRestrictionMatrix()->Mult(lyr_, tyr_); + test_fespace_.GetRestrictionMatrix()->Mult(lyi_, tyi_); + } + { + const int N = test_dbc_tdof_list_->Size(); + auto idx = test_dbc_tdof_list_->Read(); + auto TYR = tyr_.ReadWrite(); + auto TYI = tyi_.ReadWrite(); + if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) + { + const auto *XR = xr.Read(); + const auto *XI = xi.Read(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TYR[id] = XR[id]; + TYI[id] = XI[id]; + }); + } + else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TYR[id] = TYI[id] = 0.0; + }); + } + else + { + MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + } + } + yr += tyr_; + yi += tyi_; + } + else + { + if (!use_R_) + { + test_fespace_.GetProlongationMatrix()->MultTranspose(lyr_, yr); + test_fespace_.GetProlongationMatrix()->MultTranspose(lyi_, yi); + } + else + { + test_fespace_.GetRestrictionMatrix()->Mult(lyr_, yr); + test_fespace_.GetRestrictionMatrix()->Mult(lyi_, yi); + } + } +} + +void ComplexParOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, const std::complex a, + bool zero_real, bool zero_imag) const +{ + MFEM_ASSERT(xr.Size() == height && xi.Size() == height && yr.Size() == width && + yi.Size() == width, + "Incompatible dimensions for ComplexParOperator::AddMultTranspose!"); + if (test_dbc_tdof_list_) + { + tyr_ = xr; + tyi_ = xi; + tyr_.SetSubVector(*test_dbc_tdof_list_, 0.0); + tyi_.SetSubVector(*test_dbc_tdof_list_, 0.0); + } + if (!use_R_) + { + if (!zero_real) + { + 
test_fespace_.GetProlongationMatrix()->Mult(test_dbc_tdof_list_ ? tyr_ : xr, lyr_); + } + if (!zero_imag) + { + test_fespace_.GetProlongationMatrix()->Mult(test_dbc_tdof_list_ ? tyi_ : xi, lyi_); + } + } + else + { + if (!zero_real) + { + test_fespace_.GetRestrictionMatrix()->MultTranspose(test_dbc_tdof_list_ ? tyr_ : xr, + lyr_); + } + if (!zero_imag) + { + test_fespace_.GetRestrictionMatrix()->MultTranspose(test_dbc_tdof_list_ ? tyi_ : xi, + lyi_); + } + } + + // Apply the operator on the L-vector. + lxr_ = 0.0; + lxi_ = 0.0; + A_->AddMultTranspose(lyr_, lyi_, lxr_, lxi_, a, zero_real, zero_imag); + + if (trial_dbc_tdof_list_) + { + trial_fespace_.GetProlongationMatrix()->MultTranspose(lxr_, txr_); + trial_fespace_.GetProlongationMatrix()->MultTranspose(lxi_, txi_); + { + const int N = trial_dbc_tdof_list_->Size(); + auto idx = trial_dbc_tdof_list_->Read(); + auto TXR = txr_.ReadWrite(); + auto TXI = txi_.ReadWrite(); + if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) + { + const auto *XR = xr.Read(); + const auto *XI = xi.Read(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TXR[id] = XR[id]; + TXI[id] = XI[id]; + }); + } + else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TXR[id] = TXI[id] = 0.0; + }); + } + else + { + MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + } + } + yr += txr_; + yi += txi_; + } + else + { + trial_fespace_.GetProlongationMatrix()->AddMultTranspose(lxr_, yr); + trial_fespace_.GetProlongationMatrix()->AddMultTranspose(lxi_, yi); + } +} + +void ComplexParOperator::AddMultHermitianTranspose(const Vector &xr, const Vector &xi, + Vector &yr, Vector &yi, + const std::complex a, + bool zero_real, bool zero_imag) const +{ + MFEM_ASSERT(xr.Size() == height && xi.Size() == height && yr.Size() == width && + yi.Size() == width, + "Incompatible dimensions for 
ComplexParOperator::AddMultHermitianTranspose!"); + if (test_dbc_tdof_list_) + { + tyr_ = xr; + tyi_ = xi; + tyr_.SetSubVector(*test_dbc_tdof_list_, 0.0); + tyi_.SetSubVector(*test_dbc_tdof_list_, 0.0); + } + if (!use_R_) + { + if (!zero_real) + { + test_fespace_.GetProlongationMatrix()->Mult(test_dbc_tdof_list_ ? tyr_ : xr, lyr_); + } + if (!zero_imag) + { + test_fespace_.GetProlongationMatrix()->Mult(test_dbc_tdof_list_ ? tyi_ : xi, lyi_); + } + } + else + { + if (!zero_real) + { + test_fespace_.GetRestrictionMatrix()->MultTranspose(test_dbc_tdof_list_ ? tyr_ : xr, + lyr_); + } + if (!zero_imag) + { + test_fespace_.GetRestrictionMatrix()->MultTranspose(test_dbc_tdof_list_ ? tyi_ : xi, + lyi_); + } + } + + // Apply the operator on the L-vector. + lxr_ = 0.0; + lxi_ = 0.0; + A_->AddMultHermitianTranspose(lyr_, lyi_, lxr_, lxi_, a, zero_real, zero_imag); + + if (trial_dbc_tdof_list_) + { + trial_fespace_.GetProlongationMatrix()->MultTranspose(lxr_, txr_); + trial_fespace_.GetProlongationMatrix()->MultTranspose(lxi_, txi_); + { + const int N = trial_dbc_tdof_list_->Size(); + auto idx = trial_dbc_tdof_list_->Read(); + auto TXR = txr_.ReadWrite(); + auto TXI = txi_.ReadWrite(); + if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) + { + const auto *XR = xr.Read(); + const auto *XI = xi.Read(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TXR[id] = XR[id]; + TXI[id] = XI[id]; + }); + } + else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TXR[id] = TXI[id] = 0.0; + }); + } + else + { + MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + } + } + yr += txr_; + yi += txi_; + } + else + { + trial_fespace_.GetProlongationMatrix()->AddMultTranspose(lxr_, yr); + trial_fespace_.GetProlongationMatrix()->AddMultTranspose(lxi_, yi); + } +} + +ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&Ar, + 
std::unique_ptr &&Ai) + : ComplexOperator(Ar ? Ar->Height() : Ai->Height(), Ar ? Ar->Width() : Ai->Width()), + Ar_(std::move(Ar)), Ai_(std::move(Ai)) +{ + MFEM_VERIFY(Ar_ || Ai_, "Cannot construct ComplexWrapperOperator from an empty matrix!"); + MFEM_VERIFY((!Ar_ || !Ai_) || + (Ar_->Height() == Ai_->Height() && Ar_->Width() == Ai_->Width()), + "Mismatch in dimension of real and imaginary matrix parts!"); + txr_.SetSize(width); + txi_.SetSize(width); + if (height != width) + { + tyr_.SetSize(height); + tyi_.SetSize(height); + } + else + { + tyr_.MakeRef(txr_, 0, height); + tyi_.MakeRef(txi_, 0, height); + } +} + +void ComplexWrapperOperator::Mult(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, bool zero_real, bool zero_imag) const +{ + if (Ar_) + { + if (!zero_real) + { + Ar_->Mult(xr, yr); + } + if (!zero_imag) + { + Ar_->Mult(xi, yi); + } + } + else + { + yr = 0.0; + yi = 0.0; + } + if (Ai_) + { + if (!zero_imag) + { + Ai_->AddMult(xi, yr, -1.0); + } + if (!zero_real) + { + Ai_->AddMult(xr, yi, 1.0); + } + } +} + +void ComplexWrapperOperator::MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, bool zero_real, bool zero_imag) const +{ + if (Ar_) + { + if (!zero_real) + { + Ar_->MultTranspose(xr, yr); + } + if (!zero_imag) + { + Ar_->MultTranspose(xi, yi); + } + } + else + { + yr = 0.0; + yi = 0.0; + } + if (Ai_) + { + if (!zero_imag) + { + Ai_->AddMultTranspose(xi, yr, -1.0); + } + if (!zero_real) + { + Ai_->AddMultTranspose(xr, yi, 1.0); + } + } +} + +void ComplexWrapperOperator::MultHermitianTranspose(const Vector &xr, const Vector &xi, + Vector &yr, Vector &yi, bool zero_real, + bool zero_imag) const +{ + if (Ar_) + { + if (!zero_real) + { + Ar_->MultTranspose(xr, yr); + } + if (!zero_imag) + { + Ar_->MultTranspose(xi, yi); + } + } + else + { + yr = 0.0; + yi = 0.0; + } + if (Ai_) + { + if (!zero_imag) + { + Ai_->AddMultTranspose(xi, yr, 1.0); + } + if (!zero_real) + { + Ai_->AddMultTranspose(xr, yi, -1.0); + } + } +} + 
+void ComplexWrapperOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, const std::complex a, + bool zero_real, bool zero_imag) const +{ + if (a.real() != 0.0 && a.imag() != 0.0) + { + Mult(xr, xi, tyr_, tyi_, zero_real, zero_imag); + const int N = height; + const double ar = a.real(); + const double ai = a.imag(); + auto *YR = yr.ReadWrite(); + auto *YI = yi.ReadWrite(); + const auto *TYR = tyr_.Read(); + const auto *TYI = tyi_.Read(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] += ar * TYR[i] - ai * TYI[i]; + YI[i] += ai * TYR[i] + ar * TYI[i]; + }); + } + else if (a.real() != 0.0) + { + if (Ar_) + { + if (!zero_real) + { + Ar_->AddMult(xr, yr, a.real()); + } + if (!zero_imag) + { + Ar_->AddMult(xi, yi, a.real()); + } + } + if (Ai_) + { + if (!zero_imag) + { + Ai_->AddMult(xi, yr, -a.real()); + } + if (!zero_real) + { + Ai_->AddMult(xr, yi, a.real()); + } + } + } + else if (a.imag() != 0.0) + { + if (Ar_) + { + if (!zero_real) + { + Ar_->AddMult(xr, yi, a.imag()); + } + if (!zero_imag) + { + Ar_->AddMult(xi, yr, -a.imag()); + } + } + if (Ai_) + { + if (!zero_imag) + { + Ai_->AddMult(xi, yi, -a.imag()); + } + if (!zero_real) + { + Ai_->AddMult(xr, yr, -a.imag()); + } + } + } +} + +void ComplexWrapperOperator::AddMultTranspose(const Vector &xr, const Vector &xi, + Vector &yr, Vector &yi, + const std::complex a, bool zero_real, + bool zero_imag) const +{ + if (a.real() != 0.0 && a.imag() != 0.0) + { + MultTranspose(xr, xi, txr_, txi_, zero_real, zero_imag); + const int N = width; + const double ar = a.real(); + const double ai = a.imag(); + auto *YR = yr.ReadWrite(); + auto *YI = yi.ReadWrite(); + const auto *TXR = txr_.Read(); + const auto *TXI = txi_.Read(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] += ar * TXR[i] - ai * TXI[i]; + YI[i] += ai * TXR[i] + ar * TXI[i]; + }); + } + else if (a.real() != 0.0) + { + if (Ar_) + { + if (!zero_real) + { + Ar_->AddMultTranspose(xr, yr, a.real()); + } + if 
(!zero_imag) + { + Ar_->AddMultTranspose(xi, yi, a.real()); + } + } + if (Ai_) + { + if (!zero_imag) + { + Ai_->AddMultTranspose(xi, yr, -a.real()); + } + if (!zero_real) + { + Ai_->AddMultTranspose(xr, yi, a.real()); + } + } + } + else if (a.imag() != 0.0) + { + if (Ar_) + { + if (!zero_real) + { + Ar_->AddMultTranspose(xr, yi, a.imag()); + } + if (!zero_imag) + { + Ar_->AddMultTranspose(xi, yr, -a.imag()); + } + } + if (Ai_) + { + if (!zero_imag) + { + Ai_->AddMultTranspose(xi, yi, -a.imag()); + } + if (!zero_real) + { + Ai_->AddMultTranspose(xr, yr, -a.imag()); + } + } + } +} + +void ComplexWrapperOperator::AddMultHermitianTranspose(const Vector &xr, const Vector &xi, + Vector &yr, Vector &yi, + const std::complex a, + bool zero_real, bool zero_imag) const +{ + if (a.real() != 0.0 && a.imag() != 0.0) + { + MultHermitianTranspose(xr, xi, txr_, txi_, zero_real, zero_imag); + const int N = width; + const double ar = a.real(); + const double ai = a.imag(); + auto *YR = yr.ReadWrite(); + auto *YI = yi.ReadWrite(); + const auto *TXR = txr_.Read(); + const auto *TXI = txi_.Read(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] += ar * TXR[i] - ai * TXI[i]; + YI[i] += ai * TXR[i] + ar * TXI[i]; + }); + } + else if (a.real() != 0.0) + { + if (Ar_) + { + if (!zero_real) + { + Ar_->AddMultTranspose(xr, yr, a.real()); + } + if (!zero_imag) + { + Ar_->AddMultTranspose(xi, yi, a.real()); + } + } + if (Ai_) + { + if (!zero_imag) + { + Ai_->AddMultTranspose(xi, yr, a.real()); + } + if (!zero_real) + { + Ai_->AddMultTranspose(xr, yi, -a.real()); + } + } + } + else if (a.imag() != 0.0) + { + if (Ar_) + { + if (!zero_real) + { + Ar_->AddMultTranspose(xr, yi, a.imag()); + } + if (!zero_imag) + { + Ar_->AddMultTranspose(xi, yr, -a.imag()); + } + } + if (Ai_) + { + if (!zero_imag) + { + Ai_->AddMultTranspose(xi, yi, a.imag()); + } + if (!zero_real) + { + Ai_->AddMultTranspose(xr, yr, a.imag()); + } + } + } +} + +} // namespace palace diff --git 
a/palace/linalg/complex.hpp b/palace/linalg/complex.hpp new file mode 100644 index 000000000..316a71381 --- /dev/null +++ b/palace/linalg/complex.hpp @@ -0,0 +1,490 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LINALG_COMPLEX_HPP +#define PALACE_LINALG_COMPLEX_HPP + +#include +#include +#include +#include +#include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" + +namespace palace +{ + +// +// Vector and operator classes for complex-valued linear algebra. +// + +// A complex-valued vector represented as two real vectors, one for each component. +class ComplexVector : public Vector +{ +private: + Vector xr_, xi_; + +public: + // Create a vector with the given size. + ComplexVector(int n = 0); + + // Copy constructor. + ComplexVector(const ComplexVector &x); + + // Copy constructor from separately provided real and imaginary parts. + ComplexVector(const Vector &xr, const Vector &xi); + + // Returns the vector size. + int Size() const; + + // Set the size of the vector. See the notes for Vector::SetSize for behavior in the + // cases where n is less than or greater than Size() or Capacity(). + void SetSize(int n); + + // Get const access to the real and imaginary vector parts. Assumes that these are + // synchronized following a Sync() call. + const Vector &Real() const { return xr_; } + const Vector &Imag() const { return xi_; } + + // Get access to the real and imaginary vector parts with required synchronization with + // the underlying storage. + Vector &Real() + { + xr_.SyncMemory(*this); + return xr_; + } + Vector &Imag() + { + xi_.SyncMemory(*this); + return xi_; + } + void RestoreReal() { xr_.SyncAliasMemory(*this); } + void RestoreImag() { xi_.SyncAliasMemory(*this); } + + // Copy assignment operator. This should probably not be used to modify the size of the + // vector. 
+ ComplexVector &operator=(const ComplexVector &y); + + // Copy assignment from separately provided real and imaginary parts. + void Set(const Vector &yr, const Vector &yi); + + // Replace entries with complex conjugate. + void Conj(); + + // Scale all entries by s. + ComplexVector &operator*=(std::complex s); + + // Vector dot product (yᴴ x) or indefinite dot product (yᵀ x) for complex vectors. + std::complex Dot(const ComplexVector &y) const; + std::complex TransposeDot(const ComplexVector &y) const; + + // In-place addition x += alpha * y. + void AXPY(std::complex alpha, const ComplexVector &y); + + // In-place addition x = alpha * y + beta * x. + void AXPBY(std::complex alpha, const ComplexVector &y, std::complex beta); + + // In-place addition x = alpha * y + beta * z + gamma * x. + void AXPBYPCZ(std::complex alpha, const ComplexVector &y, + std::complex beta, const ComplexVector &z, + std::complex gamma); + + // Update the memory location of the real and imaginary parts to match the underlying + // storage, or vice versa. + void Sync() + { + xr_.SyncMemory(*this); + xi_.SyncMemory(*this); + } + void SyncAlias() + { + xr_.SyncAliasMemory(*this); + xi_.SyncAliasMemory(*this); + } +}; + +// Abstract base class for complex-valued operators. +class ComplexOperator : public Operator +{ +public: + ComplexOperator(int s) : Operator(s) {} + ComplexOperator(int h, int w) : Operator(h, w) {} + + // Test whether or not the operator is purely real or imaginary. + virtual bool IsReal() const = 0; + virtual bool IsImag() const = 0; + + // Get access to the real and imaginary operator parts. 
+ virtual const Operator &Real() const + { + MFEM_ABORT("Real() is not implemented for base class ComplexOperator!"); + return *this; + } + virtual Operator &Real() + { + MFEM_ABORT("Real() is not implemented for base class ComplexOperator!"); + return *this; + } + virtual const Operator &Imag() const + { + MFEM_ABORT("Imag() is not implemented for base class ComplexOperator!"); + return *this; + } + virtual Operator &Imag() + { + MFEM_ABORT("Imag() is not implemented for base class ComplexOperator!"); + return *this; + } + + void Mult(const Vector &x, Vector &y) const override; + + void Mult(const ComplexVector &x, ComplexVector &y) const + { + Mult(x.Real(), x.Imag(), y.Real(), y.Imag()); + } + + virtual void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const = 0; + + void MultTranspose(const Vector &x, Vector &y) const override; + + void MultTranspose(const ComplexVector &x, ComplexVector &y) const + { + MultTranspose(x.Real(), x.Imag(), y.Real(), y.Imag()); + } + + virtual void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const = 0; + + void MultHermitianTranspose(const Vector &x, Vector &y) const; + + void MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const + { + MultHermitianTranspose(x.Real(), x.Imag(), y.Real(), y.Imag()); + } + + virtual void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, bool zero_real = false, + bool zero_imag = false) const = 0; + + void AddMult(const Vector &x, Vector &y, const double a = 1.0) const override; + + void AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const + { + AddMult(x.Real(), x.Imag(), y.Real(), y.Imag(), a); + } + + virtual void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = false) const = 
0;
+
+  void AddMultTranspose(const Vector &x, Vector &y, const double a = 1.0) const override;
+
+  // Convenience overload: forwards the real and imaginary parts of x and y to the
+  // split-vector virtual overload below.
+  void AddMultTranspose(const ComplexVector &x, ComplexVector &y,
+                        const std::complex a = 1.0) const
+  {
+    AddMultTranspose(x.Real(), x.Imag(), y.Real(), y.Imag(), a);
+  }
+
+  virtual void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi,
+                                const std::complex a = 1.0, bool zero_real = false,
+                                bool zero_imag = false) const = 0;
+
+  void AddMultHermitianTranspose(const Vector &x, Vector &y, const double a = 1.0) const;
+
+  // Convenience overload: forwards the real and imaginary parts of x and y to the
+  // split-vector virtual overload below.
+  void AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y,
+                                 const std::complex a = 1.0) const
+  {
+    AddMultHermitianTranspose(x.Real(), x.Imag(), y.Real(), y.Imag(), a);
+  }
+
+  virtual void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr,
+                                         Vector &yi, const std::complex a = 1.0,
+                                         bool zero_real = false,
+                                         bool zero_imag = false) const = 0;
+};
+
+// A parallel complex-valued operator represented by RᵀAP for complex-valued A, constructed
+// through the actions of Rᵀ, A, and P with possible eliminated essential BC.
+class ComplexParOperator : public ComplexOperator
+{
+private:
+  // Local operator A; ownership is taken in the constructor.
+  std::unique_ptr A_;
+  const mfem::ParFiniteElementSpace &trial_fespace_, &test_fespace_;
+  // NOTE(review): presumably set from the constructor's test_restrict argument to select
+  // the restriction matrix instead of Pᵀ on the test side -- confirm in the implementation.
+  const bool use_R_;
+
+  // Lists of constrained essential boundary true dofs for elimination.
+  // Only pointers are stored: the arrays passed to SetEssentialTrueDofs() are not copied.
+  mutable const mfem::Array *trial_dbc_tdof_list_, *test_dbc_tdof_list_;
+
+  // Diagonal policy for constrained true dofs.
+  DiagonalPolicy diag_policy_;
+
+  // Temporary storage for operator application.
+  mutable Vector lxr_, lxi_, lyr_, lyi_, txr_, txi_, tyr_, tyi_;
+
+public:
+  // Construct the complex-valued parallel operator, inheriting ownership of the local
+  // operator.
+  ComplexParOperator(std::unique_ptr &&A,
+                     const mfem::ParFiniteElementSpace &trial_fespace,
+                     const mfem::ParFiniteElementSpace &test_fespace,
+                     bool test_restrict = false);
+
+  // Get access to the underlying local (L-vector) operator.
+  // Aborts via MFEM_VERIFY if no local operator is held.
+  const ComplexOperator &GetOperator() const
+  {
+    MFEM_VERIFY(A_, "No local matrix available for ComplexParOperator::GetOperator!");
+    return *A_;
+  }
+
+  // Set essential boundary condition true dofs for square operators.
+  // The same list is used for the trial and test spaces. The address of dbc_tdof_list is
+  // stored, so the caller must keep the array alive while the operator uses it.
+  void SetEssentialTrueDofs(const mfem::Array &dbc_tdof_list,
+                            DiagonalPolicy diag_policy)
+  {
+    MFEM_VERIFY(height == width, "Set essential true dofs for both test and trial spaces "
+                                 "for rectangular ComplexParOperator!");
+    trial_dbc_tdof_list_ = &dbc_tdof_list;
+    test_dbc_tdof_list_ = &dbc_tdof_list;
+    diag_policy_ = diag_policy;
+  }
+
+  // Set essential boundary condition true dofs for rectangular operators.
+  // Only DiagonalPolicy::DIAG_ZERO is accepted. The addresses of both lists are stored, so
+  // the caller must keep the arrays alive while the operator uses them.
+  void SetEssentialTrueDofs(const mfem::Array &trial_dbc_tdof_list,
+                            const mfem::Array &test_dbc_tdof_list,
+                            DiagonalPolicy diag_policy)
+  {
+    MFEM_VERIFY(diag_policy == DiagonalPolicy::DIAG_ZERO,
+                "Essential boundary condition true dof elimination for rectangular "
+                "ComplexParOperator only supports DiagonalPolicy::DIAG_ZERO!");
+    trial_dbc_tdof_list_ = &trial_dbc_tdof_list;
+    test_dbc_tdof_list_ = &test_dbc_tdof_list;
+    diag_policy_ = diag_policy;
+  }
+
+  // Get the essential boundary condition true dofs associated with the operator. May be
+  // nullptr.
+  // NOTE(review): this getter does not modify the object but is not declared const.
+  void GetEssentialTrueDofs(const mfem::Array *&trial_dbc_tdof_list,
+                            const mfem::Array *&test_dbc_tdof_list)
+  {
+    trial_dbc_tdof_list = trial_dbc_tdof_list_;
+    test_dbc_tdof_list = test_dbc_tdof_list_;
+  }
+
+  // Set the diagonal policy for the operator.
+  void SetDiagonalPolicy(DiagonalPolicy diag_policy) { diag_policy_ = diag_policy; }
+
+  // Get the associated MPI communicator.
+  MPI_Comm GetComm() const { return trial_fespace_.GetComm(); }
+
+  // Both queries dereference A_ without a null check.
+  bool IsReal() const override { return A_->IsReal(); }
+  bool IsImag() const override { return A_->IsImag(); }
+
+  // Keep the base-class overloads visible alongside the overrides declared below.
+  using ComplexOperator::AddMult;
+  using ComplexOperator::AddMultHermitianTranspose;
+  using ComplexOperator::AddMultTranspose;
+  using ComplexOperator::Mult;
+  using ComplexOperator::MultHermitianTranspose;
+  using ComplexOperator::MultTranspose;
+
+  // The three non-accumulating products below zero both output parts and then call the
+  // matching AddMult* method with a = 1.0.
+  void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi,
+            bool zero_real = false, bool zero_imag = false) const override
+  {
+    yr = 0.0;
+    yi = 0.0;
+    AddMult(xr, xi, yr, yi, 1.0, zero_real, zero_imag);
+  }
+
+  void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi,
+                     bool zero_real = false, bool zero_imag = false) const override
+  {
+    yr = 0.0;
+    yi = 0.0;
+    AddMultTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag);
+  }
+
+  void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi,
+                              bool zero_real = false, bool zero_imag = false) const override
+  {
+    yr = 0.0;
+    yi = 0.0;
+    AddMultHermitianTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag);
+  }
+
+  void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi,
+               const std::complex a = 1.0, bool zero_real = false,
+               bool zero_imag = false) const override;
+
+  void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi,
+                        const std::complex a = 1.0, bool zero_real = false,
+                        bool zero_imag = false) const override;
+
+  void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi,
+                                 const std::complex a = 1.0, bool zero_real = false,
+                                 bool zero_imag = false) const override;
+};
+
+// A complex-valued operator represented using a block 2x2 equivalent-real formulation.
+class ComplexWrapperOperator : public ComplexOperator
+{
+private:
+  // Real and imaginary parts. Either may be null (see IsReal() and IsImag()).
+  std::unique_ptr Ar_, Ai_;
+
+  // Temporary storage for operator application.
+  mutable Vector txr_, txi_, tyr_, tyi_;
+
+public:
+  // Construct a complex operator which inherits ownership of the input real and imaginary
+  // parts.
+  ComplexWrapperOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai);
+
+  // The operator is purely real when no imaginary part is held, and purely imaginary when
+  // no real part is held.
+  bool IsReal() const override { return Ai_ == nullptr; }
+  bool IsImag() const override { return Ar_ == nullptr; }
+
+  // These accessors dereference Ar_ / Ai_ without a null check, so callers must not
+  // request a part that is absent (check IsImag() before Real(), IsReal() before Imag()).
+  const Operator &Real() const override { return *Ar_; }
+  Operator &Real() override { return *Ar_; }
+  const Operator &Imag() const override { return *Ai_; }
+  Operator &Imag() override { return *Ai_; }
+
+  // Keep the base-class overloads visible alongside the overrides declared below.
+  using ComplexOperator::AddMult;
+  using ComplexOperator::AddMultHermitianTranspose;
+  using ComplexOperator::AddMultTranspose;
+  using ComplexOperator::Mult;
+  using ComplexOperator::MultHermitianTranspose;
+  using ComplexOperator::MultTranspose;
+
+  void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi,
+            bool zero_real = false, bool zero_imag = false) const override;
+
+  void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi,
+                     bool zero_real = false, bool zero_imag = false) const override;
+
+  void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi,
+                              bool zero_real = false,
+                              bool zero_imag = false) const override;
+
+  void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi,
+               const std::complex a = 1.0, bool zero_real = false,
+               bool zero_imag = false) const override;
+
+  void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi,
+                        const std::complex a = 1.0, bool zero_real = false,
+                        bool zero_imag = false) const override;
+
+  void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi,
+                                 const std::complex a = 1.0, bool zero_real = false,
+                                 bool zero_imag = false) const override;
+};
+
+// Wrap a sequence of operators of the same dimensions and optional coefficients.
+class ComplexSumOperator : public ComplexOperator +{ +private: + std::vector>> ops_; + +public: + ComplexSumOperator(int s) : ComplexOperator(s) {} + ComplexSumOperator(int h, int w) : ComplexOperator(h, w) {} + ComplexSumOperator(const ComplexOperator &op, std::complex c = 1.0) + : ComplexOperator(op.Height(), op.Width()) + { + AddOperator(op, c); + } + + void AddOperator(const ComplexOperator &op, std::complex c = 1.0) + { + MFEM_VERIFY(op.Height() == height && op.Width() == width, + "Invalid Operator dimensions for ComplexSumOperator!"); + ops_.emplace_back(&op, c); + } + + bool IsReal() const override + { + for (const auto &[op, c] : ops_) + { + if (!op->IsReal()) + { + return false; + } + } + return true; + } + + bool IsImag() const override + { + for (const auto &[op, c] : ops_) + { + if (!op->IsImag()) + { + return false; + } + } + return true; + } + + using ComplexOperator::AddMult; + using ComplexOperator::AddMultHermitianTranspose; + using ComplexOperator::AddMultTranspose; + using ComplexOperator::Mult; + using ComplexOperator::MultHermitianTranspose; + using ComplexOperator::MultTranspose; + + void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const override + { + yr = 0.0; + yi = 0.0; + AddMult(xr, xi, yr, yi, 1.0, zero_real, zero_imag); + } + + void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const override + { + yr = 0.0; + yi = 0.0; + AddMultTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); + } + + void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const override + { + yr = 0.0; + yi = 0.0; + AddMultHermitianTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); + } + + void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = 
false) const override + { + for (const auto &[op, c] : ops_) + { + op->AddMult(xr, xi, yr, yi, a * c, zero_real, zero_imag); + } + } + + void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = false) const override + { + for (const auto &[op, c] : ops_) + { + op->AddMultTranspose(xr, xi, yr, yi, a * c, zero_real, zero_imag); + } + } + + void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = false) const override + { + for (const auto &[op, c] : ops_) + { + op->AddMultTranspose(xr, xi, yr, yi, a * c, zero_real, zero_imag); + } + } +}; + +} // namespace palace + +#endif // PALACE_LINALG_COMPLEX_HPP diff --git a/palace/linalg/curlcurl.cpp b/palace/linalg/curlcurl.cpp index 15067d8e8..61dfab6da 100644 --- a/palace/linalg/curlcurl.cpp +++ b/palace/linalg/curlcurl.cpp @@ -11,15 +11,20 @@ namespace palace { -CurlCurlSolver::CurlCurlSolver(const MaterialOperator &mat_op, - const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpaceHierarchy &nd_fespaces, - mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, - double tol, int max_it, int print) +CurlCurlMassSolver::CurlCurlMassSolver(const MaterialOperator &mat_op, + const mfem::Array &dbc_marker, + mfem::ParFiniteElementSpaceHierarchy &nd_fespaces, + mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, + double tol, int max_it, int print) : mfem::Solver(nd_fespaces.GetFinestFESpace().GetTrueVSize()) { - MaterialPropertyCoefficient muinv_func(mat_op); - MaterialPropertyCoefficient epsilon_func(mat_op); + + // XX TODO NEW ParOperator FRAMEWORK + + constexpr MaterialPropertyType MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY; + constexpr MaterialPropertyType MatTypeEps = MaterialPropertyType::PERMITTIVITY_REAL; + MaterialPropertyCoefficient muinv_func(mat_op); + MaterialPropertyCoefficient epsilon_func(mat_op); 
MFEM_VERIFY(dbc_marker.Size() == nd_fespaces.GetFinestFESpace().GetParMesh()->bdr_attributes.Max(), "Invalid boundary marker for curl-curl solver!"); diff --git a/palace/linalg/curlcurl.hpp b/palace/linalg/curlcurl.hpp index a71e25257..65820825e 100644 --- a/palace/linalg/curlcurl.hpp +++ b/palace/linalg/curlcurl.hpp @@ -16,7 +16,7 @@ class MaterialOperator; // // This solver implements a solver for the operator K + M in the Nedelec space. // -class CurlCurlSolver : public mfem::Solver +class CurlCurlMassSolver : public mfem::Solver { private: // H(curl) norm operator A = K + M. @@ -30,10 +30,10 @@ class CurlCurlSolver : public mfem::Solver mutable mfem::Vector xr, xi, yr, yi; public: - CurlCurlSolver(const MaterialOperator &mat_op, const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpaceHierarchy &nd_fespaces, - mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, double tol, int max_it, - int print); + CurlCurlMassSolver(const MaterialOperator &mat_op, const mfem::Array &dbc_marker, + mfem::ParFiniteElementSpaceHierarchy &nd_fespaces, + mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, double tol, + int max_it, int print); // Operator is set in constructor. void SetOperator(const mfem::Operator &op) override {} diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp index 8c63d84a0..3d31cf804 100644 --- a/palace/linalg/divfree.cpp +++ b/palace/linalg/divfree.cpp @@ -12,6 +12,11 @@ namespace palace { +// XX TODO: THIS PROBABLY NEEDS TO CONSIDER IN ALL 3 BILINEAR FORMS THE EFFECTS OF +// THE INPUT BDR_MARKER? 
+ +// XX TODO NEW ParOperator FRAMEWORK + DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, const mfem::Array &bdr_marker, mfem::ParFiniteElementSpace &nd_fespace, @@ -19,7 +24,8 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, int max_it, int print) : mfem::Solver(nd_fespace.GetTrueVSize()) { - MaterialPropertyCoefficient epsilon_func(mat_op); + constexpr MaterialPropertyType MatType = MaterialPropertyType::PERMITTIVITY_REAL; + MaterialPropertyCoefficient epsilon_func(mat_op); MFEM_VERIFY(bdr_marker.Size() == h1_fespaces.GetFinestFESpace().GetParMesh()->bdr_attributes.Max(), "Invalid boundary marker for divergence-free solver!"); diff --git a/palace/linalg/feast.cpp b/palace/linalg/feast.cpp index 24d1d7a6c..83e664c37 100644 --- a/palace/linalg/feast.cpp +++ b/palace/linalg/feast.cpp @@ -3,6 +3,8 @@ #include "feast.hpp" +#if 0 // XX TODO DISABLE FEAST FOR NOW + #if defined(PALACE_WITH_SLEPC) #include @@ -1292,3 +1294,5 @@ PetscErrorCode __mat_apply_FEAST_PEP(Mat A, Vec x, Vec y) } #endif + +#endif diff --git a/palace/linalg/feast.hpp b/palace/linalg/feast.hpp index 63547c422..813bbba66 100644 --- a/palace/linalg/feast.hpp +++ b/palace/linalg/feast.hpp @@ -4,6 +4,8 @@ #ifndef PALACE_LINALG_FEAST_HPP #define PALACE_LINALG_FEAST_HPP +#if 0 // XX TODO DISABLE FEAST FOR NOW + #if defined(PALACE_WITH_SLEPC) #include "linalg/petsc.hpp" @@ -284,4 +286,6 @@ class FeastPEPSolver : public FeastEigenSolver #endif +#endif + #endif // PALACE_LINALG_FEAST_HPP diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index 3083e5dea..e71c19a29 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -112,6 +112,8 @@ void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const } GetProlongationAtLevel(l - 1).ArrayMultTranspose(R_[l], X_[l - 1]); + // XX TODO FIX DIRICHLET BCS HERE LIKE FOR DIST RELAXATION... + // Coarse grid correction. 
VCycle(l - 1, false); diff --git a/palace/linalg/gmg.hpp b/palace/linalg/gmg.hpp index eede99036..c17b4fe03 100644 --- a/palace/linalg/gmg.hpp +++ b/palace/linalg/gmg.hpp @@ -12,6 +12,8 @@ namespace palace { +// XX TODO STORE DBC TDOFS AND APPLY AFTER PROLONGATION TRANSPOSE + // // Geometric multigrid preconditioner using a given coarse solver for the provided // hierarchy of finite element spaces. Optionally can be configured to use auxiliary space diff --git a/palace/linalg/hypre.hpp b/palace/linalg/hypre.hpp index 3dbc954f9..606e5c2b9 100644 --- a/palace/linalg/hypre.hpp +++ b/palace/linalg/hypre.hpp @@ -9,6 +9,8 @@ namespace mfem { +// XX TODO REMOVE WHAT CAN BE REMOVED... + // Convenience wrapper for casting away the const on the pointers and dispatching onto the // original function that has the argument type: mfem::Array2D &. mfem::HypreParMatrix * diff --git a/palace/linalg/jacobi.cpp b/palace/linalg/jacobi.cpp new file mode 100644 index 000000000..61d610cd6 --- /dev/null +++ b/palace/linalg/jacobi.cpp @@ -0,0 +1,28 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#include "jacobi.hpp" + +#include + +namespace palace +{ + +JacobiSmoother::JacobiSmoother(const mfem::Vector &diag) : mfem::Solver(diag.Size()) +{ + dinv = diag; + // dinv.Reciprocal(); //XX TODO NEED MFEM PATCH +} + +void JacobiSmoother::Mult(const mfem::Vector &x, mfem::Vector &y) const +{ + MFEM_ASSERT(!iterative_mode, + "JacobiSmoother is not implemented for iterative_mode = true!"); + const int N = height; + const auto *DI = dinv.Read(); + const auto *X = x.Read(); + auto *Y = y.Write(); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { Y[i] = DI[i] * X[i]; }); +} + +} // namespace palace diff --git a/palace/linalg/jacobi.hpp b/palace/linalg/jacobi.hpp new file mode 100644 index 000000000..433d5aa3d --- /dev/null +++ b/palace/linalg/jacobi.hpp @@ -0,0 +1,35 @@ +// Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LINALG_JACOBI_SMOOTHER_HPP
+#define PALACE_LINALG_JACOBI_SMOOTHER_HPP
+
+#include
+
+namespace palace
+{
+
+//
+// Simple Jacobi smoother using a provided diagonal vector, usually the output of
+// AssembleDiagonal() which allows for (approximate) diagonal construction for matrix-free
+// operators.
+//
+class JacobiSmoother : public mfem::Solver
+{
+private:
+  // Inverse diagonal scaling of the operator.
+  mfem::Vector dinv;
+
+public:
+  // Construct the smoother from the operator diagonal.
+  JacobiSmoother(const mfem::Vector &diag);
+
+  // No-op: the diagonal is supplied at construction, so the operator argument is unused.
+  void SetOperator(const mfem::Operator &op) override {}
+
+  void Mult(const mfem::Vector &x, mfem::Vector &y) const override;
+
+  // A diagonal scaling is its own transpose, so this forwards to Mult().
+  void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override { Mult(x, y); }
+};
+
+} // namespace palace
+
+#endif // PALACE_LINALG_JACOBI_SMOOTHER_HPP
diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp
new file mode 100644
index 000000000..6aa33ab1c
--- /dev/null
+++ b/palace/linalg/operator.cpp
@@ -0,0 +1,444 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "operator.hpp"
+
+#include
+#include "linalg/complex.hpp"
+#include "utils/communication.hpp"
+
+namespace palace
+{
+
+// Take ownership of the local operator A and size the work vectors. The operator height
+// and width are the true-dof sizes of the test and trial spaces; lx_/ly_ match the local
+// operator, tx_/ty_ match the true-dof sizes. No essential dofs are set initially and the
+// diagonal policy defaults to DIAG_ONE.
+ParOperator::ParOperator(std::unique_ptr &&A,
+                         const mfem::ParFiniteElementSpace &trial_fespace,
+                         const mfem::ParFiniteElementSpace &test_fespace,
+                         bool test_restrict)
+  : Operator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), A_(std::move(A)),
+    trial_fespace_(trial_fespace), test_fespace_(test_fespace), use_R_(test_restrict),
+    trial_dbc_tdof_list_(nullptr), test_dbc_tdof_list_(nullptr),
+    diag_policy_(DiagonalPolicy::DIAG_ONE), RAP_(nullptr)
+{
+  MFEM_VERIFY(A_, "Cannot construct ParOperator from an empty matrix!");
+  lx_.SetSize(A_->Width());
+  ly_.SetSize(A_->Height());
+  tx_.SetSize(width);
+  if (height != width)
+  {
+    ty_.SetSize(height);
+  }
+  else
+  {
+    // Square case: ty_ is a reference to the storage of tx_, so the two vectors alias.
+    ty_.MakeRef(tx_, 0, height);
+  }
+}
+
+// Eliminate the essential true dofs from the right-hand side: subtract from b the
+// unconstrained operator applied to the essential values of x, then overwrite the test
+// essential entries of b according to the diagonal policy. Does nothing if either dof
+// list is unset, and requires the local operator to still be available.
+void ParOperator::EliminateRHS(const Vector &x, Vector &b) const
+{
+  if (!trial_dbc_tdof_list_ || !test_dbc_tdof_list_)
+  {
+    return;
+  }
+
+  MFEM_VERIFY(A_, "No local matrix available for ParOperator::EliminateRHS!");
+  // tx_ holds x on the trial essential dofs and zero elsewhere.
+  tx_ = 0.0;
+  {
+    const int N = trial_dbc_tdof_list_->Size();
+    const auto *idx = trial_dbc_tdof_list_->Read();
+    const auto *X = x.Read();
+    auto *TX = tx_.ReadWrite();
+    mfem::forall(N,
+                 [=] MFEM_HOST_DEVICE(int i)
+                 {
+                   const int id = idx[i];
+                   TX[id] = X[id];
+                 });
+  }
+
+  // Apply the unconstrained operator.
+  // The dof lists are cleared around the AddMult call so that it applies no elimination,
+  // then restored.
+  const mfem::Array *b_trial_dbc_tdof_list_ = trial_dbc_tdof_list_;
+  const mfem::Array *b_test_dbc_tdof_list_ = test_dbc_tdof_list_;
+  trial_dbc_tdof_list_ = test_dbc_tdof_list_ = nullptr;
+  AddMult(tx_, b, -1.0);
+  trial_dbc_tdof_list_ = b_trial_dbc_tdof_list_;
+  test_dbc_tdof_list_ = b_test_dbc_tdof_list_;
+
+  {
+    // Set the essential entries of b: the value of x for a square DIAG_ONE operator, zero
+    // for DIAG_ZERO or any rectangular operator.
+    const int N = test_dbc_tdof_list_->Size();
+    auto idx = test_dbc_tdof_list_->Read();
+    auto B = b.ReadWrite();
+    if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width)
+    {
+      const auto *X = x.Read();
+      mfem::forall(N,
+                   [=] MFEM_HOST_DEVICE(int i)
+                   {
+                     const int id = idx[i];
+                     B[id] = X[id];
+                   });
+    }
+    else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width)
+    {
+      mfem::forall(N,
+                   [=] MFEM_HOST_DEVICE(int i)
+                   {
+                     const int id = idx[i];
+                     B[id] = 0.0;
+                   });
+    }
+    else
+    {
+      MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!");
+    }
+  }
+}
+
+// Assemble the diagonal of the parallel operator into diag. When the operator has already
+// been assembled as a Hypre matrix, its diagonal is returned directly.
+void ParOperator::AssembleDiagonal(Vector &diag) const
+{
+  if (RAP_)
+  {
+    RAP_->GetDiag(diag);
+    return;
+  }
+
+  // For an AMR mesh, a convergent diagonal is assembled with |P|ᵀ dₗ, where |P| has
+  // entry-wise absolute values of the conforming prolongation operator.
+ MFEM_VERIFY(&trial_fespace_ == &test_fespace_, + "Diagonal assembly is only available for square ParOperator!"); + A_->AssembleDiagonal(ly_); + const Operator *P = test_fespace_.GetProlongationMatrix(); + if (const auto *hP = dynamic_cast(P)) + { + hP->AbsMultTranspose(1.0, ly_, 0.0, diag); + } + else + { + P->MultTranspose(ly_, diag); + } + + if (test_dbc_tdof_list_) + { + const int N = test_dbc_tdof_list_->Size(); + const auto *idx = test_dbc_tdof_list_->Read(); + auto *D = diag.ReadWrite(); + if (diag_policy_ == DiagonalPolicy::DIAG_ONE) + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + D[id] = 1.0; + }); + } + else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO) + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + D[id] = 0.0; + }); + } + else + { + MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + } + } +} + +mfem::HypreParMatrix &ParOperator::ParallelAssemble() +{ + if (!RAP_) + { + auto *bfA = dynamic_cast(A_.get()); + auto *mbfA = dynamic_cast(A_.get()); + auto *lA = dynamic_cast(A_.get()); + if (bfA || lA) + { + MFEM_VERIFY(&trial_fespace_ == &test_fespace_ && (!lA || lA->Height() == lA->Width()), + "Only square ParOperator should use a BilinearForm or SparseMatrix!"); + if (bfA) + { + + // XX TODO MFEM PATCH + + // lA = bfA->HasSpMat() ? bfA->LoseMat() : + // mfem::ceed::CeedOperatorFullAssemble(*bfA); + } + mfem::HypreParMatrix *hA = + new mfem::HypreParMatrix(trial_fespace_.GetComm(), trial_fespace_.GlobalVSize(), + trial_fespace_.GetDofOffsets(), lA); + const mfem::HypreParMatrix *P = trial_fespace_.Dof_TrueDof_Matrix(); + RAP_ = + std::make_unique(hypre_ParCSRMatrixRAP(*P, *hA, *P), true); + delete hA; + if (bfA) + { + delete lA; + } + } + else if (mbfA) + { + + // XX TODO MFEM PATCH + + // lA = mbfA->HasSpMat() ? 
mbfA->LoseMat() + // : mfem::ceed::CeedOperatorFullAssemble(*mbfA, use_R_); + mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( + trial_fespace_.GetComm(), test_fespace_.GlobalVSize(), + trial_fespace_.GlobalVSize(), test_fespace_.GetDofOffsets(), + trial_fespace_.GetDofOffsets(), lA); + const mfem::HypreParMatrix *P = trial_fespace_.Dof_TrueDof_Matrix(); + if (!use_R_) + { + const mfem::HypreParMatrix *Rt = test_fespace_.Dof_TrueDof_Matrix(); + RAP_ = std::make_unique(hypre_ParCSRMatrixRAP(*Rt, *hA, *P), + true); + } + else + { + mfem::SparseMatrix *sRt = mfem::Transpose(*test_fespace_.GetRestrictionMatrix()); + mfem::HypreParMatrix *hRt = new mfem::HypreParMatrix( + trial_fespace_.GetComm(), trial_fespace_.GlobalVSize(), + trial_fespace_.GlobalTrueVSize(), trial_fespace_.GetDofOffsets(), + trial_fespace_.GetTrueDofOffsets(), sRt); + RAP_ = std::make_unique(hypre_ParCSRMatrixRAP(*hRt, *hA, *P), + true); + delete sRt; + delete hRt; + } + delete hA; + delete lA; + } + else + { + MFEM_ABORT("ParOperator::ParallelAssemble requires A as a BilinearForm or " + "MixedBilinearForm!"); + } + + // Delete the original local operator. + A_.reset(); + + // Eliminate boundary conditions on the assembled matrix. + if (test_dbc_tdof_list_ || trial_dbc_tdof_list_) + { + if (test_dbc_tdof_list_ == trial_dbc_tdof_list_) + { + // Elimination for a square operator. + MFEM_VERIFY( + &trial_fespace_ == &test_fespace_, + "Only square ParOperator should have same trial and test eliminated tdofs!"); + RAP_->EliminateBC(*trial_dbc_tdof_list_, diag_policy_); + } + else + { + // Rectangular elimination sets all eliminated rows/columns to zero. 
+ mfem::HypreParMatrix *RAPe = RAP_->EliminateCols(*trial_dbc_tdof_list_); + RAP_->EliminateRows(*test_dbc_tdof_list_); + delete RAPe; + } + } + } + return *RAP_; +} + +void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const +{ + if (RAP_) + { + RAP_->AddMult(x, y, a); + return; + } + MFEM_ASSERT(x.Size() == width && y.Size() == height, + "Incompatible dimensions for ParOperator::AddMult!"); + if (trial_dbc_tdof_list_) + { + tx_ = x; + tx_.SetSubVector(*trial_dbc_tdof_list_, 0.0); + } + trial_fespace_.GetProlongationMatrix()->Mult(trial_dbc_tdof_list_ ? tx_ : x, lx_); + + // Apply the operator on the L-vector. + A_->Mult(lx_, ly_); + + if (test_dbc_tdof_list_) + { + if (!use_R_) + { + test_fespace_.GetProlongationMatrix()->MultTranspose(ly_, ty_); + } + else + { + test_fespace_.GetRestrictionMatrix()->Mult(ly_, ty_); + } + const int N = test_dbc_tdof_list_->Size(); + auto idx = test_dbc_tdof_list_->Read(); + auto TY = ty_.ReadWrite(); + if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) + { + const auto *X = x.Read(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TY[id] = X[id]; + }); + } + else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TY[id] = 0.0; + }); + } + else + { + MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + } + y.Add(a, ty_); + } + else + { + if (!use_R_) + { + test_fespace_.GetProlongationMatrix()->AddMultTranspose(ly_, y, a); + } + else + { + test_fespace_.GetRestrictionMatrix()->AddMult(ly_, y, a); + } + } +} + +void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) const +{ + if (RAP_) + { + RAP_->AddMultTranspose(x, y, a); + return; + } + MFEM_ASSERT(x.Size() == height && y.Size() == width, + "Incompatible dimensions for ParOperator::AddMultTranspose!"); + if (test_dbc_tdof_list_) + { + ty_ = x; + 
ty_.SetSubVector(*test_dbc_tdof_list_, 0.0); + } + if (!use_R_) + { + test_fespace_.GetProlongationMatrix()->Mult(test_dbc_tdof_list_ ? ty_ : x, ly_); + } + else + { + test_fespace_.GetRestrictionMatrix()->MultTranspose(test_dbc_tdof_list_ ? ty_ : x, ly_); + } + + // Apply the operator on the L-vector. + A_->MultTranspose(ly_, lx_); + + if (trial_dbc_tdof_list_) + { + trial_fespace_.GetProlongationMatrix()->MultTranspose(lx_, tx_); + const int N = trial_dbc_tdof_list_->Size(); + auto idx = trial_dbc_tdof_list_->Read(); + auto TX = tx_.ReadWrite(); + if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) + { + const auto *X = x.Read(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TX[id] = X[id]; + }); + } + else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TX[id] = 0.0; + }); + } + else + { + MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + } + y.Add(a, tx_); + } + else + { + trial_fespace_.GetProlongationMatrix()->AddMultTranspose(lx_, y, a); + } +} + +void DiagonalOperator::Mult(const Vector &x, Vector &y) const +{ + const int N = height; + const auto *D = d_.Read(); + const auto *X = x.Read(); + auto *Y = y.Write(); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { Y[i] = D[i] * X[i]; }); +} + +namespace linalg +{ + +double SpectralNorm(MPI_Comm comm, const Operator &A, bool sym, double tol, int max_it) +{ + // The SumOperator does not take ownership of A and allows the ComplexWrapperOperator + // to own its input. + ComplexWrapperOperator Ar(std::make_unique(A, 1.0), nullptr); + return SpectralNorm(comm, Ar, sym, tol, max_it); +} + +double SpectralNorm(MPI_Comm comm, const ComplexOperator &A, bool herm, double tol, + int max_it) +{ + // XX TODO: Use ARPACK or SLEPc for this when configured. + // Power iteration loop: ||A||₂² = λₙ(Aᴴ A). 
+ int it = 0; + double res = 0.0; + double l, l0 = 0.0; + ComplexVector u(A.Height()), v(A.Height()); + SetRandom(comm, u); + Normalize(comm, u); + while (it < max_it) + { + A.Mult(u, v); + if (herm) + { + u = v; + } + else + { + A.MultHermitianTranspose(v, u); + } + l = Normalize(comm, u); + if (it > 0) + { + res = std::abs(l - l0) / l0; + if (res < tol) + { + break; + } + } + l0 = l; + it++; + } + if (it >= max_it) + { + Mpi::Warning(comm, + "Power iteration did not converge in {:d} iterations, res = {:.3e}, " + "lambda = {:.3e}!\n", + it, res, l); + } + return herm ? l : std::sqrt(l); +} + +} // namespace linalg + +} // namespace palace diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp new file mode 100644 index 000000000..202160945 --- /dev/null +++ b/palace/linalg/operator.hpp @@ -0,0 +1,235 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LINALG_OPERATOR_HPP +#define PALACE_LINALG_OPERATOR_HPP + +#include +#include +#include +#include +#include "linalg/vector.hpp" + +namespace palace +{ + +class ComplexOperator; + +using Operator = mfem::Operator; + +// +// Derived operator classes extending mfem::Operator from MFEM. +// + +// A parallel operator represented by RᵀAP constructed through the actions of Rᵀ, A, and P +// with possible eliminated essential BC. +class ParOperator : public Operator +{ +private: + std::unique_ptr A_; + const mfem::ParFiniteElementSpace &trial_fespace_, &test_fespace_; + const bool use_R_; + + // Lists of constrained essential boundary true dofs for elimination. + mutable const mfem::Array *trial_dbc_tdof_list_, *test_dbc_tdof_list_; + + // Diagonal policy for constrained true dofs. + DiagonalPolicy diag_policy_; + + // Assembled operator as a parallel Hypre matrix. + std::unique_ptr RAP_; + + // Temporary storage for operator application. 
+  mutable Vector lx_, ly_, tx_, ty_;
+
+public:
+  // Construct the parallel operator, inheriting ownership of the local operator.
+  ParOperator(std::unique_ptr &&A,
+              const mfem::ParFiniteElementSpace &trial_fespace,
+              const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict = false);
+
+  // Get access to the underlying local (L-vector) operator.
+  // Aborts via MFEM_VERIFY if the local operator is no longer held.
+  const Operator &GetOperator() const
+  {
+    MFEM_VERIFY(A_, "No local matrix available for ParOperator::GetOperator!");
+    return *A_;
+  }
+
+  // Set essential boundary condition true dofs for square operators.
+  // The same list is used for the trial and test spaces. The address of dbc_tdof_list is
+  // stored, so the caller must keep the array alive while the operator uses it.
+  void SetEssentialTrueDofs(const mfem::Array &dbc_tdof_list,
+                            DiagonalPolicy diag_policy)
+  {
+    MFEM_VERIFY(height == width, "Set essential true dofs for both test and trial spaces "
+                                 "for rectangular ParOperator!");
+    trial_dbc_tdof_list_ = &dbc_tdof_list;
+    test_dbc_tdof_list_ = &dbc_tdof_list;
+    diag_policy_ = diag_policy;
+  }
+
+  // Set essential boundary condition true dofs for rectangular operators.
+  // Only DiagonalPolicy::DIAG_ZERO is accepted. The addresses of both lists are stored, so
+  // the caller must keep the arrays alive while the operator uses them.
+  void SetEssentialTrueDofs(const mfem::Array &trial_dbc_tdof_list,
+                            const mfem::Array &test_dbc_tdof_list,
+                            DiagonalPolicy diag_policy)
+  {
+    MFEM_VERIFY(diag_policy == DiagonalPolicy::DIAG_ZERO,
+                "Essential boundary condition true dof elimination for rectangular "
+                "ParOperator only supports DiagonalPolicy::DIAG_ZERO!");
+    trial_dbc_tdof_list_ = &trial_dbc_tdof_list;
+    test_dbc_tdof_list_ = &test_dbc_tdof_list;
+    diag_policy_ = diag_policy;
+  }
+
+  // Get the essential boundary condition true dofs associated with the operator. May be
+  // nullptr.
+  // NOTE(review): this getter does not modify the object but is not declared const.
+  void GetEssentialTrueDofs(const mfem::Array *&trial_dbc_tdof_list,
+                            const mfem::Array *&test_dbc_tdof_list)
+  {
+    trial_dbc_tdof_list = trial_dbc_tdof_list_;
+    test_dbc_tdof_list = test_dbc_tdof_list_;
+  }
+
+  // Eliminate essential true dofs from the RHS vector b, using the essential boundary
+  // condition values in x.
+  void EliminateRHS(const Vector &x, Vector &b) const;
+
+  // Assemble the diagonal for the parallel operator.
+  void AssembleDiagonal(Vector &diag) const override;
+
+  // Assemble the operator as a parallel sparse matrix.
+  mfem::HypreParMatrix &ParallelAssemble();
+
+  // Get the associated MPI communicator.
+  MPI_Comm GetComm() const { return trial_fespace_.GetComm(); }
+
+  // y = A x. Uses the assembled matrix when one exists, otherwise zeroes y and accumulates
+  // through AddMult().
+  void Mult(const Vector &x, Vector &y) const override
+  {
+    if (RAP_)
+    {
+      RAP_->Mult(x, y);
+      return;
+    }
+    y = 0.0;
+    AddMult(x, y);
+  }
+
+  // y = Aᵀ x. Uses the assembled matrix when one exists, otherwise zeroes y and accumulates
+  // through AddMultTranspose().
+  void MultTranspose(const Vector &x, Vector &y) const override
+  {
+    if (RAP_)
+    {
+      RAP_->MultTranspose(x, y);
+      return;
+    }
+    y = 0.0;
+    AddMultTranspose(x, y);
+  }
+
+  void AddMult(const Vector &x, Vector &y, const double a = 1.0) const override;
+
+  void AddMultTranspose(const Vector &x, Vector &y, const double a = 1.0) const override;
+};
+
+// Wrap a sequence of operators of the same dimensions and optional coefficients.
+class SumOperator : public Operator
+{
+private:
+  // Terms of the sum as (operator pointer, coefficient) pairs. The operators are not
+  // owned: AddOperator() stores their addresses, so they must outlive this object.
+  std::vector> ops_;
+
+public:
+  SumOperator(int s) : Operator(s) {}
+  SumOperator(int h, int w) : Operator(h, w) {}
+  // Construct a sum with the dimensions of op and the single term c * op.
+  SumOperator(const Operator &op, double c = 1.0) : Operator(op.Height(), op.Width())
+  {
+    AddOperator(op, c);
+  }
+
+  // Append the term c * op. The dimensions of op must match those of the sum.
+  void AddOperator(const Operator &op, double c = 1.0)
+  {
+    MFEM_VERIFY(op.Height() == height && op.Width() == width,
+                "Invalid Operator dimensions for SumOperator!");
+    ops_.emplace_back(&op, c);
+  }
+
+  // Mult() and MultTranspose() zero y and then accumulate every term.
+  void Mult(const Vector &x, Vector &y) const override
+  {
+    y = 0.0;
+    AddMult(x, y);
+  }
+
+  void MultTranspose(const Vector &x, Vector &y) const override
+  {
+    y = 0.0;
+    AddMultTranspose(x, y);
+  }
+
+  // Accumulate each term into y with the combined scaling a * c.
+  void AddMult(const Vector &x, Vector &y, const double a = 1.0) const override
+  {
+    for (const auto &[op, c] : ops_)
+    {
+      op->AddMult(x, y, a * c);
+    }
+  }
+
+  void AddMultTranspose(const Vector &x, Vector &y, const double a = 1.0) const override
+  {
+    for (const auto &[op, c] : ops_)
+    {
+      op->AddMultTranspose(x, y, a * c);
+    }
+  }
+};
+
+// Wraps two symmetric operators such that: (AB)ᵀ = BᵀAᵀ = BA.
+class SymmetricProductOperator : public Operator
+{
+private:
+  // Both factors are held by reference and must outlive this object.
+  const Operator &A_, &B_;
+  // Intermediate vector of size B.Height(). MultTranspose() also uses it as the output of
+  // A_.Mult(), so this presumably assumes square factors of equal size -- confirm.
+  mutable Vector z_;
+
+public:
+  SymmetricProductOperator(const Operator &A, const Operator &B)
+    : Operator(A.Height(), B.Width()), A_(A), B_(B), z_(B_.Height())
+  {
+  }
+
+  // y = A (B x).
+  void Mult(const Vector &x, Vector &y) const override
+  {
+    B_.Mult(x, z_);
+    A_.Mult(z_, y);
+  }
+
+  // y = B (A x). The factors' own Mult() is used rather than MultTranspose(), relying on
+  // their symmetry.
+  void MultTranspose(const Vector &x, Vector &y) const override
+  {
+    A_.Mult(x, z_);
+    B_.Mult(z_, y);
+  }
+};
+
+// Applies the simple (symmetric) operator: diag(d).
+class DiagonalOperator : public Operator
+{
+private:
+  // The diagonal is held by reference, not copied, so d must outlive this object.
+  const Vector &d_;
+
+public:
+  DiagonalOperator(const Vector &d) : Operator(d.Size()), d_(d) {}
+
+  void Mult(const Vector &x, Vector &y) const override;
+
+  // A diagonal operator is its own transpose, so this forwards to Mult().
+  void MultTranspose(const Vector &x, Vector &y) const override { Mult(x, y); }
+};
+
+namespace linalg
+{
+
+// Estimate operator 2-norm (spectral norm) using power iteration. Assumes the operator is
+// not symmetric or Hermitian unless specified.
+double SpectralNorm(MPI_Comm comm, const Operator &A, bool sym = false, double tol = 1.0e-4,
+                    int max_it = 100);
+double SpectralNorm(MPI_Comm comm, const ComplexOperator &A, bool herm = false,
+                    double tol = 1.0e-4, int max_it = 100);
+
+} // namespace linalg
+
+} // namespace palace
+
+#endif // PALACE_LINALG_OPERATOR_HPP
diff --git a/palace/linalg/petsc.cpp b/palace/linalg/petsc.cpp index 0b556b188..6b4271b4b 100644 --- a/palace/linalg/petsc.cpp +++ b/palace/linalg/petsc.cpp @@ -1518,6 +1518,9 @@ void PetscDenseMatrix::SetRandomSign(PetscInt start, PetscInt end, bool init) PetscReal PetscDenseMatrix::OrthonormalizeColumn(PetscInt j, bool mgs, bool cgs2) { + + // XX TODO MOVE INTO REAL-VALUED BASIS CLASS...
+ auto Dot = [](const PetscParVector &v, const PetscParVector &w) -> PetscScalar { return v.Dot(w); }; auto VecDot = [](const PetscParVector &v, const PetscParMatrix &A, diff --git a/palace/linalg/petsc.hpp b/palace/linalg/petsc.hpp index fb9eddf2d..5aaf29db8 100644 --- a/palace/linalg/petsc.hpp +++ b/palace/linalg/petsc.hpp @@ -150,7 +150,7 @@ class PetscParVector // Zero all entries of the vector. void SetZero(); - // Sets all entries of the vector to random numbers sampled from the range[-1-i, 1+i], or + // Sets all entries of the vector to random numbers sampled from the range [-1-i, 1+i], or // [-1, 1]. void SetRandom(); #if defined(PETSC_USE_COMPLEX) @@ -585,7 +585,7 @@ class PetscDenseMatrix : public PetscParMatrix void RestoreArray(PetscScalar *data); void RestoreArrayRead(const PetscScalar *data) const; - // Sets all entries of the vector to random numbers sampled from the range[-1-i, 1+i], or + // Sets all entries of the matrix to random numbers sampled from the range [-1-i, 1+i], or // [-1, 1]. void SetRandom(PetscInt start = -1, PetscInt end = -1); #if defined(PETSC_USE_COMPLEX) diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp index b118e8667..8b21a9bbe 100644 --- a/palace/linalg/slepc.cpp +++ b/palace/linalg/slepc.cpp @@ -50,6 +50,9 @@ PetscReal GetMaxSingularValue(const petsc::PetscParMatrix &A, PetscReal tol, // is not Hermitian). PetscInt nconv; PetscReal sigma; + + // XX TODO ADDRESS GetHermitian + if (A.GetHermitian()) // Returns true if symmetric and not PETSC_USE_COMPLEX { EPS eps; diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp new file mode 100644 index 000000000..a5db9c6a0 --- /dev/null +++ b/palace/linalg/vector.cpp @@ -0,0 +1,83 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#include "vector.hpp" + +#include +#include "linalg/complex.hpp" +#include "linalg/operator.hpp" +#include "utils/communication.hpp" + +namespace palace::linalg +{ + +HYPRE_BigInt GlobalSize(MPI_Comm comm, const Vector &x) +{ + HYPRE_BigInt N = x.Size(); + Mpi::GlobalSum(1, &N, comm); + return N; +} + +void SetRandom(MPI_Comm comm, Vector &x, int seed) +{ + seed *= Mpi::Rank(comm) + 1; + x.Randomize(seed); +} + +void SetRandomSign(MPI_Comm comm, Vector &x, int seed) +{ + SetRandom(comm, x, seed); + const int N = x.Size(); + auto *X = x.ReadWrite(); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) + { X[i] = (X[i] > 0.0) ? 1.0 : ((X[i] < 0.0) ? -1.0 : 0.0); }); +} + +double Norml2(MPI_Comm comm, const Vector &x) +{ + return std::sqrt(mfem::InnerProduct(comm, x, x)); +} + +double Normlinf(MPI_Comm comm, const Vector &x) +{ + double norm = x.Normlinf(); + Mpi::GlobalMax(1, &norm, comm); + return norm; +} + +double Norml1(MPI_Comm comm, const Vector &x) +{ + double norm = x.Norml1(); + Mpi::GlobalSum(1, &norm, comm); + return norm; +} + +double Normalize(MPI_Comm comm, Vector &x) +{ + double norm = Norml2(comm, x); + MFEM_ASSERT(norm > 0.0, "Zero vector norm in normalization!"); + x *= 1.0 / norm; + return norm; +} + +double Normalize(MPI_Comm comm, Vector &x, const ParOperator &B, Vector &Bx) +{ + B.Mult(x, Bx); + double norm = std::sqrt(mfem::InnerProduct(comm, x, Bx)); + MFEM_ASSERT(norm > 0.0, "Zero vector norm in normalization!"); + x *= 1.0 / norm; + return norm; +} + +double Normalize(MPI_Comm comm, ComplexVector &x, const ParOperator &B, ComplexVector &Bx) +{ + // For SPD B, xᴴ B x is real. 
+ B.Mult(x.Real(), Bx.Real()); + B.Mult(x.Imag(), Bx.Imag()); + double norm = std::sqrt(mfem::InnerProduct(comm, x, Bx)); + MFEM_ASSERT(norm > 0.0, "Zero vector norm in normalization!"); + x *= 1.0 / norm; + return norm; +} + +} // namespace palace::linalg diff --git a/palace/linalg/vector.hpp b/palace/linalg/vector.hpp new file mode 100644 index 000000000..5eef970ea --- /dev/null +++ b/palace/linalg/vector.hpp @@ -0,0 +1,50 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LINALG_VECTOR_HPP +#define PALACE_LINALG_VECTOR_HPP + +#include +#include + +namespace palace +{ + +class ComplexVector; +class ParOperator; + +using Vector = mfem::Vector; + +namespace linalg +{ + +// +// Basic functions for parallel vectors distributed across MPI processes. +// + +// Returns the global vector size. +HYPRE_BigInt GlobalSize(MPI_Comm comm, const Vector &x); + +// Sets all entries of the vector to random numbers sampled from the [-1, 1]. +void SetRandom(MPI_Comm comm, Vector &x, int seed = 0); +void SetRandomSign(MPI_Comm comm, Vector &x, int seed = 0); + +// Calculate the vector 2-norm. +double Norml2(MPI_Comm comm, const Vector &x); + +// Calculate the vector infinity-norm. +double Normlinf(MPI_Comm comm, const Vector &x); + +// Calculate the vector 1-norm. +double Norml1(MPI_Comm comm, const Vector &x); + +// Normalize the vector, possibly with respect to an SPD matrix B. 
+double Normalize(MPI_Comm comm, Vector &x); +double Normalize(MPI_Comm comm, Vector &x, const ParOperator &B, Vector &Bx); +double Normalize(MPI_Comm comm, ComplexVector &x, const ParOperator &B, ComplexVector &Bx); + +} // namespace linalg + +} // namespace palace + +#endif // PALACE_LINALG_VECTOR_HPP diff --git a/palace/main.cpp b/palace/main.cpp index ed7583a37..d6344dbda 100644 --- a/palace/main.cpp +++ b/palace/main.cpp @@ -12,7 +12,7 @@ #include "drivers/electrostaticsolver.hpp" #include "drivers/magnetostaticsolver.hpp" #include "drivers/transientsolver.hpp" -#include "linalg/petsc.hpp" +#include "linalg/petsc.hpp" //XX TODO REMOVE ONLY SLEPC... #include "linalg/slepc.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp index 801c27bff..f5e085c3a 100644 --- a/palace/models/curlcurloperator.cpp +++ b/palace/models/curlcurloperator.cpp @@ -6,7 +6,6 @@ #include "fem/coefficient.hpp" #include "fem/integrator.hpp" #include "fem/multigrid.hpp" -#include "fem/operator.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" #include "utils/iodata.hpp" @@ -70,22 +69,24 @@ mfem::Array SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMe CurlCurlOperator::CurlCurlOperator(const IoData &iodata, const std::vector> &mesh) - : dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), skip_zeros(0), - pc_gmg(iodata.solver.linear.mat_gmg), print_hdr(true), + : assembly_level(iodata.solver.linear.mat_pa ? mfem::AssemblyLevel::PARTIAL + : mfem::AssemblyLevel::LEGACY), + skip_zeros(0), pc_gmg(iodata.solver.linear.mat_gmg), print_hdr(true), + dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), nd_fecs(utils::ConstructFECollections( pc_gmg, false, iodata.solver.order, mesh.back()->Dimension())), h1_fec(iodata.solver.order, mesh.back()->Dimension()), rt_fec(iodata.solver.order - 1, mesh.back()->Dimension()), - nd_fespaces( - pc_gmg - ? 
utils::ConstructFiniteElementSpaceHierarchy(mesh, nd_fecs, dbc_marker) - : utils::ConstructFiniteElementSpaceHierarchy(*mesh.back(), *nd_fecs.back())), + nd_fespaces(pc_gmg ? utils::ConstructFiniteElementSpaceHierarchy( + mesh, nd_fecs, &dbc_marker, &dbc_tdof_lists) + : utils::ConstructFiniteElementSpaceHierarchy( + *mesh.back(), *nd_fecs.back(), &dbc_marker, + &dbc_tdof_lists.emplace_back())), h1_fespace(mesh.back().get(), &h1_fec), rt_fespace(mesh.back().get(), &rt_fec), - mat_op(iodata, *mesh.back()), surf_j_op(iodata, h1_fespace) + mat_op(iodata, *mesh.back()), surf_j_op(iodata, GetH1Space()) { // Finalize setup. CheckBoundaryProperties(); - nd_fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker, dbc_tdof_list); // Print essential BC information. if (dbc_marker.Max() > 0) @@ -106,57 +107,55 @@ void CurlCurlOperator::CheckBoundaryProperties() } } -void CurlCurlOperator::PrintHeader() +void CurlCurlOperator::GetStiffnessMatrix(std::vector> &K) { if (print_hdr) { - Mpi::Print("\nConfiguring system matrices, number of global unknowns: {:d}\n", - nd_fespaces.GetFinestFESpace().GlobalTrueVSize()); - print_hdr = false; + Mpi::Print("\nAssembling system matrices, number of global unknowns:\n" + " ND: {:d}\n RT: {:d}\n", + GetNDSpace().GlobalTrueVSize(), GetRTSpace().GlobalTrueVSize()); + Mpi::Print("\nAssembling multigrid hierarchy:\n"); } -} - -void CurlCurlOperator::GetStiffnessMatrix(std::vector> &K) -{ K.clear(); K.reserve(nd_fespaces.GetNumLevels()); for (int l = 0; l < nd_fespaces.GetNumLevels(); l++) { auto &nd_fespace_l = nd_fespaces.GetFESpaceAtLevel(l); - mfem::Array dbc_tdof_list_l; - nd_fespace_l.GetEssentialTrueDofs(dbc_marker, dbc_tdof_list_l); - - MaterialPropertyCoefficient muinv_func(mat_op); - mfem::ParBilinearForm k(&nd_fespace_l); - k.AddDomainIntegrator(new mfem::CurlCurlIntegrator(muinv_func)); - // k.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - k.Assemble(skip_zeros); - k.Finalize(skip_zeros); - mfem::HypreParMatrix *hK = 
k.ParallelAssemble(); - hK->EliminateBC(dbc_tdof_list_l, mfem::Operator::DiagonalPolicy::DIAG_ONE); - PrintHeader(); + constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_PERMEABILITY; + MaterialPropertyCoefficient muinv_func(mat_op); + auto k = std::make_unique(&nd_fespace_l); + k->AddDomainIntegrator(new mfem::CurlCurlIntegrator(muinv_func)); + k->SetAssemblyLevel(assembly_level); + k->Assemble(skip_zeros); + k->Finalize(skip_zeros); + if (print_hdr) { - std::string str = ""; - if (pc_gmg) + Mpi::Print(" Level {:d}: {:d} unknowns", l, nd_fespace_l.GlobalTrueVSize()); + if (assembly_level == mfem::AssemblyLevel::LEGACY) + { + HYPRE_BigInt nnz = k->SpMat().NumNonZeroElems(); + Mpi::GlobalSum(1, &nnz, nd_fespace_l.GetComm()); + Mpi::Print(", {:d} NNZ\n", nnz); + } + else { - str = - fmt::format(" (Level {:d}, {:d} unknowns)", l, nd_fespace_l.GlobalTrueVSize()); + Mpi::Print("\n"); } - Mpi::Print(" K{}: NNZ = {:d}, norm = {:e}\n", str, hK->NNZ(), - hypre_ParCSRMatrixFnorm(*hK)); } - K.emplace_back(hK); + K.push_back(std::make_unique(std::move(k), nd_fespace_l, nd_fespace_l)); + K.back()->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); } + print_hdr = false; } -std::unique_ptr CurlCurlOperator::GetCurlMatrix() +std::unique_ptr CurlCurlOperator::GetCurlMatrix() { - mfem::ParDiscreteLinearOperator curl(&nd_fespaces.GetFinestFESpace(), &rt_fespace); - curl.AddDomainInterpolator(new mfem::CurlInterpolator); - // curl.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - curl.Assemble(); - curl.Finalize(); - return std::unique_ptr(curl.ParallelAssemble()); + auto curl = std::make_unique(&GetNDSpace(), &GetRTSpace()); + curl->AddDomainInterpolator(new mfem::CurlInterpolator); + curl->SetAssemblyLevel(assembly_level); + curl->Assemble(); + curl->Finalize(); + return std::make_unique(std::move(curl), GetNDSpace(), GetRTSpace(), true); } void CurlCurlOperator::GetExcitationVector(int idx, mfem::Vector &RHS) @@ -164,21 +163,20 @@ void 
CurlCurlOperator::GetExcitationVector(int idx, mfem::Vector &RHS) // Assemble the surface current excitation +J. The SurfaceCurrentOperator assembles -J // (meant for time or frequency domain Maxwell discretization, so we multiply by -1 to // retrieve +J). - SumVectorCoefficient fb(nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension()); + SumVectorCoefficient fb(GetNDSpace().GetParMesh()->SpaceDimension()); surf_j_op.AddExcitationBdrCoefficients(idx, fb); - RHS.SetSize(nd_fespaces.GetFinestFESpace().GetTrueVSize()); + RHS.SetSize(GetNDSpace().GetTrueVSize()); RHS = 0.0; if (fb.empty()) { return; } - mfem::ParLinearForm rhs(&nd_fespaces.GetFinestFESpace()); + mfem::LinearForm rhs(&GetNDSpace()); rhs.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb)); - rhs.UseFastAssembly(true); + rhs.UseFastAssembly(false); rhs.Assemble(); - rhs.ParallelAssemble(RHS); - RHS.Neg(); - RHS.SetSubVector(dbc_tdof_list, 0.0); + GetNDSpace().GetProlongationMatrix()->AddMultTranspose(rhs, RHS, -1.0); + RHS.SetSubVector(dbc_tdof_lists.back(), 0.0); } } // namespace palace diff --git a/palace/models/curlcurloperator.hpp b/palace/models/curlcurloperator.hpp index 26a65e865..aab302661 100644 --- a/palace/models/curlcurloperator.hpp +++ b/palace/models/curlcurloperator.hpp @@ -7,6 +7,8 @@ #include #include #include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" #include "models/materialoperator.hpp" #include "models/surfacecurrentoperator.hpp" @@ -21,17 +23,17 @@ class IoData; class CurlCurlOperator { private: - // Essential boundary condition markers. - mfem::Array dbc_marker, dbc_tdof_list; - void CheckBoundaryProperties(); + const mfem::AssemblyLevel assembly_level; // Use full or partial assembly for operators + const int skip_zeros; // Skip zeros during full assembly of operators + const bool pc_gmg; // Use geometric multigrid in preconditioning - // Options for system matrix assembly. 
- const int skip_zeros; // Whether to skip the zeros during assembly of operators - const bool pc_gmg; // Whether to use geometric multigrid in preconditioning - - // Helper variable and function for log file printing. + // Helper variable for log file printing. bool print_hdr; - void PrintHeader(); + + // Essential boundary condition markers. + mfem::Array dbc_marker; + std::vector> dbc_tdof_lists; + void CheckBoundaryProperties(); // Objects defining the finite element spaces for the magnetic vector potential // (Nedelec) and magnetic flux density (Raviart-Thomas) on the given mesh. The H1 spaces @@ -52,10 +54,6 @@ class CurlCurlOperator CurlCurlOperator(const IoData &iodata, const std::vector> &mesh); - // Returns array marking Dirichlet BC attributes and local subdomain vdofs. - const auto &GetDbcMarker() const { return dbc_marker; } - const auto &GetDbcTDofList() const { return dbc_tdof_list; } - // Return material operator for postprocessing. const MaterialOperator &GetMaterialOp() const { return mat_op; } @@ -70,14 +68,14 @@ class CurlCurlOperator // Construct and return system matrix representing discretized curl-curl operator for // Ampere's law. - void GetStiffnessMatrix(std::vector> &K); + void GetStiffnessMatrix(std::vector> &K); // Construct and return the discrete curl matrix. - std::unique_ptr GetCurlMatrix(); + std::unique_ptr GetCurlMatrix(); // Assemble the right-hand side source term vector for a current source applied on // specified excited boundaries. - void GetExcitationVector(int idx, mfem::Vector &RHS); + void GetExcitationVector(int idx, Vector &RHS); }; } // namespace palace diff --git a/palace/models/domainpostoperator.cpp b/palace/models/domainpostoperator.cpp index 321c9a886..468c9c755 100644 --- a/palace/models/domainpostoperator.cpp +++ b/palace/models/domainpostoperator.cpp @@ -129,7 +129,7 @@ double DomainPostOperatorMF::GetVolumeIntegral(mfem::Coefficient &f) const // Integrate the coefficient over the entire domain. 
mfem::ParLinearForm s(ones.ParFESpace()); s.AddDomainIntegrator(new DomainLFIntegrator(f)); - s.UseFastAssembly(true); + s.UseFastAssembly(false); s.Assemble(); return s(ones); } @@ -140,7 +140,7 @@ double DomainPostOperatorMF::GetVolumeIntegral(mfem::Coefficient &f, // Integrate the coefficient over the domain attributes making up this domain index. mfem::ParLinearForm s(ones.ParFESpace()); s.AddDomainIntegrator(new DomainLFIntegrator(f), attr_marker); - s.UseFastAssembly(true); + s.UseFastAssembly(false); s.Assemble(); return s(ones); } @@ -157,8 +157,9 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera // E_elec = 1/2 Re{∫_Ω Dᴴ E dV} as (M_eps * e)ᴴ e. // Only the real part of the permeability contributes to the energy (imaginary part // cancels out in the inner product due to symmetry). - MaterialPropertyCoefficient epsilon_func( - mat_op); + constexpr MaterialPropertyType MatTypeEpsReal = MaterialPropertyType::PERMITTIVITY_REAL; + constexpr MaterialPropertyType MatTypeEpsImag = MaterialPropertyType::PERMITTIVITY_IMAG; + MaterialPropertyCoefficient epsilon_func(mat_op); m0ND->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func)); // m0ND->SetAssemblyLevel(mfem::AssemblyLevel::FULL); m0ND->Assemble(); @@ -178,13 +179,10 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera SumMatrixCoefficient epsilon_func_r(nd_fespace->GetParMesh()->SpaceDimension()); SumMatrixCoefficient epsilon_func_i(nd_fespace->GetParMesh()->SpaceDimension()); epsilon_func_r.AddCoefficient( - std::make_unique< - MaterialPropertyCoefficient>(mat_op), + std::make_unique>(mat_op), attr_marker); epsilon_func_i.AddCoefficient( - std::make_unique< - MaterialPropertyCoefficient>(mat_op, - -1.0), + std::make_unique>(mat_op, -1.0), attr_marker); auto &m0 = m0NDi.emplace(idx, std::make_pair(nd_fespace, nd_fespace)).first->second; mfem::ParBilinearForm &m0r = m0.first; @@ -204,7 +202,8 @@ 
DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera { // Construct RT mass matrix to compute the magnetic field energy integral as: // E_mag = 1/2 Re{∫_Ω Bᴴ H dV} as (M_muinv * b)ᴴ b. - MaterialPropertyCoefficient muinv_func(mat_op); + constexpr MaterialPropertyType MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY; + MaterialPropertyCoefficient muinv_func(mat_op); m0RT->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(muinv_func)); // m0RT->SetAssemblyLevel(mfem::AssemblyLevel::FULL); m0RT->Assemble(); diff --git a/palace/models/farfieldboundaryoperator.cpp b/palace/models/farfieldboundaryoperator.cpp index c1a009d84..3a77d1595 100644 --- a/palace/models/farfieldboundaryoperator.cpp +++ b/palace/models/farfieldboundaryoperator.cpp @@ -69,10 +69,9 @@ void FarfieldBoundaryOperator::AddDampingBdrCoefficients(double coef, // First-order absorbing boundary condition. if (farfield_marker.Max() > 0) { - fb.AddCoefficient( - std::make_unique>(mat_op, - coef), - farfield_marker); + constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_Z0; + fb.AddCoefficient(std::make_unique>(mat_op, coef), + farfield_marker); } } @@ -88,11 +87,10 @@ void FarfieldBoundaryOperator::AddExtraSystemBdrCoefficients(double omega, // does as well. 
if (farfield_marker.Max() > 0 && order > 1) { + constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_PERMEABILITY_C0; dfbi.AddCoefficient( std::make_unique( - std::make_unique< - MaterialPropertyCoefficient>( - mat_op, 0.5 / omega)), + std::make_unique>(mat_op, 0.5 / omega)), farfield_marker); } } diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp index 5e3a78bdf..054d25cc9 100644 --- a/palace/models/laplaceoperator.cpp +++ b/palace/models/laplaceoperator.cpp @@ -5,7 +5,6 @@ #include "fem/coefficient.hpp" #include "fem/multigrid.hpp" -#include "fem/operator.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" #include "utils/iodata.hpp" @@ -113,21 +112,21 @@ std::map> ConstructSources(const IoData &iodata) LaplaceOperator::LaplaceOperator(const IoData &iodata, const std::vector> &mesh) - : dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), skip_zeros(0), - pc_gmg(iodata.solver.linear.mat_gmg), print_hdr(true), + : assembly_level(iodata.solver.linear.mat_pa ? mfem::AssemblyLevel::PARTIAL + : mfem::AssemblyLevel::LEGACY), + skip_zeros(0), pc_gmg(iodata.solver.linear.mat_gmg), print_hdr(true), + dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), h1_fecs(utils::ConstructFECollections( pc_gmg, false, iodata.solver.order, mesh.back()->Dimension())), nd_fec(iodata.solver.order, mesh.back()->Dimension()), - h1_fespaces( - pc_gmg - ? utils::ConstructFiniteElementSpaceHierarchy(mesh, h1_fecs, dbc_marker) - : utils::ConstructFiniteElementSpaceHierarchy(*mesh.back(), *h1_fecs.back())), + h1_fespaces(pc_gmg ? utils::ConstructFiniteElementSpaceHierarchy( + mesh, h1_fecs, &dbc_marker, &dbc_tdof_lists) + : utils::ConstructFiniteElementSpaceHierarchy( + *mesh.back(), *h1_fecs.back(), &dbc_marker, + &dbc_tdof_lists.emplace_back())), nd_fespace(mesh.back().get(), &nd_fec), mat_op(iodata, *mesh.back()), source_attr_lists(ConstructSources(iodata)) { - // Finalize setup. 
- h1_fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker, dbc_tdof_list); - // Print essential BC information. if (dbc_marker.Max() > 0) { @@ -136,73 +135,63 @@ LaplaceOperator::LaplaceOperator(const IoData &iodata, } } -void LaplaceOperator::PrintHeader() +void LaplaceOperator::GetStiffnessMatrix(std::vector> &K) { if (print_hdr) { - Mpi::Print("\nConfiguring system matrices, number of global unknowns: {:d}\n", - h1_fespaces.GetFinestFESpace().GlobalTrueVSize()); - print_hdr = false; + Mpi::Print("\nAssembling system matrices, number of global unknowns:\n" + " H1: {:d}\n ND: {:d}\n", + GetH1Space().GlobalTrueVSize(), GetNDSpace().GlobalTrueVSize()); + Mpi::Print("\nAssembling multigrid hierarchy:\n"); } -} - -void LaplaceOperator::GetStiffnessMatrix(std::vector> &K, - std::vector> &Ke) -{ K.clear(); - Ke.clear(); K.reserve(h1_fespaces.GetNumLevels()); - Ke.reserve(h1_fespaces.GetNumLevels()); for (int l = 0; l < h1_fespaces.GetNumLevels(); l++) { auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l); - mfem::Array dbc_tdof_list_l; - h1_fespace_l.GetEssentialTrueDofs(dbc_marker, dbc_tdof_list_l); - - MaterialPropertyCoefficient epsilon_func( - mat_op); - mfem::ParBilinearForm k(&h1_fespace_l); - k.AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); - // k.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - k.Assemble(skip_zeros); - k.Finalize(skip_zeros); - mfem::HypreParMatrix *hK = k.ParallelAssemble(); - mfem::HypreParMatrix *hKe = hK->EliminateRowsCols(dbc_tdof_list_l); - PrintHeader(); + constexpr MaterialPropertyType MatType = MaterialPropertyType::PERMITTIVITY_REAL; + MaterialPropertyCoefficient epsilon_func(mat_op); + auto k = std::make_unique(&h1_fespace_l); + k->AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); + k->SetAssemblyLevel(assembly_level); + k->Assemble(skip_zeros); + k->Finalize(skip_zeros); + if (print_hdr) { - std::string str = ""; - if (pc_gmg) + Mpi::Print(" Level {:d}: {:d} unknowns", l, 
h1_fespace_l.GlobalTrueVSize()); + if (assembly_level == mfem::AssemblyLevel::LEGACY) + { + HYPRE_BigInt nnz = k->SpMat().NumNonZeroElems(); + Mpi::GlobalSum(1, &nnz, h1_fespace_l.GetComm()); + Mpi::Print(", {:d} NNZ\n", nnz); + } + else { - str = - fmt::format(" (Level {:d}, {:d} unknowns)", l, h1_fespace_l.GlobalTrueVSize()); + Mpi::Print("\n"); } - Mpi::Print(" K{}: NNZ = {:d}, norm = {:e}\n", str, hK->NNZ(), - hypre_ParCSRMatrixFnorm(*hK)); } - K.emplace_back(hK); - Ke.emplace_back(hKe); + K.push_back(std::make_unique(std::move(k), h1_fespace_l, h1_fespace_l)); + K.back()->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); } + print_hdr = false; } -std::unique_ptr LaplaceOperator::GetNegGradMatrix() +std::unique_ptr LaplaceOperator::GetGradMatrix() { - mfem::ParDiscreteLinearOperator grad(&h1_fespaces.GetFinestFESpace(), &nd_fespace); - grad.AddDomainInterpolator(new mfem::GradientInterpolator); - // grad.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - grad.Assemble(); - grad.Finalize(); - std::unique_ptr NegGrad(grad.ParallelAssemble()); - *NegGrad *= -1.0; - return NegGrad; + auto grad = std::make_unique(&GetH1Space(), &GetNDSpace()); + grad->AddDomainInterpolator(new mfem::GradientInterpolator); + grad->SetAssemblyLevel(assembly_level); + grad->Assemble(); + grad->Finalize(); + return std::make_unique(std::move(grad), GetH1Space(), GetNDSpace(), true); } -void LaplaceOperator::GetExcitationVector(int idx, const mfem::Operator &K, - const mfem::Operator &Ke, mfem::Vector &X, +void LaplaceOperator::GetExcitationVector(int idx, const ParOperator &K, mfem::Vector &X, mfem::Vector &RHS) { // Apply the Dirichlet BCs to the solution vector: V = 1 on terminal boundaries with the // given index, V = 0 on all ground and other terminal boundaries. - mfem::ParGridFunction x(&h1_fespaces.GetFinestFESpace()); + mfem::ParGridFunction x(&GetH1Space()); x = 0.0; // Get a marker of all boundary attributes with the given source surface index. 
@@ -213,13 +202,12 @@ void LaplaceOperator::GetExcitationVector(int idx, const mfem::Operator &K, x.ProjectBdrCoefficient(one, source_marker); // Values are only correct on master // Eliminate the essential BC to get the RHS vector. - X.SetSize(h1_fespaces.GetFinestFESpace().GetTrueVSize()); - RHS.SetSize(h1_fespaces.GetFinestFESpace().GetTrueVSize()); + X.SetSize(GetH1Space().GetTrueVSize()); + RHS.SetSize(GetH1Space().GetTrueVSize()); X = 0.0; RHS = 0.0; x.ParallelProject(X); // Restrict to the true dofs - dynamic_cast(K).EliminateBC( - dynamic_cast(Ke), dbc_tdof_list, X, RHS); + K.EliminateRHS(X, RHS); } } // namespace palace diff --git a/palace/models/laplaceoperator.hpp b/palace/models/laplaceoperator.hpp index 30b9b601f..c1cfb1df7 100644 --- a/palace/models/laplaceoperator.hpp +++ b/palace/models/laplaceoperator.hpp @@ -8,6 +8,8 @@ #include #include #include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" #include "models/materialoperator.hpp" namespace palace @@ -21,16 +23,16 @@ class IoData; class LaplaceOperator { private: - // Essential boundary condition markers. - mfem::Array dbc_marker, dbc_tdof_list; - - // Options for system matrix assembly. - const int skip_zeros; // Whether to skip the zeros during assembly of operators - const bool pc_gmg; // Whether to use geometric multigrid in preconditioning + const mfem::AssemblyLevel assembly_level; // Use full or partial assembly for operators + const int skip_zeros; // Skip zeros during full assembly of operators + const bool pc_gmg; // Use geometric multigrid in preconditioning - // Helper variable and function for log file printing. + // Helper variable for log file printing. bool print_hdr; - void PrintHeader(); + + // Essential boundary condition markers. + mfem::Array dbc_marker; + std::vector> dbc_tdof_lists; // Objects defining the finite element spaces for the electrostatic potential (H1) and // electric field (Nedelec) on the given mesh. 
@@ -49,10 +51,6 @@ class LaplaceOperator LaplaceOperator(const IoData &iodata, const std::vector> &mesh); - // Returns array marking Dirichlet BC attributes and local subdomain vdofs. - const auto &GetDbcMarker() const { return dbc_marker; } - const auto &GetDbcTDofList() const { return dbc_tdof_list; } - // Return material operator for postprocessing. const MaterialOperator &GetMaterialOp() const { return mat_op; } @@ -66,16 +64,14 @@ class LaplaceOperator // Construct and return system matrix representing discretized Laplace operator for // Gauss's law. - void GetStiffnessMatrix(std::vector> &K, - std::vector> &Ke); + void GetStiffnessMatrix(std::vector> &K); - // Construct and return the discrete negative gradient matrix. - std::unique_ptr GetNegGradMatrix(); + // Construct and return the discrete gradient matrix. + std::unique_ptr GetGradMatrix(); // Assemble the solution boundary conditions and right-hand side vector for a nonzero // prescribed voltage on the specified surface index. 
- void GetExcitationVector(int idx, const mfem::Operator &K, const mfem::Operator &Ke, - mfem::Vector &X, mfem::Vector &RHS); + void GetExcitationVector(int idx, const ParOperator &K, Vector &X, Vector &RHS); }; } // namespace palace diff --git a/palace/models/lumpedportoperator.cpp b/palace/models/lumpedportoperator.cpp index acbd9717d..13dd47716 100644 --- a/palace/models/lumpedportoperator.cpp +++ b/palace/models/lumpedportoperator.cpp @@ -177,7 +177,7 @@ std::complex LumpedPortData::GetSParameter(mfem::ParComplexGridFunction } s = std::make_unique(&nd_fespace); s->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb)); - s->UseFastAssembly(true); + s->UseFastAssembly(false); s->Assemble(); } return {(*s)(E.real()), (*s)(E.imag())}; @@ -201,7 +201,7 @@ double LumpedPortData::GetPower(mfem::ParGridFunction &E, mfem::ParGridFunction } mfem::ParLinearForm p(&nd_fespace); p.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb)); - p.UseFastAssembly(true); + p.UseFastAssembly(false); p.Assemble(); return p(E); } @@ -230,8 +230,8 @@ LumpedPortData::GetPower(mfem::ParComplexGridFunction &E, mfem::ParComplexGridFu mfem::ParLinearForm pr(&nd_fespace), pi(&nd_fespace); pr.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbr)); pi.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbi)); - pr.UseFastAssembly(true); - pi.UseFastAssembly(true); + pr.UseFastAssembly(false); + pi.UseFastAssembly(false); pr.Assemble(); pi.Assemble(); return {pr(E.real()) + pi(E.imag()), pr(E.imag()) - pi(E.real())}; @@ -258,7 +258,7 @@ double LumpedPortData::GetVoltage(mfem::ParGridFunction &E) const } v = std::make_unique(&nd_fespace); v->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb)); - v->UseFastAssembly(true); + v->UseFastAssembly(false); v->Assemble(); } return (*v)(E); diff --git a/palace/models/postoperator.cpp b/palace/models/postoperator.cpp index 443067841..3b635fba2 100644 --- a/palace/models/postoperator.cpp +++ b/palace/models/postoperator.cpp @@ 
-278,34 +278,35 @@ void PostOperator::InitializeDataCollection(const IoData &iodata) } } -void PostOperator::GetBField(std::complex omega, - const petsc::PetscParMatrix &NegCurl, - const petsc::PetscParVector &e, petsc::PetscParVector &b) -{ - // Compute B = -1/(iω) ∇ x E on the true dofs. - MFEM_VERIFY(e.GetSize() == NegCurl.Width() && b.GetSize() == NegCurl.Height(), - "Size mismatch error computing B-field in PostOperator!"); - NegCurl.Mult(e, b); - b.Scale(1.0 / (1i * omega)); -} - -void PostOperator::GetBField(const mfem::Operator &Curl, const mfem::Vector &a, - mfem::Vector &b) -{ - // Compute B = ∇ x A on the true dofs. - MFEM_VERIFY(a.Size() == Curl.Width() && b.Size() == Curl.Height(), - "Size mismatch error computing B-field in PostOperator!"); - Curl.Mult(a, b); -} - -void PostOperator::GetEField(const mfem::Operator &NegGrad, const mfem::Vector &v, - mfem::Vector &e) -{ - // Compute E = -∇V on the true dofs. - MFEM_VERIFY(v.Size() == NegGrad.Width() && e.Size() == NegGrad.Height(), - "Size mismatch error computing E-field in PostOperator!"); - NegGrad.Mult(v, e); -} +// //XX TODO REMOVE THESE +// void PostOperator::GetBField(std::complex omega, +// const petsc::PetscParMatrix &NegCurl, +// const petsc::PetscParVector &e, petsc::PetscParVector &b) +// { +// // Compute B = -1/(iω) ∇ x E on the true dofs. +// MFEM_VERIFY(e.GetSize() == NegCurl.Width() && b.GetSize() == NegCurl.Height(), +// "Size mismatch error computing B-field in PostOperator!"); +// NegCurl.Mult(e, b); +// b.Scale(1.0 / (1i * omega)); +// } + +// void PostOperator::GetBField(const mfem::Operator &Curl, const mfem::Vector &a, +// mfem::Vector &b) +// { +// // Compute B = ∇ x A on the true dofs. 
+// MFEM_VERIFY(a.Size() == Curl.Width() && b.Size() == Curl.Height(), +// "Size mismatch error computing B-field in PostOperator!"); +// Curl.Mult(a, b); +// } + +// void PostOperator::GetEField(const mfem::Operator &NegGrad, const mfem::Vector &v, +// mfem::Vector &e) +// { +// // Compute E = -∇V on the true dofs. +// MFEM_VERIFY(v.Size() == NegGrad.Width() && e.Size() == NegGrad.Height(), +// "Size mismatch error computing E-field in PostOperator!"); +// NegGrad.Mult(v, e); +// } void PostOperator::SetEGridFunction(const petsc::PetscParVector &e) { diff --git a/palace/models/postoperator.hpp b/palace/models/postoperator.hpp index 88586ee97..9d8fa341e 100644 --- a/palace/models/postoperator.hpp +++ b/palace/models/postoperator.hpp @@ -86,19 +86,23 @@ class PostOperator bool HasE() const { return E.has_value(); } bool HasB() const { return B.has_value(); } - // Compute the magnetic flux density B in RT space from electric field solution E solution - // in ND space for the time-harmonic case: B = -1/(iω) ∇ x E. - static void GetBField(std::complex omega, const petsc::PetscParMatrix &NegCurl, - const petsc::PetscParVector &e, petsc::PetscParVector &b); - - // Compute the magnetic flux density B in RT space from the magnetic vector potential - // solution A in ND space: B = ∇ x A. - static void GetBField(const mfem::Operator &Curl, const mfem::Vector &a, mfem::Vector &b); - - // Compute the electric field E in ND space from the scalar potential solution V in H1 - // space: E = -∇V. - static void GetEField(const mfem::Operator &NegGrad, const mfem::Vector &v, - mfem::Vector &e); + // XX TODO REMOVE THESE + // // Compute the magnetic flux density B in RT space from electric field solution E + // solution + // // in ND space for the time-harmonic case: B = -1/(iω) ∇ x E. 
+ // static void GetBField(std::complex omega, const petsc::PetscParMatrix + // &NegCurl, + // const petsc::PetscParVector &e, petsc::PetscParVector &b); + + // // Compute the magnetic flux density B in RT space from the magnetic vector potential + // // solution A in ND space: B = ∇ x A. + // static void GetBField(const mfem::Operator &Curl, const mfem::Vector &a, mfem::Vector + // &b); + + // // Compute the electric field E in ND space from the scalar potential solution V in H1 + // // space: E = -∇V. + // static void GetEField(const mfem::Operator &NegGrad, const mfem::Vector &v, + // mfem::Vector &e); // Populate the grid function solutions for the E- and B-field using the solution vectors // on the true dofs. For the real-valued overload, the electric scalar potential can be diff --git a/palace/models/romoperator.cpp b/palace/models/romoperator.cpp index e369319f6..6d99bb419 100644 --- a/palace/models/romoperator.cpp +++ b/palace/models/romoperator.cpp @@ -3,10 +3,11 @@ #include "romoperator.hpp" +#if 0 // XX TODO DISABLE ROM FOR NOW + #include #include -#include "fem/freqdomain.hpp" -#include "fem/operator.hpp" +#include "linalg/operator.hpp" #include "models/spaceoperator.hpp" #include "utils/communication.hpp" #include "utils/iodata.hpp" @@ -61,7 +62,7 @@ RomOperator::RomOperator(const IoData &iodata, SpaceOperator &sp, int nmax) if (iodata.solver.driven.adaptive_metric_aposteriori) { constexpr int curlcurl_verbose = 0; - kspKM = std::make_unique( + kspKM = std::make_unique( spaceop.GetMaterialOp(), spaceop.GetDbcMarker(), spaceop.GetNDSpaces(), spaceop.GetH1Spaces(), iodata.solver.linear.tol, iodata.solver.linear.max_it, curlcurl_verbose); @@ -153,13 +154,17 @@ void RomOperator::SolveHDM(double omega, petsc::PetscParVector &E, bool print) std::vector> P, AuxP; A2[step] = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::EXTRA, omega, mfem::Operator::DIAG_ZERO, print); + + //XX TODO FIX WITH SUM OPERATOR + auto A = utils::GetSystemMatrixShell(omega, 
*K, *M, C.get(), A2[step].get()); spaceop.GetPreconditionerMatrix(omega, P, AuxP, print); pc0->SetOperator(P, &AuxP); ksp0->SetOperator(*A); Mpi::Print("\n"); - spaceop.GetFreqDomainExcitationVector(omega, *R0); + spaceop.GetFreqDomainExcitationVector( + omega, *R0); // XX TODO ASSEMBLE PIECE WISE LIKE OPERATOR... E.SetZero(); ksp0->Mult(*R0, E); } @@ -532,3 +537,5 @@ void RomOperator::BVDotVecInternal(petsc::PetscDenseMatrix &V, petsc::PetscParVe } } // namespace palace + +#endif diff --git a/palace/models/romoperator.hpp b/palace/models/romoperator.hpp index 616d7ad4b..b5fc05acf 100644 --- a/palace/models/romoperator.hpp +++ b/palace/models/romoperator.hpp @@ -4,6 +4,8 @@ #ifndef PALACE_MODELS_ROM_OPERATOR_HPP #define PALACE_MODELS_ROM_OPERATOR_HPP +#if 0 // XX TODO DISABLE ROM FOR NOW + #include #include #include @@ -50,7 +52,7 @@ class RomOperator std::unique_ptr ksp; // Linear solver for inner product solves for error metric. - std::unique_ptr kspKM; + std::unique_ptr kspKM; std::unique_ptr opKM; // PROM reduced-order basis and parameter domain samplings. 
@@ -105,4 +107,6 @@ class RomOperator } // namespace palace +#endif + #endif // PALACE_MODELS_ROM_OPERATOR_HPP diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index c25404dbe..95e35573f 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -3,12 +3,8 @@ #include "spaceoperator.hpp" -#include -#include "fem/coefficient.hpp" #include "fem/integrator.hpp" #include "fem/multigrid.hpp" -#include "fem/operator.hpp" -#include "linalg/petsc.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" #include "utils/iodata.hpp" @@ -71,7 +67,7 @@ mfem::Array SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMe } template -auto AddIntegrators(mfem::ParBilinearForm &a, T1 &df, T2 &f, T3 &dfb, T4 &fb) +auto AddIntegrators(mfem::BilinearForm &a, T1 &df, T2 &f, T3 &dfb, T4 &fb) { if (!df.empty()) { @@ -92,7 +88,7 @@ auto AddIntegrators(mfem::ParBilinearForm &a, T1 &df, T2 &f, T3 &dfb, T4 &fb) } template -auto AddAuxIntegrators(mfem::ParBilinearForm &a, T1 &f, T2 &fb) +auto AddAuxIntegrators(mfem::BilinearForm &a, T1 &f, T2 &fb) { if (!f.empty()) { @@ -108,32 +104,35 @@ auto AddAuxIntegrators(mfem::ParBilinearForm &a, T1 &f, T2 &fb) SpaceOperator::SpaceOperator(const IoData &iodata, const std::vector> &mesh) - : dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), skip_zeros(0), - pc_gmg(iodata.solver.linear.mat_gmg), pc_lor(iodata.solver.linear.mat_lor), - pc_shifted(iodata.solver.linear.mat_shifted), print_hdr(true), + : assembly_level(iodata.solver.linear.mat_pa ? 
mfem::AssemblyLevel::PARTIAL + : mfem::AssemblyLevel::LEGACY), + skip_zeros(0), pc_gmg(iodata.solver.linear.mat_gmg), + pc_lor(iodata.solver.linear.mat_lor), pc_shifted(iodata.solver.linear.mat_shifted), + print_hdr(true), print_prec_hdr(true), + dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), nd_fecs(utils::ConstructFECollections( pc_gmg, pc_lor, iodata.solver.order, mesh.back()->Dimension())), h1_fecs(utils::ConstructFECollections( pc_gmg, false, iodata.solver.order, mesh.back()->Dimension())), rt_fec(iodata.solver.order - 1, mesh.back()->Dimension()), nd_fespaces(pc_gmg ? utils::ConstructFiniteElementSpaceHierarchy( - mesh, nd_fecs, dbc_marker) + mesh, nd_fecs, &dbc_marker, &nd_dbc_tdof_lists) : utils::ConstructFiniteElementSpaceHierarchy( - *mesh.back(), *nd_fecs.back())), + *mesh.back(), *nd_fecs.back(), &dbc_marker, + &nd_dbc_tdof_lists.emplace_back())), h1_fespaces(pc_gmg ? utils::ConstructFiniteElementSpaceHierarchy( - mesh, h1_fecs, dbc_marker) + mesh, h1_fecs, &dbc_marker, &h1_dbc_tdof_lists) : utils::ConstructFiniteElementSpaceHierarchy( - *mesh.back(), *h1_fecs.back())), + *mesh.back(), *h1_fecs.back(), &dbc_marker, + &h1_dbc_tdof_lists.emplace_back())), rt_fespace(mesh.back().get(), &rt_fec), mat_op(iodata, *mesh.back()), farfield_op(iodata, mat_op, *mesh.back()), surf_sigma_op(iodata, *mesh.back()), - surf_z_op(iodata, *mesh.back()), lumped_port_op(iodata, h1_fespaces.GetFinestFESpace()), - wave_port_op(iodata, mat_op, nd_fespaces.GetFinestFESpace(), - h1_fespaces.GetFinestFESpace()), - surf_j_op(iodata, h1_fespaces.GetFinestFESpace()) + surf_z_op(iodata, *mesh.back()), lumped_port_op(iodata, GetH1Space()), + wave_port_op(iodata, mat_op, GetNDSpace(), GetH1Space()), + surf_j_op(iodata, GetH1Space()) { // Finalize setup. CheckBoundaryProperties(); - nd_fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker, dbc_tdof_list); // Print essential BC information. 
if (dbc_marker.Max() > 0) @@ -165,6 +164,11 @@ void SpaceOperator::CheckBoundaryProperties() // aux_bdr_marker = 1; // Mark all boundaries (including material interfaces // // added during mesh preprocessing) // // As tested, this does not eliminate all DC modes! + for (int l = 0; l < h1_fespaces.GetNumLevels(); l++) + { + h1_fespaces.GetFESpaceAtLevel(l).GetEssentialTrueDofs( + aux_bdr_marker, aux_bdr_tdof_lists.emplace_back()); + } // A final check that no boundary attribute is assigned multiple boundary conditions. The // one exception is that a lumped port boundary attribute can be also be assigned some @@ -200,340 +204,356 @@ void SpaceOperator::CheckBoundaryProperties() } } -void SpaceOperator::PrintHeader() +std::unique_ptr +SpaceOperator::GetSystemMatrix(SpaceOperator::OperatorType type, + Operator::DiagonalPolicy diag_policy) { if (print_hdr) { - Mpi::Print("\nConfiguring system matrices, number of global unknowns: {:d}\n", - nd_fespaces.GetFinestFESpace().GlobalTrueVSize()); + Mpi::Print("\nAssembling system matrices, number of global unknowns:\n" + " ND: {:d}\n H1: {:d}\n RT: {:d}\n", + GetNDSpace().GlobalTrueVSize(), GetH1Space().GlobalTrueVSize(), + GetRTSpace().GlobalTrueVSize()); print_hdr = false; } + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); + SumCoefficient dfb; + switch (type) + { + case OperatorType::STIFFNESS: + AddStiffnessCoefficients(1.0, df, f, fb); + break; + case OperatorType::DAMPING: + AddDampingCoefficients(1.0, f, fb); + break; + case OperatorType::MASS: + AddRealMassCoefficients(1.0, f, fb); + break; + case OperatorType::EXTRA: + default: + MFEM_ABORT("Invalid GetSystemMatrix matrix type for HypreParMatrix output!"); + } + if (df.empty() && f.empty() && dfb.empty() && fb.empty()) + { + return {}; + } + auto a = std::make_unique(&GetNDSpace()); + AddIntegrators(*a, df, f, dfb, fb); + a->SetAssemblyLevel(assembly_level); + a->Assemble(skip_zeros); + a->Finalize(skip_zeros); 
+ auto A = std::make_unique(std::move(a), GetNDSpace(), GetNDSpace()); + A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return A; } -std::unique_ptr -SpaceOperator::GetSystemMatrixPetsc(SpaceOperator::OperatorType type, double omega, - mfem::Operator::DiagonalPolicy ess_diag, bool print) +std::unique_ptr +SpaceOperator::GetComplexSystemMatrix(SpaceOperator::OperatorType type, double omega, + Operator::DiagonalPolicy diag_policy) { - // Construct the frequency-dependent complex linear system matrix: - // A = K + iω C - ω² (Mr + i Mi) + A2(ω) - // or any one of its terms. - const int sdim = nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension(); + if (print_hdr) + { + Mpi::Print("\nAssembling system matrices, number of global unknowns:\n" + " ND: {:d}\n H1: {:d}\n RT: {:d}\n", + GetNDSpace().GlobalTrueVSize(), GetH1Space().GlobalTrueVSize(), + GetRTSpace().GlobalTrueVSize()); + print_hdr = false; + } + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); SumMatrixCoefficient dfr(sdim), dfi(sdim), fr(sdim), fi(sdim), fbr(sdim), fbi(sdim); SumCoefficient dfbr, dfbi; - std::string str; switch (type) { - case OperatorType::COMPLETE: - AddStiffnessCoefficients(1.0, dfr, fr, fbr); - AddDampingCoefficients(omega, fi, fbi); - AddRealMassCoefficients(-omega * omega, false, fr, fbr); - AddImagMassCoefficients(-omega * omega, fi, fbi); - AddExtraSystemBdrCoefficients(omega, dfbr, dfbi, fbr, fbi); - str = "A"; - break; case OperatorType::STIFFNESS: - MFEM_VERIFY(omega == 0.0, - "GetSystemMatrix for type OperatorType::STIFFNESS does not use omega " - "parameter!"); + MFEM_VERIFY(omega == 0.0, "GetComplexSystemMatrix for type OperatorType::STIFFNESS " + "does not use omega parameter!"); AddStiffnessCoefficients(1.0, dfr, fr, fbr); - str = "K"; - break; - case OperatorType::MASS: - MFEM_VERIFY( - omega == 0.0, - "GetSystemMatrix for type OperatorType::MASS does not use omega parameter!"); - AddRealMassCoefficients(1.0, false, fr, fbr); - 
AddImagMassCoefficients(1.0, fi, fbi); - str = "M"; break; case OperatorType::DAMPING: - MFEM_VERIFY( - omega == 0.0, - "GetSystemMatrix for type OperatorType::DAMPING does not use omega parameter!"); + MFEM_VERIFY(omega == 0.0, "GetComplexSystemMatrix for type OperatorType::DAMPING " + "does not use omega parameter!"); AddDampingCoefficients(1.0, fr, fbr); - str = "C"; + break; + case OperatorType::MASS: + MFEM_VERIFY(omega == 0.0, "GetComplexSystemMatrix for type OperatorType::MASS does " + "not use omega parameter!"); + AddRealMassCoefficients(1.0, fr, fbr); + AddImagMassCoefficients(1.0, fi, fbi); break; case OperatorType::EXTRA: + MFEM_VERIFY(omega > 0.0, + "GetComplexSystemMatrix for type OperatorType::EXTRA requires " + "use of omega parameter!"); AddExtraSystemBdrCoefficients(omega, dfbr, dfbi, fbr, fbi); - str = "A2"; break; } - std::unique_ptr hAr, hAi; bool has_real = false, has_imag = false; + std::unique_ptr ar, ai; if (!dfr.empty() || !fr.empty() || !dfbr.empty() || !fbr.empty()) { has_real = true; - mfem::ParBilinearForm a(&nd_fespaces.GetFinestFESpace()); - AddIntegrators(a, dfr, fr, dfbr, fbr); - // a.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - a.Assemble(skip_zeros); - a.Finalize(skip_zeros); - hAr.reset(a.ParallelAssemble()); - hAr->EliminateBC(dbc_tdof_list, ess_diag); + ar = std::make_unique(&GetNDSpace()); + AddIntegrators(*ar, dfr, fr, dfbr, fbr); + ar->SetAssemblyLevel(assembly_level); + ar->Assemble(skip_zeros); + ar->Finalize(skip_zeros); } if (!dfi.empty() || !fi.empty() || !dfbi.empty() || !fbi.empty()) { has_imag = true; - mfem::ParBilinearForm a(&nd_fespaces.GetFinestFESpace()); - AddIntegrators(a, dfi, fi, dfbi, fbi); - // a.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - a.Assemble(skip_zeros); - a.Finalize(skip_zeros); - hAi.reset(a.ParallelAssemble()); - hAi->EliminateBC(dbc_tdof_list, mfem::Operator::DiagonalPolicy::DIAG_ZERO); + ai = std::make_unique(&GetNDSpace()); + AddIntegrators(*ai, dfi, fi, dfbi, fbi); + 
ai->SetAssemblyLevel(assembly_level); + ai->Assemble(skip_zeros); + ai->Finalize(skip_zeros); } if (!has_real && !has_imag) { return {}; } - auto A = std::make_unique( - nd_fespaces.GetFinestFESpace().GetComm(), std::move(hAr), std::move(hAi)); - if (!has_imag) + auto A = std::make_unique( + std::make_unique(std::move(ar), std::move(ai)), GetNDSpace(), + GetNDSpace()); + A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return A; +} + +std::unique_ptr SpaceOperator::GetSystemMatrix(double a0, double a1, double a2, + const ParOperator *K, + const ParOperator *C, + const ParOperator *M) +{ + int height = -1, width = -1; + if (K) + { + height = K->Height(); + width = K->Width(); + } + else if (C) { - A->SetRealSymmetric(); + height = C->Height(); + width = C->Width(); } - else + else if (M) { - A->SetSymmetric(); + height = M->Height(); + width = M->Width(); } - - // Print some information. - PrintHeader(); - if (print) + MFEM_VERIFY(height >= 0 && width >= 0, + "At least one argument to GetSystemMatrix must not be empty!"); + auto sum = std::make_unique(height, width); + if (K && a0 != 0.0) { - if (has_real && has_imag) - { - Mpi::Print(" Re{{{}}}: NNZ = {:d}, norm = {:e}\n Im{{{}}}: NNZ = {:d}, norm = {:e}\n", - str, A->NNZReal(), A->NormFReal(), str, A->NNZImag(), A->NormFImag()); - } - else - { - Mpi::Print(" {}: NNZ = {:d}, norm = {:e}\n", str, - has_real ? A->NNZReal() : A->NNZImag(), - has_real ? 
A->NormFReal() : A->NormFImag()); - } + sum->AddOperator(K->GetOperator(), a0); } + if (C && a1 != 0.0) + { + sum->AddOperator(C->GetOperator(), a1); + } + if (M && a2 != 0.0) + { + sum->AddOperator(M->GetOperator(), a2); + } + auto A = std::make_unique(std::move(sum), GetNDSpace(), GetNDSpace()); + A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), Operator::DiagonalPolicy::DIAG_ONE); return A; } -std::unique_ptr -SpaceOperator::GetSystemMatrix(SpaceOperator::OperatorType type, double omega, - mfem::Operator::DiagonalPolicy ess_diag, bool print) +std::unique_ptr SpaceOperator::GetComplexSystemMatrix( + std::complex a0, std::complex a1, std::complex a2, + const ComplexParOperator *K, const ComplexParOperator *C, const ComplexParOperator *M, + const ComplexParOperator *A2) { - // Construct the frequency-dependent complex linear system matrix: - // A = K + iω C - ω² (Mr + i Mi) + A2(ω) - // or any subset of its terms. For output as a HypreParMatrix, only some of - // the terms are available. 
- MFEM_VERIFY(omega == 0.0, - "GetSystemMatrix for HypreParMatrix does not use omega parameter!"); - const int sdim = nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension(); - SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); - SumCoefficient dfb; - std::string str; - switch (type) + int height = -1, width = -1; + if (K) { - case OperatorType::STIFFNESS: - AddStiffnessCoefficients(1.0, df, f, fb); - str = "K"; - break; - case OperatorType::MASS: - AddRealMassCoefficients(1.0, false, f, fb); - str = "M"; - break; - case OperatorType::DAMPING: - AddDampingCoefficients(1.0, f, fb); - str = "C"; - break; - case OperatorType::COMPLETE: - case OperatorType::EXTRA: - MFEM_ABORT("Invalid GetSystemMatrix matrix type for HypreParMatrix output!"); + height = K->Height(); + width = K->Width(); } - if (df.empty() && f.empty() && fb.empty()) + else if (C) { - return {}; + height = C->Height(); + width = C->Width(); } - mfem::ParBilinearForm a(&nd_fespaces.GetFinestFESpace()); - AddIntegrators(a, df, f, dfb, fb); - // a.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - a.Assemble(skip_zeros); - a.Finalize(skip_zeros); - std::unique_ptr A(a.ParallelAssemble()); - A->EliminateBC(dbc_tdof_list, ess_diag); - - // Print some information. 
- PrintHeader(); - if (print) + else if (M) + { + height = M->Height(); + width = M->Width(); + } + else if (A2) + { + height = A2->Height(); + width = A2->Width(); + } + MFEM_VERIFY(height >= 0 && width >= 0, + "At least one argument to GetSystemMatrix must not be empty!"); + auto sum = std::make_unique(height, width); + if (K && a0 != 0.0) + { + sum->AddOperator(K->GetOperator(), a0); + } + if (C && a1 != 0.0) + { + sum->AddOperator(C->GetOperator(), a1); + } + if (M && a2 != 0.0) { - Mpi::Print(" {}: NNZ = {:d}, norm = {:e}\n", str, A->NNZ(), - hypre_ParCSRMatrixFnorm(*A)); + sum->AddOperator(M->GetOperator(), a2); } + if (A2) + { + sum->AddOperator(A2->GetOperator(), 1.0); + } + auto A = std::make_unique(std::move(sum), GetNDSpace(), GetNDSpace()); + A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), Operator::DiagonalPolicy::DIAG_ONE); return A; } -void SpaceOperator::GetPreconditionerInternal( - const std::function &AddCoefficients, - std::vector> &B, - std::vector> &AuxB, bool print) +void SpaceOperator::GetPreconditionerMatrix(double a0, double a1, double a2, double a3, + std::vector> &B, + std::vector> &AuxB) { - // Construct the real, optionally SPD matrix for frequency or time domain preconditioning - // (Mr > 0, Mi < 0): - // B = K + ω C + ω² (-/+ Mr - Mi) , or - // B = a0 K + a1 C + Mr . + if (print_prec_hdr) + { + Mpi::Print("\nAssembling multigrid hierarchy:\n"); + } MFEM_VERIFY(h1_fespaces.GetNumLevels() == nd_fespaces.GetNumLevels(), - "Multigrid heirarchy mismatch for auxiliary space preconditioning!"); + "Multigrid hierarchy mismatch for auxiliary space preconditioning!"); for (int s = 0; s < 2; s++) { auto &B_ = (s == 0) ? B : AuxB; + auto &fespaces = (s == 0) ? nd_fespaces : h1_fespaces; + auto &dbc_tdof_lists = (s == 0) ? 
nd_dbc_tdof_lists : h1_dbc_tdof_lists; B_.clear(); - B_.reserve(nd_fespaces.GetNumLevels()); - for (int l = 0; l < nd_fespaces.GetNumLevels(); l++) + B_.reserve(fespaces.GetNumLevels()); + for (int l = 0; l < fespaces.GetNumLevels(); l++) { - auto &fespace_l = - (s == 0) ? nd_fespaces.GetFESpaceAtLevel(l) : h1_fespaces.GetFESpaceAtLevel(l); - mfem::Array dbc_tdof_list_l; - fespace_l.GetEssentialTrueDofs(dbc_marker, dbc_tdof_list_l); - - const int sdim = nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension(); + auto &fespace_l = fespaces.GetFESpaceAtLevel(l); + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); SumCoefficient dfb; - AddCoefficients(df, f, dfb, fb); - mfem::ParBilinearForm b(&fespace_l); - if (s == 1) + AddStiffnessCoefficients(a0, df, f, fb); + AddDampingCoefficients(a1, f, fb); + AddRealMassCoefficients( + pc_shifted ? std::abs(a2) : a2, f, fb); + AddExtraSystemBdrCoefficients(a3, dfb, dfb, fb, fb); + auto b = std::make_unique(&fespace_l); + if (s == 0) { - // H1 auxiliary space matrix Gᵀ B G. - AddAuxIntegrators(b, f, fb); + AddIntegrators(*b, df, f, dfb, fb); } else { - AddIntegrators(b, df, f, dfb, fb); + // H1 auxiliary space matrix Gᵀ B G. + AddAuxIntegrators(*b, f, fb); + } + if (print_prec_hdr) + { + Mpi::Print(" Level {:d}{}: {:d} unknowns", l, (s == 0) ? "" : " (auxiliary)", + fespace_l.GlobalTrueVSize()); } - // b.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - b.Assemble(skip_zeros); - b.Finalize(skip_zeros); - std::unique_ptr hB; if (pc_lor) { - // After we construct the LOR discretization we can extract the LOR matrix and the + // After we construct the LOR discretization we deep copy the LOR matrix and the // original bilinear form and LOR discretization are no longer needed. 
- mfem::ParLORDiscretization lor(b, dbc_tdof_list_l); - hB = std::make_unique(lor.GetAssembledMatrix()); + mfem::Array dummy_dbc_tdof_list; + mfem::LORDiscretization lor(*b, dummy_dbc_tdof_list); + auto b_lor = std::make_unique(lor.GetAssembledMatrix()); + if (print_prec_hdr) + { + HYPRE_BigInt nnz = b_lor->NumNonZeroElems(); + Mpi::GlobalSum(1, &nnz, fespace_l.GetComm()); + Mpi::Print(", {:d} NNZ (LOR)\n", nnz); + } + B_.push_back(std::make_unique(std::move(b_lor), fespace_l, fespace_l)); } else { - hB.reset(b.ParallelAssemble()); - } - hB->EliminateBC(dbc_tdof_list_l, mfem::Operator::DiagonalPolicy::DIAG_ONE); - - // Print some information. - PrintHeader(); - if (s == 0 && print) - { - std::string str = ""; - if (pc_gmg && pc_lor) - { - str = fmt::format(" (Level {:d}, {:d} unknowns, LOR)", l, - fespace_l.GlobalTrueVSize()); - } - else if (pc_gmg) - { - str = fmt::format(" (Level {:d}, {:d} unknowns)", l, fespace_l.GlobalTrueVSize()); - } - else if (pc_lor) + b->SetAssemblyLevel(assembly_level); + b->Assemble(skip_zeros); + b->Finalize(skip_zeros); + if (print_prec_hdr) { - str = " (LOR)"; + if (assembly_level == mfem::AssemblyLevel::LEGACY) + { + HYPRE_BigInt nnz = b->SpMat().NumNonZeroElems(); + Mpi::GlobalSum(1, &nnz, fespace_l.GetComm()); + Mpi::Print(", {:d} NNZ\n", nnz); + } + else + { + Mpi::Print("\n"); + } } - Mpi::Print(" B{}: NNZ = {:d}, norm = {:e}\n", str, hB->NNZ(), - hypre_ParCSRMatrixFnorm(*hB)); + B_.push_back(std::make_unique(std::move(b), fespace_l, fespace_l)); } - B_.push_back(std::move(hB)); + B_.back()->SetEssentialTrueDofs(dbc_tdof_lists[l], + Operator::DiagonalPolicy::DIAG_ONE); } } + print_prec_hdr = false; } -void SpaceOperator::GetPreconditionerMatrix( - double omega, std::vector> &B, - std::vector> &AuxB, bool print) +std::unique_ptr SpaceOperator::GetCurlMatrix() { - // Frequency domain preconditioner matrix. 
- auto AddCoefficients = [this, omega](SumMatrixCoefficient &df, SumMatrixCoefficient &f, - SumCoefficient &dfb, SumMatrixCoefficient &fb) - { - this->AddStiffnessCoefficients(1.0, df, f, fb); - this->AddDampingCoefficients(omega, f, fb); - this->AddRealMassCoefficients(pc_shifted ? omega * omega : -omega * omega, true, f, fb); - this->AddExtraSystemBdrCoefficients(omega, dfb, dfb, fb, fb); - }; - GetPreconditionerInternal(AddCoefficients, B, AuxB, print); + auto curl = std::make_unique(&GetNDSpace(), &GetRTSpace()); + curl->AddDomainInterpolator(new mfem::CurlInterpolator); + curl->SetAssemblyLevel(assembly_level); + curl->Assemble(); + curl->Finalize(); + return std::make_unique(std::move(curl), GetNDSpace(), GetRTSpace(), true); } -void SpaceOperator::GetPreconditionerMatrix( - double a0, double a1, std::vector> &B, - std::vector> &AuxB, bool print) +std::unique_ptr SpaceOperator::GetComplexCurlMatrix() { - // Time domain preconditioner matrix. - auto AddCoefficients = [this, a0, a1](SumMatrixCoefficient &df, SumMatrixCoefficient &f, - SumCoefficient &dfb, SumMatrixCoefficient &fb) - { - this->AddStiffnessCoefficients(a0, df, f, fb); - this->AddDampingCoefficients(a1, f, fb); - this->AddRealMassCoefficients(1.0, false, f, fb); - }; - GetPreconditionerInternal(AddCoefficients, B, AuxB, print); + auto curl = std::make_unique(&GetNDSpace(), &GetRTSpace()); + curl->AddDomainInterpolator(new mfem::CurlInterpolator); + curl->SetAssemblyLevel(assembly_level); + curl->Assemble(); + curl->Finalize(); + return std::make_unique( + std::make_unique(std::move(curl), nullptr), GetNDSpace(), + GetRTSpace(), true); } -std::unique_ptr SpaceOperator::GetNegCurlMatrix() +std::unique_ptr SpaceOperator::GetGradMatrix() { - mfem::ParDiscreteLinearOperator curl(&nd_fespaces.GetFinestFESpace(), &rt_fespace); - curl.AddDomainInterpolator(new mfem::CurlInterpolator); - // curl.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - curl.Assemble(); - curl.Finalize(); - std::unique_ptr 
NegCurl(curl.ParallelAssemble()); - *NegCurl *= -1.0; - return NegCurl; + auto grad = std::make_unique(&GetH1Space(), &GetNDSpace()); + grad->AddDomainInterpolator(new mfem::GradientInterpolator); + grad->SetAssemblyLevel(assembly_level); + grad->Assemble(); + grad->Finalize(); + return std::make_unique(std::move(grad), GetH1Space(), GetNDSpace(), true); } -std::unique_ptr SpaceOperator::GetNegCurlMatrixPetsc() +std::unique_ptr SpaceOperator::GetComplexGradMatrix() { - return std::make_unique(nd_fespaces.GetFinestFESpace().GetComm(), - GetNegCurlMatrix()); -} - -std::unique_ptr SpaceOperator::GetGradMatrix() -{ - mfem::ParDiscreteLinearOperator grad(&h1_fespaces.GetFinestFESpace(), - &nd_fespaces.GetFinestFESpace()); - grad.AddDomainInterpolator(new mfem::GradientInterpolator); - // grad.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - grad.Assemble(); - grad.Finalize(); - return std::unique_ptr(grad.ParallelAssemble()); -} - -std::unique_ptr SpaceOperator::GetGradMatrixPetsc() -{ - return std::make_unique(nd_fespaces.GetFinestFESpace().GetComm(), - GetGradMatrix()); + auto grad = std::make_unique(&GetH1Space(), &GetNDSpace()); + grad->AddDomainInterpolator(new mfem::GradientInterpolator); + grad->SetAssemblyLevel(assembly_level); + grad->Assemble(); + grad->Finalize(); + return std::make_unique( + std::make_unique(std::move(grad), nullptr), GetH1Space(), + GetNDSpace(), true); } void SpaceOperator::AddStiffnessCoefficients(double coef, SumMatrixCoefficient &df, SumMatrixCoefficient &f, SumMatrixCoefficient &fb) { - // Contribution for curl-curl term. - df.AddCoefficient( - std::make_unique>( - mat_op, coef)); + { + constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_PERMEABILITY; + df.AddCoefficient(std::make_unique>(mat_op, coef)); + } // Contribution for London superconductors. 
if (mat_op.HasLondonDepth()) { - f.AddCoefficient( - std::make_unique< - MaterialPropertyCoefficient>(mat_op, - coef), - mat_op.GetLondonDepthMarker()); + constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_LONDON_DEPTH; + f.AddCoefficient(std::make_unique>(mat_op, coef), + mat_op.GetLondonDepthMarker()); } // Robin BC contributions due to surface impedance and lumped ports (inductance). @@ -541,23 +560,30 @@ void SpaceOperator::AddStiffnessCoefficients(double coef, SumMatrixCoefficient & lumped_port_op.AddStiffnessBdrCoefficients(coef, fb); } -void SpaceOperator::AddRealMassCoefficients(double coef, bool abs_coef, - SumMatrixCoefficient &f, - SumMatrixCoefficient &fb) +void SpaceOperator::AddDampingCoefficients(double coef, SumMatrixCoefficient &f, + SumMatrixCoefficient &fb) { - if (abs_coef) - { - f.AddCoefficient(std::make_unique< - MaterialPropertyCoefficient>( - mat_op, coef)); - } - else + // Contribution for domain conductivity. + if (mat_op.HasConductivity()) { - f.AddCoefficient(std::make_unique< - MaterialPropertyCoefficient>( - mat_op, coef)); + constexpr MaterialPropertyType MatType = MaterialPropertyType::CONDUCTIVITY; + f.AddCoefficient(std::make_unique>(mat_op, coef), + mat_op.GetConductivityMarker()); } + // Robin BC contributions due to surface impedance, lumped ports, and absorbing + // boundaries (resistance). + farfield_op.AddDampingBdrCoefficients(coef, fb); + surf_z_op.AddDampingBdrCoefficients(coef, fb); + lumped_port_op.AddDampingBdrCoefficients(coef, fb); +} + +template +void SpaceOperator::AddRealMassCoefficients(double coef, SumMatrixCoefficient &f, + SumMatrixCoefficient &fb) +{ + f.AddCoefficient(std::make_unique>(mat_op, coef)); + // Robin BC contributions due to surface impedance and lumped ports (capacitance). 
surf_z_op.AddMassBdrCoefficients(coef, fb); lumped_port_op.AddMassBdrCoefficients(coef, fb); @@ -569,33 +595,12 @@ void SpaceOperator::AddImagMassCoefficients(double coef, SumMatrixCoefficient &f // Contribution for loss tangent: ε => ε * (1 - i tan(δ)). if (mat_op.HasLossTangent()) { - f.AddCoefficient( - std::make_unique< - MaterialPropertyCoefficient>(mat_op, - coef), - mat_op.GetLossTangentMarker()); + constexpr MaterialPropertyType MatType = MaterialPropertyType::PERMITTIVITY_IMAG; + f.AddCoefficient(std::make_unique>(mat_op, coef), + mat_op.GetLossTangentMarker()); } } -void SpaceOperator::AddDampingCoefficients(double coef, SumMatrixCoefficient &f, - SumMatrixCoefficient &fb) -{ - // Contribution for domain conductivity. - if (mat_op.HasConductivity()) - { - f.AddCoefficient( - std::make_unique>( - mat_op, coef), - mat_op.GetConductivityMarker()); - } - - // Robin BC contributions due to surface impedance, lumped ports, and absorbing - // boundaries (resistance). - farfield_op.AddDampingBdrCoefficients(coef, fb); - surf_z_op.AddDampingBdrCoefficients(coef, fb); - lumped_port_op.AddDampingBdrCoefficients(coef, fb); -} - void SpaceOperator::AddExtraSystemBdrCoefficients(double omega, SumCoefficient &dfbr, SumCoefficient &dfbi, SumMatrixCoefficient &fbr, @@ -609,105 +614,102 @@ void SpaceOperator::AddExtraSystemBdrCoefficients(double omega, SumCoefficient & wave_port_op.AddExtraSystemBdrCoefficients(omega, fbr, fbi); } -bool SpaceOperator::GetTimeDomainExcitationVector(mfem::Vector &RHS) +bool SpaceOperator::GetExcitationVector(Vector &RHS) { - return GetExcitationVector1Internal(RHS); + // Time domain excitation vector. 
+ RHS.SetSize(GetNDSpace().GetTrueVSize()); + RHS = 0.0; + bool nnz = AddExcitationVector1Internal(RHS); + RHS.SetSubVector(nd_dbc_tdof_lists.back(), 0.0); + return nnz; } -bool SpaceOperator::GetFreqDomainExcitationVector(double omega, petsc::PetscParVector &RHS) +bool SpaceOperator::GetExcitationVector(double omega, ComplexVector &RHS) { - mfem::Vector hRHSr, hRHSi; - bool nnz1 = GetExcitationVector1Internal(hRHSr); - if (nnz1) - { - RHS.SetFromVector(hRHSr); // Sets into real part - RHS.Scale(1i * omega); - } - else - { - RHS.SetZero(); - } - bool nnz2 = GetExcitationVector2Internal(omega, hRHSr, hRHSi); - if (nnz2) - { - petsc::PetscParVector RHS2(RHS.GetComm(), hRHSr, hRHSi); - RHS.AXPY(1.0, RHS2); - } + // Frequency domain excitation vector: RHS = iω RHS1 + RHS2(ω). + RHS.SetSize(GetNDSpace().GetTrueVSize()); + RHS = 0.0; + bool nnz1 = AddExcitationVector1Internal(RHS.Real()); + RHS *= 1i * omega; + bool nnz2 = AddExcitationVector2Internal(omega, RHS); + RHS.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); + RHS.Imag().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); return nnz1 || nnz2; } -bool SpaceOperator::GetFreqDomainExcitationVector1(petsc::PetscParVector &RHS1) +bool SpaceOperator::GetExcitationVector1(ComplexVector &RHS1) { - // Assemble the frequency domain excitation term, including only the contributions from - // lumped ports and surface currents, which is purely imaginary with linear frequency - // dependence (coefficient iω, it is accounted for later). - mfem::Vector hRHS1; - bool nnz = GetExcitationVector1Internal(hRHS1); - RHS1.SetFromVector(hRHS1); // Sets into real part - return nnz; + // Assemble the frequency domain excitation term with linear frequency dependence + // (coefficient iω, see GetExcitationVector above, is accounted for later). 
+ RHS1.SetSize(GetNDSpace().GetTrueVSize()); + RHS1 = 0.0; + bool nnz1 = AddExcitationVector1Internal(RHS1.Real()); + RHS1.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); + return nnz1; } -bool SpaceOperator::GetFreqDomainExcitationVector2(double omega, - petsc::PetscParVector &RHS2) +bool SpaceOperator::GetExcitationVector2(double omega, ComplexVector &RHS2) { - mfem::Vector hRHS2r, hRHS2i; - bool nnz = GetExcitationVector2Internal(omega, hRHS2r, hRHS2i); - RHS2.SetFromVectors(hRHS2r, hRHS2i); - return nnz; + RHS2.SetSize(GetNDSpace().GetTrueVSize()); + RHS2 = 0.0; + bool nnz2 = AddExcitationVector2Internal(omega, RHS2); + RHS2.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); + RHS2.Imag().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); + return nnz2; } -bool SpaceOperator::GetExcitationVector1Internal(mfem::Vector &RHS) +bool SpaceOperator::AddExcitationVector1Internal(Vector &RHS1) { - // Assemble the time domain excitation -g'(t) J or -iω J. The g'(t) factor is not - // accounted for here, it is accounted for in the time integration later. Likewise, the - // coefficient iω, is accounted for later). - SumVectorCoefficient fb(nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension()); + // Assemble the time domain excitation -g'(t) J or frequency domain excitation -iω J. + // The g'(t) or iω factors are not accounted for here, they are accounted for in the time + // integration or frequency sweep later.
+ MFEM_VERIFY(RHS1.Size() == GetNDSpace().GetTrueVSize(), + "Invalid T-vector size for AddExcitationVector1Internal!"); + SumVectorCoefficient fb(GetNDSpace().GetParMesh()->SpaceDimension()); lumped_port_op.AddExcitationBdrCoefficients(fb); surf_j_op.AddExcitationBdrCoefficients(fb); - RHS.SetSize(nd_fespaces.GetFinestFESpace().GetTrueVSize()); - RHS = 0.0; if (fb.empty()) { return false; } - mfem::ParLinearForm rhs(&nd_fespaces.GetFinestFESpace()); - rhs.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb)); - rhs.UseFastAssembly(true); - rhs.Assemble(); - rhs.ParallelAssemble(RHS); - RHS.SetSubVector(dbc_tdof_list, 0.0); + mfem::LinearForm rhs1(&GetNDSpace()); + rhs1.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb)); + rhs1.UseFastAssembly(false); + rhs1.Assemble(); + GetNDSpace().GetProlongationMatrix()->AddMultTranspose(rhs1, RHS1); return true; } -bool SpaceOperator::GetExcitationVector2Internal(double omega, mfem::Vector &RHSr, - mfem::Vector &RHSi) +bool SpaceOperator::AddExcitationVector2Internal(double omega, ComplexVector &RHS2) { // Assemble the contribution of wave ports to the frequency domain excitation term at the // specified frequency. 
- SumVectorCoefficient fbr(nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension()), - fbi(nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension()); + MFEM_VERIFY(RHS2.Size() == GetNDSpace().GetTrueVSize(), + "Invalid T-vector size for AddExcitationVector2Internal!"); + SumVectorCoefficient fbr(GetNDSpace().GetParMesh()->SpaceDimension()), + fbi(GetNDSpace().GetParMesh()->SpaceDimension()); wave_port_op.AddExcitationBdrCoefficients(omega, fbr, fbi); - RHSr.SetSize(nd_fespaces.GetFinestFESpace().GetTrueVSize()); - RHSi.SetSize(nd_fespaces.GetFinestFESpace().GetTrueVSize()); - RHSr = 0.0; - RHSi = 0.0; if (fbr.empty() && fbi.empty()) { return false; } - mfem::ParLinearForm rhsr(&nd_fespaces.GetFinestFESpace()); - mfem::ParLinearForm rhsi(&nd_fespaces.GetFinestFESpace()); - rhsr.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbr)); - rhsi.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbi)); - rhsr.UseFastAssembly(true); - rhsi.UseFastAssembly(true); - rhsr.Assemble(); - rhsi.Assemble(); - rhsr.ParallelAssemble(RHSr); - rhsi.ParallelAssemble(RHSi); - RHSr.SetSubVector(dbc_tdof_list, 0.0); - RHSi.SetSubVector(dbc_tdof_list, 0.0); + mfem::LinearForm rhs2r(&GetNDSpace()), rhs2i(&GetNDSpace()); + rhs2r.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbr)); + rhs2i.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbi)); + rhs2r.UseFastAssembly(false); + rhs2i.UseFastAssembly(false); + rhs2r.Assemble(); + rhs2i.Assemble(); + GetNDSpace().GetProlongationMatrix()->AddMultTranspose(rhs2r, RHS2.Real()); + GetNDSpace().GetProlongationMatrix()->AddMultTranspose(rhs2i, RHS2.Imag()); return true; } +template void +SpaceOperator::AddRealMassCoefficients( + double, SumMatrixCoefficient &, SumMatrixCoefficient &); +template void +SpaceOperator::AddRealMassCoefficients( + double, SumMatrixCoefficient &, SumMatrixCoefficient &); + } // namespace palace diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp index 
ce526ae7d..79473acb0 100644 --- a/palace/models/spaceoperator.hpp +++ b/palace/models/spaceoperator.hpp @@ -4,10 +4,14 @@ #ifndef PALACE_MODELS_SPACE_OPERATOR_HPP #define PALACE_MODELS_SPACE_OPERATOR_HPP -#include +#include #include #include #include +#include "fem/coefficient.hpp" +#include "linalg/complex.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" #include "models/farfieldboundaryoperator.hpp" #include "models/lumpedportoperator.hpp" #include "models/materialoperator.hpp" @@ -20,16 +24,6 @@ namespace palace { class IoData; -class SumCoefficient; -class SumMatrixCoefficient; - -namespace petsc -{ - -class PetscParMatrix; -class PetscParVector; - -} // namespace petsc // // A class handling spatial discretization of the governing equations. @@ -37,23 +31,23 @@ class PetscParVector; class SpaceOperator { private: - // Perfect electrical conductor essential boundary condition markers. - mfem::Array dbc_marker, dbc_tdof_list, aux_bdr_marker; - void CheckBoundaryProperties(); + const mfem::AssemblyLevel assembly_level; // Use full or partial assembly for operators + const int skip_zeros; // Skip zeros during full assembly of operators + const bool pc_gmg; // Use geometric multigrid in preconditioning + const bool pc_lor; // Use low-order refined (LOR) space for the preconditioner + const bool pc_shifted; // Use shifted mass matrix for the preconditioner - // Options for system matrix assembly. - const int skip_zeros; // Whether to skip the zeros during assembly of operators - const bool pc_gmg; // Whether to use geometric multigrid in preconditioning - const bool pc_lor; // Whether to use low-order refined (LOR) preconditioner - const bool pc_shifted; // Whether the preconditioner uses the shifted mass matrix + // Helper variables for log file printing. + bool print_hdr, print_prec_hdr; - // Helper variable and function for log file printing. 
- bool print_hdr; - void PrintHeader(); + // Perfect electrical conductor essential boundary condition markers. + mfem::Array dbc_marker, aux_bdr_marker; + std::vector> nd_dbc_tdof_lists, h1_dbc_tdof_lists, aux_bdr_tdof_lists; + void CheckBoundaryProperties(); - // Objects defining the finite element spaces for the electric field(Nedelec) and magnetic - // flux density (Raviart-Thomas) on the given mesh. The H1 spaces are used for various - // purposes throughout the code including postprocessing. + // Objects defining the finite element spaces for the electric field (Nedelec) and + // magnetic flux density (Raviart-Thomas) on the given mesh. The H1 spaces are used for + // various purposes throughout the code including postprocessing. std::vector> nd_fecs; std::vector> h1_fecs; mfem::RT_FECollection rt_fec; @@ -71,43 +65,36 @@ class SpaceOperator WavePortOperator wave_port_op; SurfaceCurrentOperator surf_j_op; - // Helper function to assemble preconditioner matrix data structures. - void GetPreconditionerInternal( - const std::function &AddCoefficients, - std::vector> &B, - std::vector> &AuxB, bool print); - // Helper functions for building the bilinear forms corresponding to the discretized // operators in Maxwell's equations. 
void AddStiffnessCoefficients(double coef, SumMatrixCoefficient &df, SumMatrixCoefficient &f, SumMatrixCoefficient &fb); - void AddRealMassCoefficients(double coef, bool abs_coef, SumMatrixCoefficient &f, + void AddDampingCoefficients(double coef, SumMatrixCoefficient &f, + SumMatrixCoefficient &fb); + template + void AddRealMassCoefficients(double coef, SumMatrixCoefficient &f, SumMatrixCoefficient &fb); void AddImagMassCoefficients(double coef, SumMatrixCoefficient &f, SumMatrixCoefficient &fb); - void AddDampingCoefficients(double coef, SumMatrixCoefficient &f, - SumMatrixCoefficient &fb); void AddExtraSystemBdrCoefficients(double omega, SumCoefficient &dfbr, SumCoefficient &dfbi, SumMatrixCoefficient &fbr, SumMatrixCoefficient &fbi); // Helper functions for excitation vector assembly. - bool GetExcitationVector1Internal(mfem::Vector &RHS); - bool GetExcitationVector2Internal(double omega, mfem::Vector &RHSr, mfem::Vector &RHSi); + bool AddExcitationVector1Internal(Vector &RHS); + bool AddExcitationVector2Internal(double omega, ComplexVector &RHS); public: SpaceOperator(const IoData &iodata, const std::vector> &mesh); - // Returns array marking Dirichlet BC (PEC) attributes and list of local true dofs. - const mfem::Array &GetDbcMarker() const { return dbc_marker; } - const mfem::Array &GetDbcTDofList() const { return dbc_tdof_list; } - - // Returns array marking all boundary condition attributes, PEC included. These are all - // boundaries which affect the stiffness and damping (K and C) matrices, used for - // nullspace corrections. - const mfem::Array &GetAuxBdrMarker() const { return aux_bdr_marker; } + // Returns lists of all boundary condition true dofs, PEC included, for the auxiliary + // H1 space hierarchy. These are all boundaries which affect the stiffness and damping + // (K and C) matrices, used for nullspace corrections. 
+ const std::vector> &GetAuxBdrTDofLists() const + { + return aux_bdr_tdof_lists; + } // Return material operator for postprocessing. const MaterialOperator &GetMaterialOp() const { return mat_op; } @@ -127,65 +114,68 @@ class SpaceOperator auto &GetH1Space() { return h1_fespaces.GetFinestFESpace(); } auto &GetRTSpace() { return rt_fespace; } - // Construct the frequency-dependent complex linear system matrix: - // A = K + iω C - ω² (Mr + i Mi) + A2(ω) - // or any one of its terms. The type parameter controls which terms of the above - // formulation to include in the resulting matrix. The argument ω is only used for - // the "complete" or "extra" system matrix options, all others come unscaled. + // Construct any part of the frequency-dependent complex linear system matrix: + // A = K + iω C - ω² (Mr + i Mi) + A2(ω) . + // For time domain problems, any one of K, C, or M = Mr can be constructed. The argument + // ω is required only for constructing the "extra" matrix A2(ω). enum class OperatorType { - COMPLETE, STIFFNESS, - MASS, DAMPING, + MASS, EXTRA }; - std::unique_ptr - GetSystemMatrixPetsc(OperatorType type, double omega, - mfem::Operator::DiagonalPolicy ess_diag, bool print = true); - std::unique_ptr - GetSystemMatrixPetsc(OperatorType type, mfem::Operator::DiagonalPolicy ess_diag, - bool print = true) - { - return GetSystemMatrixPetsc(type, 0.0, ess_diag, print); - } - std::unique_ptr GetSystemMatrix(OperatorType type, double omega, - mfem::Operator::DiagonalPolicy ess_diag, - bool print = true); - std::unique_ptr GetSystemMatrix(OperatorType type, - mfem::Operator::DiagonalPolicy ess_diag, - bool print = true) + + std::unique_ptr GetSystemMatrix(OperatorType type, + mfem::Operator::DiagonalPolicy diag_policy); + std::unique_ptr + GetComplexSystemMatrix(OperatorType type, Operator::DiagonalPolicy diag_policy) { - return GetSystemMatrix(type, 0.0, ess_diag, print); + return GetComplexSystemMatrix(type, 0.0, diag_policy); } - - // Construct the real,
optionally SPD matrix for frequency or time domain preconditioning - // (Mr > 0, Mi < 0): - // B = K + ω C + ω² (-/+ Mr - Mi) , or - // B = a0 K + a1 C + Mr . - void GetPreconditionerMatrix(double omega, - std::vector> &B, - std::vector> &AuxB, - bool print = true); - void GetPreconditionerMatrix(double a0, double a1, - std::vector> &B, - std::vector> &AuxB, - bool print = true); - - // Construct and return the discrete negative curl or gradient matrices. - std::unique_ptr GetNegCurlMatrix(); - std::unique_ptr GetNegCurlMatrixPetsc(); - std::unique_ptr GetGradMatrix(); - std::unique_ptr GetGradMatrixPetsc(); + std::unique_ptr + GetComplexSystemMatrix(OperatorType type, double omega, + mfem::Operator::DiagonalPolicy diag_policy); + + // Construct the complete frequency or time domain system matrix using the provided + // stiffness, damping, mass, and extra matrices: + // A = a0 K + a1 C + a2 (Mr + i Mi) + A2 . + // It is assumed that the inputs have been constructed using previous calls to + // GetSystemMatrix() and the returned operator does not inherit ownership of any of them. + std::unique_ptr GetSystemMatrix(double a0, double a1, double a2, + const ParOperator *K, const ParOperator *C, + const ParOperator *M); + std::unique_ptr + GetComplexSystemMatrix(std::complex a0, std::complex a1, + std::complex a2, const ComplexParOperator *K, + const ComplexParOperator *C, const ComplexParOperator *M, + const ComplexParOperator *A2); + + // Construct the real, optionally SPD matrix for frequency or time domain linear system + // preconditioning (Mr > 0, Mi < 0, |Mr + i Mi| is done on the material property + // coefficient, not the matrix entries themselves): + // B = a0 K + a1 C -/+ a2 |Mr + i Mi| + A2r(a3) + A2i(a3) . + void GetPreconditionerMatrix(double a0, double a1, double a2, double a3, + std::vector> &B, + std::vector> &AuxB); + + // Construct and return the discrete curl or gradient matrices.
The complex variants + // return a matrix suitable for applying to complex-valued vectors. + std::unique_ptr GetCurlMatrix(); + std::unique_ptr GetComplexCurlMatrix(); + std::unique_ptr GetGradMatrix(); + std::unique_ptr GetComplexGradMatrix(); // Assemble the right-hand side source term vector for an incident field or current source - // applied on specified excited boundaries. - bool GetTimeDomainExcitationVector(mfem::Vector &RHS); - bool GetFreqDomainExcitationVector(double omega, petsc::PetscParVector &RHS); - - // Separate out RHS vector as RHS = iω RHS1 + RHS2(ω). - bool GetFreqDomainExcitationVector1(petsc::PetscParVector &RHS1); - bool GetFreqDomainExcitationVector2(double omega, petsc::PetscParVector &RHS2); + // applied on specified excited boundaries. The return value indicates whether or not the + // excitation is nonzero (and thus is true most of the time). + bool GetExcitationVector(Vector &RHS); + bool GetExcitationVector(double omega, ComplexVector &RHS); + + // Separate out RHS vector as RHS = iω RHS1 + RHS2(ω). The return value indicates whether + // or not the excitation is nonzero (and thus is true most of the time). + bool GetExcitationVector1(ComplexVector &RHS1); + bool GetExcitationVector2(double omega, ComplexVector &RHS2); }; } // namespace palace diff --git a/palace/models/surfacepostoperator.cpp b/palace/models/surfacepostoperator.cpp index 5ea49b447..f1d878350 100644 --- a/palace/models/surfacepostoperator.cpp +++ b/palace/models/surfacepostoperator.cpp @@ -65,8 +65,7 @@ SurfacePostOperator::InterfaceDielectricData::InterfaceDielectricData( // Store information about the surface side to consider. int component; - sides.emplace_back(); - mfem::Vector &side = sides.back(); + mfem::Vector &side = sides.emplace_back(); if (node.side.length() == 0) { // This is OK if surface is single sided, just push back an empty Vector. 
@@ -95,8 +94,8 @@ SurfacePostOperator::InterfaceDielectricData::InterfaceDielectricData( } // Store markers for this element of the postprocessing boundary. - attr_markers.emplace_back(); - mesh::AttrToMarker(mesh.bdr_attributes.Max(), node.attributes, attr_markers.back()); + mesh::AttrToMarker(mesh.bdr_attributes.Max(), node.attributes, + attr_markers.emplace_back()); } } @@ -127,8 +126,8 @@ SurfacePostOperator::InterfaceDielectricData::GetCoefficient( SurfacePostOperator::SurfaceChargeData::SurfaceChargeData( const config::CapacitanceData &data, mfem::ParMesh &mesh) { - attr_markers.emplace_back(); - mesh::AttrToMarker(mesh.bdr_attributes.Max(), data.attributes, attr_markers.back()); + mesh::AttrToMarker(mesh.bdr_attributes.Max(), data.attributes, + attr_markers.emplace_back()); } std::unique_ptr SurfacePostOperator::SurfaceChargeData::GetCoefficient( @@ -173,8 +172,8 @@ SurfacePostOperator::SurfaceFluxData::SurfaceFluxData(const config::InductanceDa // Construct the coefficient for this postprocessing boundary (copies the direction // vector). 
- attr_markers.emplace_back(); - mesh::AttrToMarker(mesh.bdr_attributes.Max(), data.attributes, attr_markers.back()); + mesh::AttrToMarker(mesh.bdr_attributes.Max(), data.attributes, + attr_markers.emplace_back()); } std::unique_ptr SurfacePostOperator::SurfaceFluxData::GetCoefficient( @@ -259,7 +258,7 @@ double SurfacePostOperator::GetSurfaceIntegral(const SurfaceData &data, fb.emplace_back(data.GetCoefficient(i, U, mat_op, local_to_shared)); s.AddBoundaryIntegrator(new BoundaryLFIntegrator(*fb.back()), data.attr_markers[i]); } - s.UseFastAssembly(true); + s.UseFastAssembly(false); s.Assemble(); return s(ones); } diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp index 6835846db..841e3a3ff 100644 --- a/palace/models/timeoperator.cpp +++ b/palace/models/timeoperator.cpp @@ -5,8 +5,8 @@ #include #include "linalg/gmg.hpp" +#include "linalg/jacobi.hpp" #include "linalg/pc.hpp" -#include "linalg/petsc.hpp" #include "models/spaceoperator.hpp" #include "utils/communication.hpp" #include "utils/iodata.hpp" @@ -17,37 +17,44 @@ namespace palace namespace { -class CurlCurlSystemOperator : public mfem::SecondOrderTimeDependentOperator +class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOperator { private: // MPI communicator for the parallel operators. MPI_Comm comm; // System matrices and excitation RHS. - std::unique_ptr K, M, C; - mfem::Vector NegJ; - - // Reference to essential boundary true degrees of freedom from SpaceOperator (not owned). - const mfem::Array dbc_tdof_list; + std::unique_ptr K, M, C; + Vector NegJ; // Time dependence of current pulse for excitation: -J'(t) = -g'(t) J. This function // returns g'(t). std::function &dJcoef; // Internal objects for solution of linear systems during time stepping. 
- mutable double a0_, a1_; - mutable mfem::Vector RHS; - mutable std::vector> P, AuxP; - std::function> &, - std::vector> &)> - GetPreconditionerMatrix; + double a0_, a1_; + std::unique_ptr A; + std::vector> B, AuxB; + mutable Vector RHS; + + // XX TODO REMOVE + // std::function(double, double)> GetSystemMatrix; + // std::function> &, + // std::vector> &)> + // GetPreconditionerMatrix; + // std::function(double, double)> GetSystemMatrix; // Linear system solvers and settings for implicit time integration. std::unique_ptr kspM, kspA; std::unique_ptr pcM, pcA; mutable int kspM_mult, kspA_mult, kspM_it, kspA_it; - void FormRHS(const mfem::Vector &u, const mfem::Vector &du, mfem::Vector &rhs) const + // Bindings to SpaceOperator functions to get the system matrix and preconditioner, and + // construct the linear solver. + std::function(double a0, double a1)> + ConfigureLinearSolver; + + void FormRHS(const Vector &u, const Vector &du, Vector &rhs) const { // Multiply: rhs = -(K u + C du) - g'(t) J. rhs = 0.0; @@ -60,49 +67,38 @@ class CurlCurlSystemOperator : public mfem::SecondOrderTimeDependentOperator } public: - CurlCurlSystemOperator(const IoData &iodata, SpaceOperator &spaceop, - std::function &djcoef, double t0, - mfem::TimeDependentOperator::Type type) + TimeDependentCurlCurlOperator(const IoData &iodata, SpaceOperator &spaceop, + std::function &djcoef, double t0, + mfem::TimeDependentOperator::Type type) : mfem::SecondOrderTimeDependentOperator(spaceop.GetNDSpace().GetTrueVSize(), t0, type), - comm(spaceop.GetNDSpace().GetComm()), dbc_tdof_list(spaceop.GetDbcTDofList()), - dJcoef(djcoef) + comm(spaceop.GetNDSpace().GetComm()), dJcoef(djcoef) { // Construct the system matrices defining the linear operator. PEC boundaries are // handled simply by setting diagonal entries of the mass matrix for the corresponding // dofs. Because the Dirichlet BC is always homogenous, no special elimination is // required on the RHS. 
Diagonal entries are set in M (so M is non-singular). K = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::STIFFNESS, - mfem::Operator::DIAG_ZERO); - M = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::MASS, - mfem::Operator::DIAG_ONE); - C = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::DAMPING, - mfem::Operator::DIAG_ZERO); + Operator::DIAG_ZERO); + M = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::MASS, Operator::DIAG_ONE); + C = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::DAMPING, Operator::DIAG_ZERO); // Set up RHS vector for the current source term: -g'(t) J, where g(t) handles the time // dependence. - spaceop.GetTimeDomainExcitationVector(NegJ); + spaceop.GetExcitationVector(NegJ); RHS.SetSize(NegJ.Size()); - // Set up linear solvers (SetOperator will be called later on at first time step). - mfem::IterativeSolver::PrintLevel print = - mfem::IterativeSolver::PrintLevel().Warnings().Errors(); - if (iodata.problem.verbose > 0) + // Set up linear solvers. { - print.Summary(); - if (iodata.problem.verbose > 1) - { - print.Iterations(); - if (iodata.problem.verbose > 2) - { - print.All(); - } - } - } - { - // PCG with a simple smoother preconditioner for mass matrix systems. - mfem::Vector diag(M->Height()); + // PCG with a simple Jacobi preconditioner for mass matrix systems. + Vector diag(M->Height()); M->AssembleDiagonal(diag); - pcM = std::make_unique(diag, spaceop.GetDbcTDofList()); + + // XX TODO: Should not need DBC TDOF LIST as the diagonal is already 1 upon + // assembly... (see ParOperator) + // Maybe avoid MFEM's JAcobi smoother and write our own like in Chebyshev?? 
+ // pcM = std::make_unique(diag, + // spaceop.GetDbcTDofList()); + pcM = std::make_unique(diag); auto pcg = std::make_unique(comm); pcg->iterative_mode = iodata.solver.linear.ksp_initial_guess; @@ -117,56 +113,119 @@ class CurlCurlSystemOperator : public mfem::SecondOrderTimeDependentOperator // For explicit schemes, recommended to just use cheaper preconditioners. Otherwise, // use AMS or a direct solver. The system matrix is formed as a sequence of matrix // vector products, and is only assembled for preconditioning. - pcA = ConfigurePreconditioner(iodata, spaceop.GetDbcMarker(), spaceop.GetNDSpaces(), - &spaceop.GetH1Spaces()); - auto pcg = std::make_unique(comm); - pcg->iterative_mode = iodata.solver.linear.ksp_initial_guess; - pcg->SetRelTol(iodata.solver.linear.tol); - pcg->SetMaxIter(iodata.solver.linear.max_it); - pcg->SetPrintLevel(print); - pcg->SetOperator(*this); - pcg->SetPreconditioner(*pcA); - kspA = std::move(pcg); + // // XX TODO ADDRESS, WITH BCS, ETC..... + // pcA = ConfigurePreconditioner(iodata, spaceop.GetDbcMarker(), + // spaceop.GetNDSpaces(), + // &spaceop.GetH1Spaces()); + + // XX TODO TEST IF THE BELOW WORKS? 
+ + // auto pcg = std::make_unique(comm); + // pcg->iterative_mode = iodata.solver.linear.ksp_initial_guess; + // pcg->SetRelTol(iodata.solver.linear.tol); + // pcg->SetMaxIter(iodata.solver.linear.max_it); + // pcg->SetPrintLevel(print); + // pcg->SetOperator(*this); + // pcg->SetPreconditioner(*pcA); + // kspA = std::move(pcg); + + // XX TODO REMOVE + // GetSystemMatrix = [this, &spaceop](double a0, double a1) -> + // std::unique_ptr + // { + // return spaceop.GetSystemMatrix(a0, a1, 1.0, this->K.get(), this->C.get(), + // this->M.get()); + // } + // GetPreconditionerMatrix = [&spaceop](double a0, double a1, + // std::vector> &B, + // std::vector> &AuxB) + // { spaceop.GetPreconditionerMatrix(a0, a1, 1.0, 0.0, B, AuxB); }; + // ConfigureLinearSolver = [=](std::unique_ptr &A, + // std::unique_ptr &pc) -> std::unique_ptr + // { + // auto pcg = std::make_unique(comm); + // pcg->iterative_mode = iodata.solver.linear.ksp_initial_guess; + // pcg->SetRelTol(iodata.solver.linear.tol); + // pcg->SetMaxIter(iodata.solver.linear.max_it); + // pcg->SetPrintLevel(print); + // pcg->SetOperator(*A); + // pcg->SetPreconditioner(*pc); + // } + + // The time domain system matrix is A = a0 K + a1 C + M, which is constructed using the + // assembled K, C, and M matrices and the coefficients a0 and a1 defined by the time + // integrator. if (iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::DEFAULT && iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::CG) { Mpi::Warning("Transient problem type always uses CG as the Krylov solver!\n"); } - - // The assembled matrix for preconditioning is constructed as a function of the - // coefficients defined by the time integrator.
- GetPreconditionerMatrix = [&](double a0, double a1, - std::vector> &B, - std::vector> &AuxB) - { spaceop.GetPreconditionerMatrix(a0, a1, B, AuxB, true); }; + bool iterative_mode = iodata.solver.linear.ksp_initial_guess; + double tol = iodata.solver.linear.tol; + int max_it = iodata.solver.linear.max_it; + mfem::IterativeSolver::PrintLevel print = + mfem::IterativeSolver::PrintLevel().Warnings().Errors(); + if (iodata.problem.verbose > 0) + { + print.Summary(); + if (iodata.problem.verbose > 1) + { + print.Iterations(); + if (iodata.problem.verbose > 2) + { + print.All(); + } + } + } + ConfigureLinearSolver = [this, &spaceop, iterative_mode, tol, max_it, + print](double a0, + double a1) -> std::unique_ptr + { + // Configure the system matrix and also the matrix (matrices) from which the + // preconditioner will be constructed. + this->A = spaceop.GetSystemMatrix(a0, a1, 1.0, this->K.get(), this->C.get(), + this->M.get()); + spaceop.GetPreconditionerMatrix(a0, a1, 1.0, 0.0, this->B, this->AuxB); + + // Configure the preconditioner. + auto *gmg = dynamic_cast(this->pcA.get()); + + // XX TODO WIP + // if (gmg) + // { + // gmg->SetOperator(this->B, &this->AuxB); + // } + // else + // { + // this->pcA->SetOperator(*this->B.back()); + // } + + // Construct and return the linear solver. 
+ auto pcg = std::make_unique(this->comm); + pcg->iterative_mode = iterative_mode; + pcg->SetRelTol(tol); + pcg->SetMaxIter(max_it); + pcg->SetPrintLevel(print); + pcg->SetOperator(*this->A); + pcg->SetPreconditioner(*this->pcA); + return pcg; + }; } kspM_mult = kspA_mult = kspM_it = kspA_it = 0; } MPI_Comm GetComm() const { return comm; } - const mfem::Operator &GetK() const { return *K; } - const mfem::Operator &GetM() const { return *M; } - const mfem::Operator &GetC() const { return *C; } - const mfem::Array &GetDbcTDofList() const { return dbc_tdof_list; } + const ParOperator &GetK() const { return *K; } + const ParOperator &GetM() const { return *M; } + const ParOperator &GetC() const { return *C; } int GetNumMult() const { return kspM_mult; } int GetNumMultIter() const { return kspM_it; } int GetNumImplicitSolve() const { return kspA_mult; } int GetNumImplicitSolveIter() const { return kspA_it; } - void Mult(const mfem::Vector &x, mfem::Vector &y) const override - { - // Multiply: y = (a0 K + a1 C + M) x. - M->Mult(x, y); - K->AddMult(x, y, a0_); - if (C) - { - C->AddMult(x, y, a1_); - } - } - - void Mult(const mfem::Vector &u, const mfem::Vector &du, mfem::Vector &ddu) const override + void Mult(const Vector &u, const Vector &du, Vector &ddu) const override { // Solve: M ddu = -(K u + C du) - g'(t) J. Mpi::Print("\n"); @@ -186,8 +245,8 @@ class CurlCurlSystemOperator : public mfem::SecondOrderTimeDependentOperator kspM_it += kspM->GetNumIterations(); } - void ImplicitSolve(const double a0, const double a1, const mfem::Vector &u, - const mfem::Vector &du, mfem::Vector &k) override + void ImplicitSolve(const double a0, const double a1, const Vector &u, const Vector &du, + Vector &k) override { // Solve: (a0 K + a1 C + M) k = -(K u + C du) - g'(t) J, where a0 may be 0 in the // explicit case. At first iteration, construct the solver. 
Also don't print a newline @@ -198,17 +257,24 @@ class CurlCurlSystemOperator : public mfem::SecondOrderTimeDependentOperator } if (kspA_mult == 0 || a0 != a0_ || a1 != a1_) { - // Configure the matrix (matrices) from which the preconditioner will be constructed. - GetPreconditionerMatrix(a0, a1, P, AuxP); - auto *gmg = dynamic_cast(pcA.get()); - if (gmg) - { - gmg->SetOperator(P, &AuxP); - } - else - { - pcA->SetOperator(*P.back()); - } + // Configure the linear solver, including the system matrix and also the matrix + // (matrices) from which the preconditioner will be constructed. + kspA = ConfigureLinearSolver(a0, a1); + + // XX TODO WORKING: REMOVE THE BELOW IF THIS WORKS... + + // A = GetSystemMatrix(a0, a1); + // GetPreconditionerMatrix(a0, a1, P, AuxP); + // auto *gmg = dynamic_cast(pcA.get()); + // if (gmg) + // { + // gmg->SetOperator(P, &AuxP); + // } + // else + // { + // pcA->SetOperator(*P.back()); + // } + a0_ = a0; a1_ = a1; k = 0.0; @@ -225,44 +291,19 @@ class CurlCurlSystemOperator : public mfem::SecondOrderTimeDependentOperator } }; -class SymmetricProductOperator : public mfem::Operator -{ -private: - const mfem::Operator &A, &B; - mutable mfem::Vector z; - -public: - SymmetricProductOperator(const mfem::Operator &opA, const mfem::Operator &opB) - : mfem::Operator(opA.Height(), opB.Width()), A(opA), B(opB), z(opB.Height()) - { - } - - void Mult(const mfem::Vector &x, mfem::Vector &y) const override - { - B.Mult(x, z); - A.Mult(z, y); - } - - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override - { - A.Mult(x, z); - B.Mult(z, y); - } -}; - } // namespace TimeOperator::TimeOperator(const IoData &iodata, SpaceOperator &spaceop, std::function &djcoef) { // Construct discrete curl matrix for B-field time integration. - NegCurl = spaceop.GetNegCurlMatrix(); + Curl = spaceop.GetCurlMatrix(); // Allocate space for solution vectors. 
- E.SetSize(NegCurl->Width()); - dE.SetSize(NegCurl->Width()); - En.SetSize(NegCurl->Width()); - B.SetSize(NegCurl->Height()); + E.SetSize(Curl->Width()); + dE.SetSize(Curl->Width()); + En.SetSize(Curl->Width()); + B.SetSize(Curl->Height()); // Create ODE solver for 2nd-order IVP. mfem::TimeDependentOperator::Type type; @@ -296,26 +337,26 @@ TimeOperator::TimeOperator(const IoData &iodata, SpaceOperator &spaceop, } // Set up time-dependent operator for 2nd-order curl-curl equation for E. - op = std::make_unique(iodata, spaceop, djcoef, 0.0, type); + op = std::make_unique(iodata, spaceop, djcoef, 0.0, type); } int TimeOperator::GetTotalKspMult() const { - const auto &curlcurl = dynamic_cast(*op); + const auto &curlcurl = dynamic_cast(*op); return curlcurl.GetNumMult() + curlcurl.GetNumImplicitSolve(); } int TimeOperator::GetTotalKspIter() const { - const auto &curlcurl = dynamic_cast(*op); + const auto &curlcurl = dynamic_cast(*op); return curlcurl.GetNumMultIter() + curlcurl.GetNumImplicitSolveIter(); } double TimeOperator::GetMaxTimeStep() const { - const auto &curlcurl = dynamic_cast(*op); - const mfem::Operator &M = curlcurl.GetM(); - const mfem::Operator &K = curlcurl.GetK(); + const auto &curlcurl = dynamic_cast(*op); + const ParOperator &M = curlcurl.GetM(); + const ParOperator &K = curlcurl.GetK(); // Solver for M⁻¹. constexpr double lin_tol = 1.0e-9; @@ -326,15 +367,14 @@ double TimeOperator::GetMaxTimeStep() const pcg.SetPrintLevel(0); pcg.SetOperator(M); - mfem::Vector diag(M.Height()); + Vector diag(M.Height()); M.AssembleDiagonal(diag); - mfem::OperatorJacobiSmoother prec(diag, curlcurl.GetDbcTDofList()); + JacobiSmoother prec(diag); pcg.SetPreconditioner(prec); // Power iteration to estimate largest eigenvalue of undamped system matrix M⁻¹ K. 
- petsc::PetscShellMatrix MinvK(curlcurl.GetComm(), - std::make_unique(pcg, K)); - double lam = MinvK.Norm2(); + SymmetricProductOperator op(pcg, K); + double lam = linalg::SpectralNorm(curlcurl.GetComm(), op, false); MFEM_VERIFY(lam > 0.0, "Error during power iteration, λ = " << lam << "!"); return 2.0 / std::sqrt(lam); } @@ -356,7 +396,7 @@ void TimeOperator::Step(double &t, double &dt) // Trapezoidal integration for B-field: dB/dt = -∇ x E. En.Add(1.0, E); - NegCurl->AddMult(En, B, 0.5 * dt); + Curl->AddMult(En, B, -0.5 * dt); } } // namespace palace diff --git a/palace/models/timeoperator.hpp b/palace/models/timeoperator.hpp index c96e99190..57c56bcf1 100644 --- a/palace/models/timeoperator.hpp +++ b/palace/models/timeoperator.hpp @@ -7,6 +7,8 @@ #include #include #include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { @@ -21,7 +23,7 @@ class TimeOperator { private: // Solution vector storage. - mfem::Vector E, dE, En, B; + Vector E, dE, En, B; // Time integrator for the curl-curl E-field formulation. std::unique_ptr ode; @@ -30,16 +32,16 @@ class TimeOperator std::unique_ptr op; // Discrete curl for B-field time integration. - std::unique_ptr NegCurl; + std::unique_ptr Curl; public: TimeOperator(const IoData &iodata, SpaceOperator &spaceop, std::function &djcoef); // Access solution vectors for E- and B-fields. - const mfem::Vector &GetE() const { return E; } - const mfem::Vector &GetEdot() const { return dE; } - const mfem::Vector &GetB() const { return B; } + const Vector &GetE() const { return E; } + const Vector &GetEdot() const { return dE; } + const Vector &GetB() const { return B; } // Is time integration scheme explicit or implicit. 
bool isExplicit() const { return op->isExplicit(); } diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp index ed5dc6963..59ccd433e 100644 --- a/palace/models/waveportoperator.cpp +++ b/palace/models/waveportoperator.cpp @@ -27,7 +27,8 @@ inline mfem::HypreParMatrix GetBtt(const MaterialOperator &mat_op, mfem::Array &attr_marker) { // Mass matrix: Bₜₜ = (μ⁻¹ u, v). - MaterialPropertyCoefficient muinv_func(mat_op); + constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_PERMEABILITY; + MaterialPropertyCoefficient muinv_func(mat_op); mfem::ParBilinearForm btt(&nd_fespace); btt.AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(muinv_func), attr_marker); // btt.SetAssemblyLevel(mfem::AssemblyLevel::FULL); @@ -42,8 +43,9 @@ inline mfem::HypreParMatrix GetBtn(const MaterialOperator &mat_op, mfem::Array &attr_marker) { // Mass matrix: Bₜₙ = (μ⁻¹ ∇ₜ u, v). + constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_PERMEABILITY; + MaterialPropertyCoefficient muinv_func(mat_op); mfem::ParMixedBilinearForm btn(&h1_fespace, &nd_fespace); - MaterialPropertyCoefficient muinv_func(mat_op); btn.AddBoundaryIntegrator(new mfem::MixedVectorGradientIntegrator(muinv_func), attr_marker); // btn.SetAssemblyLevel(mfem::AssemblyLevel::FULL); @@ -63,16 +65,17 @@ inline Bnn GetBnn(const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &h mfem::Array &attr_marker) { // Mass matrix: Bₙₙ = (μ⁻¹ ∇ₜ u, ∇ₜ v) - ω² (ε u, v) = Bₙₙ₁ - ω² Bₙₙ₂. 
- MaterialPropertyCoefficient muinv_func(mat_op); + constexpr MaterialPropertyType MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY; + MaterialPropertyCoefficient muinv_func(mat_op); mfem::ParBilinearForm bnn1(&h1_fespace); bnn1.AddBoundaryIntegrator(new mfem::MixedGradGradIntegrator(muinv_func), attr_marker); // bnn1.SetAssemblyLevel(mfem::AssemblyLevel::FULL); bnn1.Assemble(skip_zeros); bnn1.Finalize(skip_zeros); + constexpr MaterialPropertyType MatTypeEpsReal = MaterialPropertyType::PERMITTIVITY_REAL; NormalProjectedCoefficient epsilon_func( - std::make_unique< - MaterialPropertyCoefficient>(mat_op)); + std::make_unique>(mat_op)); mfem::ParBilinearForm bnn2r(&h1_fespace); bnn2r.AddBoundaryIntegrator(new mfem::MixedScalarMassIntegrator(epsilon_func), attr_marker); @@ -81,23 +84,20 @@ inline Bnn GetBnn(const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &h bnn2r.Finalize(skip_zeros); // Contribution for loss tangent: ε => ε * (1 - i tan(δ)). - if (mat_op.HasLossTangent()) - { - NormalProjectedCoefficient negepstandelta_func( - std::make_unique< - MaterialPropertyCoefficient>(mat_op)); - mfem::ParBilinearForm bnn2i(&h1_fespace); - bnn2i.AddBoundaryIntegrator(new mfem::MixedScalarMassIntegrator(negepstandelta_func), - attr_marker); - // bnn2i.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - bnn2i.Assemble(skip_zeros); - bnn2i.Finalize(skip_zeros); - return {*bnn1.ParallelAssemble(), *bnn2r.ParallelAssemble(), *bnn2i.ParallelAssemble()}; - } - else + if (!mat_op.HasLossTangent()) { return {*bnn1.ParallelAssemble(), *bnn2r.ParallelAssemble()}; } + constexpr MaterialPropertyType MatTypeEpsImag = MaterialPropertyType::PERMITTIVITY_IMAG; + NormalProjectedCoefficient negepstandelta_func( + std::make_unique>(mat_op)); + mfem::ParBilinearForm bnn2i(&h1_fespace); + bnn2i.AddBoundaryIntegrator(new mfem::MixedScalarMassIntegrator(negepstandelta_func), + attr_marker); + // bnn2i.SetAssemblyLevel(mfem::AssemblyLevel::FULL); + bnn2i.Assemble(skip_zeros); + 
bnn2i.Finalize(skip_zeros); + return {*bnn1.ParallelAssemble(), *bnn2r.ParallelAssemble(), *bnn2i.ParallelAssemble()}; } struct Att @@ -111,16 +111,17 @@ inline Att GetAtt(const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &n mfem::Array &attr_marker) { // Stiffness matrix: Aₜₜ = (μ⁻¹ ∇ₜ x u, ∇ₜ x v) - ω² (ε u, v) = Aₜₜ₁ - ω² Aₜₜ₂. + constexpr MaterialPropertyType MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY; NormalProjectedCoefficient muinv_func( - std::make_unique>( - mat_op)); + std::make_unique>(mat_op)); mfem::ParBilinearForm att1(&nd_fespace); att1.AddBoundaryIntegrator(new mfem::CurlCurlIntegrator(muinv_func), attr_marker); // att1.SetAssemblyLevel(mfem::AssemblyLevel::FULL); att1.Assemble(skip_zeros); att1.Finalize(skip_zeros); - MaterialPropertyCoefficient epsilon_func(mat_op); + constexpr MaterialPropertyType MatTypeEpsReal = MaterialPropertyType::PERMITTIVITY_REAL; + MaterialPropertyCoefficient epsilon_func(mat_op); mfem::ParBilinearForm att2r(&nd_fespace); att2r.AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func), attr_marker); @@ -133,8 +134,8 @@ inline Att GetAtt(const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &n { return {*att1.ParallelAssemble(), *att2r.ParallelAssemble()}; } - MaterialPropertyCoefficient negepstandelta_func( - mat_op); + constexpr MaterialPropertyType MatTypeEpsImag = MaterialPropertyType::PERMITTIVITY_IMAG; + MaterialPropertyCoefficient negepstandelta_func(mat_op); mfem::ParBilinearForm att2i(&nd_fespace); att2i.AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(negepstandelta_func), attr_marker); @@ -172,7 +173,7 @@ GetSystemMatrices(const mfem::HypreParMatrix &Att1, const mfem::HypreParMatrix & const mfem::Array &nd_tdof_list, const mfem::Array &h1_tdof_list, int nd_tdof_offset) { - // Construct the 2x2 block matrices for the eigenvalue problem. We pre- compute the + // Construct the 2x2 block matrices for the eigenvalue problem. 
We pre-compute the // eigenvalue problem matrices such that: // A = A₁ - ω² A₂, B = A + 1/Θ² B₃ - ω²/Θ² B₄. mfem::Array2D blocks(2, 2); @@ -219,9 +220,9 @@ GetSystemMatrices(const mfem::HypreParMatrix &Att1, const mfem::HypreParMatrix & return petsc::PetscAijMatrix(*hB4r, *hB4i); }(); - // Consolidate list of local ND and H1 tdofs before extracting the respective submatrices. - // The matrix is still distributed over the same number of processors, though some are - // empty (PETSc handles this). + // Consolidate list of local ND and H1 true dofs before extracting the respective + // submatrices. The matrix is still distributed over the same number of processors, + // though some are empty (PETSc handles this). mfem::Array tdof_list; tdof_list.Reserve(nd_tdof_list.Size() + h1_tdof_list.Size()); for (auto tdof : nd_tdof_list) @@ -692,8 +693,8 @@ void WavePortData::Initialize(double omega) mfem::ParLinearForm sut(&nd_fespace), sun(&h1_fespace); sut.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(tdir), attr_marker); sun.AddBoundaryIntegrator(new BoundaryLFIntegrator(ndir), attr_marker); - sut.UseFastAssembly(true); - sun.UseFastAssembly(true); + sut.UseFastAssembly(false); + sun.UseFastAssembly(false); sut.Assemble(); sun.Assemble(); if (sut(E0t->real()) + sun(E0n->real()) < 0.0) @@ -710,8 +711,8 @@ void WavePortData::Initialize(double omega) si = std::make_unique(&nd_fespace); sr->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(*nxH0r_func), attr_marker); si->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(*nxH0i_func), attr_marker); - sr->UseFastAssembly(true); - si->UseFastAssembly(true); + sr->UseFastAssembly(false); + si->UseFastAssembly(false); sr->Assemble(); si->Assemble(); std::complex s0(-(*sr)(E0t->real()) - (*si)(E0t->imag()), @@ -762,8 +763,8 @@ std::complex WavePortData::GetPower(mfem::ParComplexGridFunction &E, mfem::ParLinearForm pr(&nd_fespace), pi(&nd_fespace); pr.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(nxHr_func), 
attr_marker); pi.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(nxHi_func), attr_marker); - pr.UseFastAssembly(true); - pi.UseFastAssembly(true); + pr.UseFastAssembly(false); + pi.UseFastAssembly(false); pr.Assemble(); pi.Assemble(); return {pr(E.real()) + pi(E.imag()), pr(E.imag()) - pi(E.real())}; @@ -981,11 +982,10 @@ void WavePortOperator::AddExtraSystemBdrCoefficients(double omega, Initialize(omega); for (auto &[idx, data] : ports) { - fbi.AddCoefficient( - std::make_unique< - MaterialPropertyCoefficient>( - mat_op, data.GetPropagationConstant().real()), - data.GetMarker()); + constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_PERMEABILITY; + fbi.AddCoefficient(std::make_unique>( + mat_op, data.GetPropagationConstant().real()), + data.GetMarker()); } } diff --git a/palace/models/waveportoperator.hpp b/palace/models/waveportoperator.hpp index 1769ffb74..9004c723a 100644 --- a/palace/models/waveportoperator.hpp +++ b/palace/models/waveportoperator.hpp @@ -185,4 +185,4 @@ class WavePortOperator } // namespace palace -#endif // PALACE_MODELS_WAVE_PORT_OPERATOR_HPP \ No newline at end of file +#endif // PALACE_MODELS_WAVE_PORT_OPERATOR_HPP diff --git a/palace/utils/configfile.cpp b/palace/utils/configfile.cpp index 2fbf74c6b..55d939738 100644 --- a/palace/utils/configfile.cpp +++ b/palace/utils/configfile.cpp @@ -198,8 +198,7 @@ void RefinementData::SetUp(json &model) "configuration file!"); MFEM_VERIFY(it->find("Levels") != it->end(), "Missing \"Boxes\" refinement region \"Levels\" in configuration file!"); - boxlist.emplace_back(); - BoxRefinementData &data = boxlist.back(); + BoxRefinementData &data = boxlist.emplace_back(); data.ref_levels = it->at("Levels"); // Required std::vector bx = xlim->get>(); // Required @@ -267,8 +266,7 @@ void RefinementData::SetUp(json &model) MFEM_VERIFY( it->find("Levels") != it->end(), "Missing \"Spheres\" refinement region \"Levels\" in configuration file!"); - spherelist.emplace_back(); - 
SphereRefinementData &data = spherelist.back(); + SphereRefinementData &data = spherelist.emplace_back(); data.ref_levels = it->at("Levels"); // Required data.r = it->at("Radius"); // Required data.center = ctr->get>(); // Required @@ -345,8 +343,7 @@ void MaterialDomainData::SetUp(json &domains) MFEM_VERIFY( it->find("Attributes") != it->end(), "Missing \"Attributes\" list for \"Materials\" domain in configuration file!"); - vecdata.emplace_back(); - MaterialData &data = vecdata.back(); + MaterialData &data = vecdata.emplace_back(); data.attributes = it->at("Attributes").get>(); // Required data.mu_r = ParseSymmetricMatrixData(*it, "Permeability", data.mu_r); data.epsilon_r = ParseSymmetricMatrixData(*it, "Permittivity", data.epsilon_r); @@ -663,8 +660,7 @@ void ConductivityBoundaryData::SetUp(json &boundaries) MFEM_VERIFY( it->find("Conductivity") != it->end(), "Missing \"Conductivity\" boundary \"Conductivity\" in configuration file!"); - vecdata.emplace_back(); - ConductivityData &data = vecdata.back(); + ConductivityData &data = vecdata.emplace_back(); data.attributes = it->at("Attributes").get>(); // Required data.sigma = it->at("Conductivity"); // Required data.mu_r = it->value("Permeability", data.mu_r); @@ -704,8 +700,7 @@ void ImpedanceBoundaryData::SetUp(json &boundaries) MFEM_VERIFY( it->find("Attributes") != it->end(), "Missing \"Attributes\" list for \"Impedance\" boundary in configuration file!"); - vecdata.emplace_back(); - ImpedanceData &data = vecdata.back(); + ImpedanceData &data = vecdata.emplace_back(); data.attributes = it->at("Attributes").get>(); // Required data.Rs = it->value("Rs", data.Rs); data.Ls = it->value("Ls", data.Ls); @@ -793,8 +788,7 @@ void LumpedPortBoundaryData::SetUp(json &boundaries) MFEM_VERIFY(elem_it->find("Attributes") != elem_it->end(), "Missing \"Attributes\" list for \"LumpedPort\" or \"Terminal\" " "boundary element in configuration file!"); - data.nodes.emplace_back(); - LumpedPortData::Node &node = 
data.nodes.back(); + LumpedPortData::Node &node = data.nodes.emplace_back(); node.attributes = elem_it->at("Attributes").get>(); // Required node.direction = elem_it->value("Direction", node.direction); if (terminal == boundaries.end()) @@ -933,8 +927,7 @@ void SurfaceCurrentBoundaryData::SetUp(json &boundaries) elem_it->find("Attributes") != elem_it->end(), "Missing \"Attributes\" list for \"SurfaceCurrent\" boundary element in " "configuration file!"); - data.nodes.emplace_back(); - SurfaceCurrentData::Node &node = data.nodes.back(); + SurfaceCurrentData::Node &node = data.nodes.emplace_back(); node.attributes = it->at("Attributes").get>(); // Required node.direction = it->value("Direction", node.direction); CheckDirection(node.direction, true); @@ -1102,8 +1095,7 @@ void InterfaceDielectricPostData::SetUp(json &postpro) MFEM_VERIFY(elem_it->find("Attributes") != elem_it->end(), "Missing \"Attributes\" list for \"Dielectric\" boundary element in " "configuration file!"); - data.nodes.emplace_back(); - InterfaceDielectricData::Node &node = data.nodes.back(); + InterfaceDielectricData::Node &node = data.nodes.emplace_back(); node.attributes = elem_it->at("Attributes").get>(); // Required node.side = it->value("Side", node.side); if (!node.side.empty()) @@ -1569,6 +1561,7 @@ void LinearSolverData::SetUp(json &solver) ksp_piped = linear->value("UseKSPPiped", ksp_piped); // Preconditioner-specific options + mat_pa = linear->value("UsePA", mat_pa); mat_gmg = linear->value("UseGMG", mat_gmg); mat_lor = linear->value("UseLOR", mat_lor); mat_shifted = linear->value("UsePCShifted", mat_shifted); @@ -1607,6 +1600,7 @@ void LinearSolverData::SetUp(json &solver) linear->erase("UseCGS2"); linear->erase("UseInitialGuess"); linear->erase("UseKSPPiped"); + linear->erase("UsePA"); linear->erase("UseGMG"); linear->erase("UseLOR"); linear->erase("UsePCShifted"); @@ -1637,6 +1631,7 @@ void LinearSolverData::SetUp(json &solver) // std::cout << "UseCGS2: " << orthog_cgs2 << '\n'; // 
std::cout << "UseInitialGuess: " << ksp_initial_guess << '\n'; // std::cout << "UseKSPPiped: " << ksp_piped << '\n'; + // std::cout << "UsePA: " << mat_pa << '\n'; // std::cout << "UseGMG: " << mat_gmg << '\n'; // std::cout << "UseLOR: " << mat_lor << '\n'; // std::cout << "UsePCShifted: " << mat_shifted << '\n'; diff --git a/palace/utils/configfile.hpp b/palace/utils/configfile.hpp index b3e785565..87ea8a5b8 100644 --- a/palace/utils/configfile.hpp +++ b/palace/utils/configfile.hpp @@ -697,6 +697,10 @@ struct TransientSolverData struct LinearSolverData { + + // XX TODO REVISIT AVAILABLE OPTIONS FOR KSP AFTER HYPRE SWITCH... + // XX TODO REVISIT OPTIONS FOR PA AND KEYWORDS... "GMG" "PA" CAN DO BETTER (RATEL?) + public: // Solver type. enum class Type @@ -751,6 +755,9 @@ struct LinearSolverData // Enable pipelined Krylov solver variants to reduce blocking communications. bool ksp_piped = false; + // Enable partial assembly for operators. + bool mat_pa = false; + // Enable hp-geometric multigrid coarsening, using the solver specified by the type member // at the coarsest level. 
bool mat_gmg = true; diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp index b3b571ed8..1f081de98 100644 --- a/palace/utils/geodata.cpp +++ b/palace/utils/geodata.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include "utils/communication.hpp" #include "utils/filesystem.hpp" #include "utils/iodata.hpp" @@ -1068,16 +1067,17 @@ std::map> CheckMesh(std::unique_ptr &orig_me if (orig_mesh->GetNodes()) { const mfem::GridFunction *nodes = orig_mesh->GetNodes(); - const mfem::FiniteElementSpace *fes = nodes->FESpace(); + const mfem::FiniteElementSpace *fespace = nodes->FESpace(); - mfem::Ordering::Type ordering = fes->GetOrdering(); - int order = fes->GetMaxElementOrder(); + mfem::Ordering::Type ordering = fespace->GetOrdering(); + int order = fespace->GetMaxElementOrder(); int sdim = orig_mesh->SpaceDimension(); - bool discont = dynamic_cast(fes->FEColl()) != nullptr; + bool discont = + dynamic_cast(fespace->FEColl()) != nullptr; new_mesh->SetCurvature(order, discont, sdim, ordering); mfem::GridFunction *new_nodes = new_mesh->GetNodes(); - const mfem::FiniteElementSpace *new_fes = new_nodes->FESpace(); + const mfem::FiniteElementSpace *new_fespace = new_nodes->FESpace(); // The element loop works because we know the mapping from old_mesh to new_mesh element // indices from the insertion order. 
@@ -1088,9 +1088,9 @@ std::map> CheckMesh(std::unique_ptr &orig_me { if (!elem_delete[e]) { - fes->GetElementVDofs(e, vdofs); + fespace->GetElementVDofs(e, vdofs); nodes->GetSubVector(vdofs, loc_vec); - new_fes->GetElementVDofs(te, new_vdofs); + new_fespace->GetElementVDofs(te, new_vdofs); new_nodes->SetSubVector(new_vdofs, loc_vec); te++; } From 75b5532080c22561b44535495f8339f9aef8298a Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Fri, 12 May 2023 12:11:00 -0700 Subject: [PATCH 02/41] WIP: Real-valued linear solvers from MFEM to replace need for PETSc KSP, with reworking to add ParallelAssemble in various solvers which require HypreParMatrix operators TODO: Work into all driver classes. --- palace/drivers/drivensolver.cpp | 4 +- palace/drivers/eigensolver.cpp | 395 ++++++----- palace/drivers/magnetostaticsolver.cpp | 2 +- palace/linalg/CMakeLists.txt | 1 - palace/linalg/amg.cpp | 20 +- palace/linalg/amg.hpp | 13 +- palace/linalg/ams.cpp | 288 ++++---- palace/linalg/ams.hpp | 36 +- palace/linalg/arpack.cpp | 4 + palace/linalg/arpack.hpp | 4 + palace/linalg/chebyshev.cpp | 85 +-- palace/linalg/chebyshev.hpp | 23 +- palace/linalg/complex.cpp | 72 +- palace/linalg/complex.hpp | 22 +- palace/linalg/curlcurl.cpp | 22 +- palace/linalg/curlcurl.hpp | 2 - palace/linalg/distrelaxation.cpp | 88 ++- palace/linalg/distrelaxation.hpp | 278 ++++---- palace/linalg/divfree.cpp | 18 +- palace/linalg/divfree.hpp | 1 - palace/linalg/gmg.cpp | 147 ++-- palace/linalg/gmg.hpp | 103 +-- palace/linalg/jacobi.cpp | 9 +- palace/linalg/jacobi.hpp | 12 +- palace/linalg/ksp.cpp | 923 ++++++++++++++++--------- palace/linalg/ksp.hpp | 248 ++++--- palace/linalg/mumps.cpp | 7 + palace/linalg/mumps.hpp | 3 + palace/linalg/operator.cpp | 73 +- palace/linalg/operator.hpp | 41 +- palace/linalg/pc.hpp | 5 +- palace/linalg/petsc.cpp | 8 +- palace/linalg/slepc.cpp | 4 + palace/linalg/slepc.hpp | 6 +- palace/linalg/strumpack.cpp | 11 +- palace/linalg/strumpack.hpp | 4 +- 
palace/linalg/superlu.cpp | 5 +- palace/linalg/superlu.hpp | 18 +- palace/main.cpp | 12 +- palace/models/curlcurloperator.cpp | 13 +- palace/models/curlcurloperator.hpp | 9 +- palace/models/laplaceoperator.cpp | 4 +- palace/models/postoperator.cpp | 30 - palace/models/postoperator.hpp | 18 - palace/models/spaceoperator.cpp | 24 +- palace/models/timeoperator.cpp | 41 +- palace/models/waveportoperator.cpp | 174 ++--- palace/models/waveportoperator.hpp | 10 +- palace/utils/configfile.cpp | 8 +- palace/utils/configfile.hpp | 14 +- 50 files changed, 1841 insertions(+), 1521 deletions(-) diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp index 2d55986c2..baff3b1aa 100644 --- a/palace/drivers/drivensolver.cpp +++ b/palace/drivers/drivensolver.cpp @@ -144,7 +144,7 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in // &spaceop.GetH1Spaces()); // pc.SetOperator(P, &AuxP); - KspSolver ksp(A->GetComm(), iodata, "ksp_"); + // KspSolver ksp(A->GetComm(), iodata, "ksp_"); // ksp.SetPreconditioner(pc); //XX TODO! // ksp.SetOperator(*A); @@ -210,7 +210,7 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in step++; omega += delta_omega; } - SaveMetadata(ksp.GetTotalNumMult(), ksp.GetTotalNumIter()); + // SaveMetadata(ksp.GetTotalNumMult(), ksp.GetTotalNumIter()); //XX TODO } void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, int nstep, diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp index 9173b7216..8f73a1b8f 100644 --- a/palace/drivers/eigensolver.cpp +++ b/palace/drivers/eigensolver.cpp @@ -51,7 +51,10 @@ void EigenSolver::Solve(std::vector> &mesh, // with λ = iω. A shift-and-invert strategy is employed to solve for the eigenvalues // closest to the specified target, σ. In general, the system matrices are complex and // symmetric. 
- std::unique_ptr eigen; + + // XX TODO + // std::unique_ptr eigen; + config::EigenSolverData::Type type = iodata.solver.eigenmode.type; #if defined(PALACE_WITH_ARPACK) && defined(PALACE_WITH_SLEPC) if (type == config::EigenSolverData::Type::DEFAULT) @@ -77,221 +80,219 @@ void EigenSolver::Solve(std::vector> &mesh, #else #error "Eigenmode solver requires building with ARPACK or SLEPc!" #endif - if (type == config::EigenSolverData::Type::FEAST) - { -#if 0 // XX TODO DISABLE FEAST EIGENSOLVER FOR NOW + // XX TODO REVISIT... - Mpi::Print("\nConfiguring FEAST eigenvalue solver\n"); -#if defined(PALACE_WITH_SLEPC) - if (C) - { - eigen = std::make_unique( - K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, - iodata.problem.verbose); - } - else - { - eigen = std::make_unique( - K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, - iodata.problem.verbose); - } -#endif - -#endif - } - else if (type == config::EigenSolverData::Type::ARPACK) - { - Mpi::Print("\nConfiguring ARPACK eigenvalue solver\n"); -#if defined(PALACE_WITH_ARPACK) - if (C) - { - eigen = std::make_unique(iodata.problem.verbose); - } - else - { - eigen = std::make_unique(iodata.problem.verbose); - } -#endif - } - else // config::EigenSolverData::Type::SLEPC - { - Mpi::Print("\nConfiguring SLEPc eigenvalue solver\n"); -#if defined(PALACE_WITH_SLEPC) - std::unique_ptr slepc; - if (C) - { - if (!iodata.solver.eigenmode.pep_linear) - { - slepc = - std::make_unique(K->GetComm(), iodata.problem.verbose); - slepc->SetType(slepc::SlepcEigenSolver::Type::TOAR); - } - else - { - slepc = std::make_unique(K->GetComm(), - iodata.problem.verbose); - slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); - } - } - else - { - slepc = std::make_unique(K->GetComm(), iodata.problem.verbose); - slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); - } - slepc->SetProblemType(slepc::SlepcEigenSolver::ProblemType::GEN_NON_HERMITIAN); - 
slepc->SetOrthogonalization(iodata.solver.linear.orthog_mgs, - iodata.solver.linear.orthog_cgs2); - eigen = std::move(slepc); -#endif - } - EigenSolverBase::ScaleType scale = iodata.solver.eigenmode.scale - ? EigenSolverBase::ScaleType::NORM_2 - : EigenSolverBase::ScaleType::NONE; + // if (type == config::EigenSolverData::Type::FEAST) + // { + // Mpi::Print("\nConfiguring FEAST eigenvalue solver\n"); + // #if defined(PALACE_WITH_SLEPC) + // if (C) + // { + // eigen = std::make_unique( + // K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, + // iodata.problem.verbose); + // } + // else + // { + // eigen = std::make_unique( + // K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, + // iodata.problem.verbose); + // } + // #endif + // } + // else if (type == config::EigenSolverData::Type::ARPACK) + // { + // Mpi::Print("\nConfiguring ARPACK eigenvalue solver\n"); + // #if defined(PALACE_WITH_ARPACK) + // if (C) + // { + // eigen = std::make_unique(iodata.problem.verbose); + // } + // else + // { + // eigen = std::make_unique(iodata.problem.verbose); + // } + // #endif + // } + // else // config::EigenSolverData::Type::SLEPC + // { + // Mpi::Print("\nConfiguring SLEPc eigenvalue solver\n"); + // #if defined(PALACE_WITH_SLEPC) + // std::unique_ptr slepc; + // if (C) + // { + // if (!iodata.solver.eigenmode.pep_linear) + // { + // slepc = + // std::make_unique(K->GetComm(), + // iodata.problem.verbose); + // slepc->SetType(slepc::SlepcEigenSolver::Type::TOAR); + // } + // else + // { + // slepc = std::make_unique(K->GetComm(), + // iodata.problem.verbose); + // slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); + // } + // } + // else + // { + // slepc = std::make_unique(K->GetComm(), + // iodata.problem.verbose); + // slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); + // } + // slepc->SetProblemType(slepc::SlepcEigenSolver::ProblemType::GEN_NON_HERMITIAN); + // 
slepc->SetOrthogonalization(iodata.solver.linear.orthog_mgs, + // iodata.solver.linear.orthog_cgs2); + // eigen = std::move(slepc); + // #endif + // } + // EigenSolverBase::ScaleType scale = iodata.solver.eigenmode.scale + // ? EigenSolverBase::ScaleType::NORM_2 + // : EigenSolverBase::ScaleType::NONE; // XX TODO REVISIT BELOW... - // if (C) - // { - // eigen->SetOperators(*K, *C, *M, scale); - // } - // else - // { - // eigen->SetOperators(*K, *M, scale); - // } - // eigen->SetNumModes(iodata.solver.eigenmode.n, iodata.solver.eigenmode.max_size); - // eigen->SetTol(iodata.solver.eigenmode.tol); - // eigen->SetMaxIter(iodata.solver.eigenmode.max_it); - // Mpi::Print(" Scaling γ = {:.3e}, δ = {:.3e}\n", eigen->GetScalingGamma(), - // eigen->GetScalingDelta()); - - // const double target = iodata.solver.eigenmode.target; - // const double f_target = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, - // target); std::unique_ptr A; - // std::vector> P, AuxP; - // std::unique_ptr ksp; - // std::unique_ptr pc; - -#if 0 // XX TODO DISABLE FEAST EIGENSOLVER FOR NOW - -#if defined(PALACE_WITH_SLEPC) - auto *feast = dynamic_cast(eigen.get()); - if (feast) - { - // Configure the FEAST integration contour. The linear solvers are set up inside the - // solver. 
- if (iodata.solver.eigenmode.feast_contour_np > 1) - { - double contour_ub = iodata.solver.eigenmode.feast_contour_ub; - double f_contour_ub = - iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, contour_ub); - double contour_ar = iodata.solver.eigenmode.feast_contour_ar; - MFEM_VERIFY(contour_ub > target, - "FEAST eigensolver requires a specified upper frequency target!"); - MFEM_VERIFY( - contour_ar >= 0.0 && contour_ar <= 1.0, - "Contour aspect ratio for FEAST eigenvalue solver must be in range [0.0, 1.0]!"); - Mpi::Print(" FEAST search contour: σ_lower = {:.3e} GHz ({:.3e})\n" - " σ_upper = {:.3e} GHz ({:.3e})\n" - " AR = {:.1e}\n", - f_target, target, f_contour_ub, contour_ub, contour_ar); - if (C) - { - // Search for eigenvalues in the range λ = iσₗₒ to iσₕᵢ. - double h = (contour_ub - target) * contour_ar; - feast->SetContour(-0.5 * h, target, 0.5 * h, contour_ub, false, true); - } - else - { - // Linear EVP has eigenvalues μ = -λ² = ω². Search for eigenvalues from μ = σₗₒ² to - // σₕᵢ². - double h = (contour_ub * contour_ub - target * target) * contour_ar; - feast->SetContour(target * target, -0.5 * h, contour_ub * contour_ub, 0.5 * h); - } - } - else - { - Mpi::Print(" FEAST search target: σ = {:.3e} GHz ({:.3e})\n", f_target, target); - if (C) - { - feast->SetContour(0.0, target, 0.0, target, false, true); - } - else - { - feast->SetContour(target * target, 0.0, target * target, 0.0); - } - } - } - else -#endif - -#endif - - // { - // Mpi::Print(" Shift-and-invert σ = {:.3e} GHz ({:.3e})\n", f_target, target); // if (C) // { - // // Search for eigenvalues closest to λ = iσ. 
- // eigen->SetShiftInvert(0.0, target); - // if (type == config::EigenSolverData::Type::ARPACK) + // eigen->SetOperators(*K, *C, *M, scale); + // } + // else + // { + // eigen->SetOperators(*K, *M, scale); + // } + // eigen->SetNumModes(iodata.solver.eigenmode.n, iodata.solver.eigenmode.max_size); + // eigen->SetTol(iodata.solver.eigenmode.tol); + // eigen->SetMaxIter(iodata.solver.eigenmode.max_it); + // Mpi::Print(" Scaling γ = {:.3e}, δ = {:.3e}\n", eigen->GetScalingGamma(), + // eigen->GetScalingDelta()); + + // const double target = iodata.solver.eigenmode.target; + // const double f_target = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, + // target); std::unique_ptr A; + // std::vector> P, AuxP; + // std::unique_ptr ksp; + // std::unique_ptr pc; + // #if defined(PALACE_WITH_SLEPC) + // auto *feast = dynamic_cast(eigen.get()); + // if (feast) + // { + // // Configure the FEAST integration contour. The linear solvers are set up inside + // the + // // solver. + // if (iodata.solver.eigenmode.feast_contour_np > 1) // { - // // ARPACK searches based on eigenvalues of the transformed problem. The - // eigenvalue - // // 1/(λ-σ) will be a large-magnitude negative imaginary number for an eigenvalue - // λ - // // with frequency close to but not below the target σ. 
- // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::SMALLEST_IMAGINARY); + // double contour_ub = iodata.solver.eigenmode.feast_contour_ub; + // double f_contour_ub = + // iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, contour_ub); + // double contour_ar = iodata.solver.eigenmode.feast_contour_ar; + // MFEM_VERIFY(contour_ub > target, + // "FEAST eigensolver requires a specified upper frequency target!"); + // MFEM_VERIFY( + // contour_ar >= 0.0 && contour_ar <= 1.0, + // "Contour aspect ratio for FEAST eigenvalue solver must be in range + // [0.0, 1.0]!"); + // Mpi::Print(" FEAST search contour: σ_lower = {:.3e} GHz ({:.3e})\n" + // " σ_upper = {:.3e} GHz ({:.3e})\n" + // " AR = {:.1e}\n", + // f_target, target, f_contour_ub, contour_ub, contour_ar); + // if (C) + // { + // // Search for eigenvalues in the range λ = iσₗₒ to iσₕᵢ. + // double h = (contour_ub - target) * contour_ar; + // feast->SetContour(-0.5 * h, target, 0.5 * h, contour_ub, false, true); + // } + // else + // { + // // Linear EVP has eigenvalues μ = -λ² = ω². Search for eigenvalues from μ = + // σₗₒ² to + // // σₕᵢ². + // double h = (contour_ub * contour_ub - target * target) * contour_ar; + // feast->SetContour(target * target, -0.5 * h, contour_ub * contour_ub, 0.5 * h); + // } // } // else // { - // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_IMAGINARY); + // Mpi::Print(" FEAST search target: σ = {:.3e} GHz ({:.3e})\n", f_target, target); + // if (C) + // { + // feast->SetContour(0.0, target, 0.0, target, false, true); + // } + // else + // { + // feast->SetContour(target * target, 0.0, target * target, 0.0); + // } // } - // // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_MAGNITUDE); // } // else + // #endif // { - // // Linear EVP has eigenvalues μ = -λ² = ω². Search for eigenvalues closest to μ = - // σ². 
eigen->SetShiftInvert(target * target, 0.0); if (type == - // config::EigenSolverData::Type::ARPACK) + // Mpi::Print(" Shift-and-invert σ = {:.3e} GHz ({:.3e})\n", f_target, target); + // if (C) // { - // // ARPACK searches based on eigenvalues of the transformed problem. 1/(μ-σ²) will - // be - // // a large-magnitude positive real number for an eigenvalue μ with frequency - // close - // // to but below the target σ². - // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::LARGEST_REAL); + // // Search for eigenvalues closest to λ = iσ. + // eigen->SetShiftInvert(0.0, target); + // if (type == config::EigenSolverData::Type::ARPACK) + // { + // // ARPACK searches based on eigenvalues of the transformed problem. The + // eigenvalue + // // 1/(λ-σ) will be a large-magnitude negative imaginary number for an + // eigenvalue λ + // // with frequency close to but not below the target σ. + // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::SMALLEST_IMAGINARY); + // } + // else + // { + // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_IMAGINARY); + // } + // // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_MAGNITUDE); // } // else // { - // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_REAL); + // // Linear EVP has eigenvalues μ = -λ² = ω². Search for eigenvalues closest to μ = + // σ². eigen->SetShiftInvert(target * target, 0.0); if (type == + // config::EigenSolverData::Type::ARPACK) + // { + // // ARPACK searches based on eigenvalues of the transformed problem. 1/(μ-σ²) + // will be + // // a large-magnitude positive real number for an eigenvalue μ with frequency + // close + // // to but below the target σ². 
+ // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::LARGEST_REAL); + // } + // else + // { + // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_REAL); + // } + // // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_MAGNITUDE); // } - // // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_MAGNITUDE); - // } - // // Set up the linear solver required for solving systems involving the shifted - // operator - // // (K - σ² M) or P(iσ) = (K + iσ C - σ² M) during the eigenvalue solve. The - // // preconditioner for complex linear systems is constructed from a real approximation - // // to the complex system matrix. + // // Set up the linear solver required for solving systems involving the shifted + // operator + // // (K - σ² M) or P(iσ) = (K + iσ C - σ² M) during the eigenvalue solve. The + // // preconditioner for complex linear systems is constructed from a real + // approximation + // // to the complex system matrix. - // XX TODO REPLACE... WITH SUM OPERATOR! + // // XX TODO REPLACE... WITH SUM OPERATOR! - // A = utils::GetSystemMatrixShell(target, *K, *M, C.get()); + // A = utils::GetSystemMatrixShell(target, *K, *M, C.get()); - // // XX TODO MOVE THIS FOR LOG FILE... - // spaceop.GetPreconditionerMatrix(target, P, AuxP); + // // XX TODO MOVE THIS FOR LOG FILE... 
+ // spaceop.GetPreconditionerMatrix(target, P, AuxP); - // pc = std::make_unique(iodata, spaceop.GetDbcMarker(), - // spaceop.GetNDSpaces(), - // &spaceop.GetH1Spaces()); - // pc->SetOperator(P, &AuxP); + // pc = std::make_unique(iodata, spaceop.GetDbcMarker(), + // spaceop.GetNDSpaces(), + // &spaceop.GetH1Spaces()); + // pc->SetOperator(P, &AuxP); - // ksp = std::make_unique(A->GetComm(), iodata, "ksp_"); - // ksp->SetPreconditioner(*pc); - // ksp->SetOperator(*A); - // ksp->SetTabLevel(1); - // eigen->SetLinearSolver(*ksp); - // } + // ksp = std::make_unique(A->GetComm(), iodata, "ksp_"); + // ksp->SetPreconditioner(*pc); + // ksp->SetOperator(*A); + // ksp->SetTabLevel(1); + // eigen->SetLinearSolver(*ksp); + // } // // If desired, use an M-inner product for orthogonalizing the eigenvalue subspace. The // // constructed matrix just references the real SPD part of the mass matrix (no copy is @@ -364,10 +365,7 @@ void EigenSolver::Solve(std::vector> &mesh, // Eigenvalue problem solve. Mpi::Print("\n"); int num_conv = 0; - // num_conv = eigen->Solve(); - - // #if 0 // XX TODO DISABLE FEAST EIGENSOLVER FOR NOW - + // int num_conv = eigen->Solve(); // #if defined(PALACE_WITH_SLEPC) // if (!ksp) // { @@ -376,9 +374,6 @@ void EigenSolver::Solve(std::vector> &mesh, // } // else // #endif - - // #endif - // { // SaveMetadata(ksp->GetTotalNumMult(), ksp->GetTotalNumIter()); // } @@ -391,9 +386,9 @@ void EigenSolver::Solve(std::vector> &mesh, // Get the eigenvalue and relative error. double real, imag, error1, error2; std::complex omega; - eigen->GetEigenvalue(i, real, imag); - eigen->GetError(i, EigenSolverBase::ErrorType::BACKWARD, error1); - eigen->GetError(i, EigenSolverBase::ErrorType::ABSOLUTE, error2); + // eigen->GetEigenvalue(i, real, imag); //XX TODO EIGENVALUE SOLVES... 
+ // eigen->GetError(i, EigenSolverBase::ErrorType::BACKWARD, error1); + // eigen->GetError(i, EigenSolverBase::ErrorType::ABSOLUTE, error2); omega.real(real); omega.imag(imag); if (!C) diff --git a/palace/drivers/magnetostaticsolver.cpp b/palace/drivers/magnetostaticsolver.cpp index b80c9d82a..6be76a0a1 100644 --- a/palace/drivers/magnetostaticsolver.cpp +++ b/palace/drivers/magnetostaticsolver.cpp @@ -34,7 +34,7 @@ void MagnetostaticSolver::Solve(std::vector> &mes // // Set up the linear solver. // std::unique_ptr pc = // ConfigurePreconditioner(iodata, curlcurlop.GetDbcMarker(), - // curlcurlop.GetNDSpaces()); + // curlcurlop.GetNDSpaces(), &curlcurlop.GetH1Spaces()); // auto *gmg = dynamic_cast(pc.get()); // if (gmg) // { diff --git a/palace/linalg/CMakeLists.txt b/palace/linalg/CMakeLists.txt index 4d640f9fb..8668c6831 100644 --- a/palace/linalg/CMakeLists.txt +++ b/palace/linalg/CMakeLists.txt @@ -22,7 +22,6 @@ target_sources(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/ksp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mumps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/operator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/petsc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/slepc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/strumpack.cpp diff --git a/palace/linalg/amg.cpp b/palace/linalg/amg.cpp index 9d5facd89..ecaac4844 100644 --- a/palace/linalg/amg.cpp +++ b/palace/linalg/amg.cpp @@ -12,26 +12,24 @@ BoomerAmgSolver::BoomerAmgSolver(int cycle_it, int smooth_it, int print) SetPrintLevel((print > 1) ? print - 1 : 0); SetMaxIter(cycle_it); SetTol(0.0); - SetNumSweeps(smooth_it); - Init(); -} -void BoomerAmgSolver::Init() -{ + // Set additional BoomerAMG options. 
double theta = 0.5; // AMG strength parameter = 0.25 is 2D optimal (0.5-0.8 for 3D) int agg_levels = 1; // Number of aggressive coarsening levels + SetStrengthThresh(theta); SetAggressiveCoarsening(agg_levels); -} + HYPRE_BoomerAMGSetNumSweeps(*this, smooth_it); -void BoomerAmgSolver::SetNumSweeps(int relax_sweeps) -{ - HYPRE_BoomerAMGSetNumSweeps(*this, relax_sweeps); + // int coarse_relax_type = 8; // l1-symm. GS (inexact coarse solve) + // HYPRE_BoomerAMGSetCycleRelaxType(*this, coarse_relax_type, 3); } -void BoomerAmgSolver::SetCoarseRelaxType(int relax_type) +void BoomerAmgSolver::SetOperator(const Operator &op) { - HYPRE_BoomerAMGSetCycleRelaxType(*this, relax_type, 3); + auto *PtAP = const_cast(dynamic_cast(&op)); + MFEM_VERIFY(PtAP, "BoomerAmgSolver requires a ParOperator operator!"); + mfem::HypreBoomerAMG::SetOperator(PtAP->ParallelAssemble()); } } // namespace palace diff --git a/palace/linalg/amg.hpp b/palace/linalg/amg.hpp index 30c7f8c36..b75d1d129 100644 --- a/palace/linalg/amg.hpp +++ b/palace/linalg/amg.hpp @@ -5,6 +5,7 @@ #define PALACE_LINALG_AMG_HPP #include +#include "linalg/operator.hpp" #include "utils/iodata.hpp" namespace palace @@ -15,10 +16,6 @@ namespace palace // class BoomerAmgSolver : public mfem::HypreBoomerAMG { -private: - // Helper function for setting common settings. - void Init(); - public: BoomerAmgSolver(int cycle_it = 1, int smooth_it = 1, int print = 0); BoomerAmgSolver(const IoData &iodata, int print) @@ -27,13 +24,7 @@ class BoomerAmgSolver : public mfem::HypreBoomerAMG { } - // Set the number of smoothing iterations to be performed at each level. - void SetNumSweeps(int relax_sweeps); - - // Set the relaxation type on the coarsest level. Useful for specifying to not use a - // direct solve when the coarse matrix may be singular(relax_type = 8 is the AMS - // default). 
- void SetCoarseRelaxType(int relax_type); + void SetOperator(const Operator &op) override; }; } // namespace palace diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp index 5bd696be6..1fb26870a 100644 --- a/palace/linalg/ams.cpp +++ b/palace/linalg/ams.cpp @@ -3,13 +3,11 @@ #include "ams.hpp" -#include "linalg/hypre.hpp" - namespace palace { HypreAmsSolver::HypreAmsSolver(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace *h1_fespace, int cycle_it, + mfem::ParFiniteElementSpace &h1_fespace, int cycle_it, int smooth_it, int agg_coarsen, bool vector_interp, bool op_singular, int print_lvl) : mfem::HypreSolver(), @@ -17,25 +15,26 @@ HypreAmsSolver::HypreAmsSolver(mfem::ParFiniteElementSpace &nd_fespace, // (MFEM default is 13). 14 is similar to 11/13 but is cheaper in that is uses additive // scalar Pi-space corrections. cycle_type(vector_interp ? 1 : 14), - // Control levels of aggressive coarsening based on problem type: SPD/ semi-definite - // curl-curl operators are easier than indefinite frequency domain problems. When used - // as the coarse solver of geometric multigrid, always do only a single V-cycle. - sdim(nd_fespace.GetParMesh()->SpaceDimension()), ams_it(cycle_it), - ams_smooth_it(smooth_it), + space_dim(nd_fespace.GetParMesh()->SpaceDimension()), + // When used as the coarse solver of geometric multigrid, always do only a single + // V-cycle. + ams_it(cycle_it), ams_smooth_it(smooth_it), // Use no aggressive coarsening for frequency domain problems when the preconditioner // matrix is not SPD. - agg_levels(agg_coarsen), ams_singular(op_singular), - print((print_lvl > 1) ? print_lvl - 1 : 0) + amg_agg_levels(agg_coarsen), + // If we know the operator is singular (no mass matrix, for magnetostatic problems), + // internally the AMS solver will avoid G-space corrections. + ams_singular(op_singular), print((print_lvl > 1) ? 
print_lvl - 1 : 0) { // From MFEM: The AMS preconditioner may sometimes require inverting singular matrices // with BoomerAMG, which are handled correctly in hypre's Solve method, but can produce - // hypre errors in the Setup (specifically in the l1 row norm computation). See the + // hypre errors in the Setup (specifically in the row l1-norm computation). See the // documentation of MFEM's SetErrorMode() for more details. error_mode = IGNORE_HYPRE_ERRORS; // Set up the AMS solver. - Initialize(); ConstructAuxiliaryMatrices(nd_fespace, h1_fespace); + InitializeSolver(); } HypreAmsSolver::~HypreAmsSolver() @@ -43,13 +42,121 @@ HypreAmsSolver::~HypreAmsSolver() HYPRE_AMSDestroy(ams); } -void HypreAmsSolver::Initialize() +void HypreAmsSolver::ConstructAuxiliaryMatrices(mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &h1_fespace) +{ + // Set up the auxiliary space objects for the preconditioner. Mostly the same as MFEM's + // HypreAMS:Init. Start with the discrete gradient matrix. + { + // XX TODO: Partial assembly option? + auto grad = std::make_unique(&h1_fespace, &nd_fespace); + grad->AddDomainInterpolator(new mfem::GradientInterpolator); + grad->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + grad->Assemble(); + grad->Finalize(); + G = std::make_unique(std::move(grad), h1_fespace, nd_fespace, true); + } + + // Vertex coordinates for the lowest order case, or Nedelec interpolation matrix or + // matrices for order > 1. 
+ mfem::ParMesh &mesh = *h1_fespace.GetParMesh(); + if (h1_fespace.GetMaxElementOrder() == 1) + { + mfem::ParGridFunction x_coord(&h1_fespace), y_coord(&h1_fespace), z_coord(&h1_fespace); + if (mesh.GetNodes()) + { + mesh.GetNodes()->GetNodalValues(x_coord, 1); + MFEM_VERIFY(x_coord.Size() == h1_fespace.GetVSize(), + "Unexpected size for vertex coordinates in AMS setup!"); + if (space_dim > 1) + { + mesh.GetNodes()->GetNodalValues(y_coord, 2); + } + if (space_dim > 2) + { + mesh.GetNodes()->GetNodalValues(z_coord, 3); + } + } + else + { + MFEM_VERIFY(x_coord.Size() == mesh.GetNV(), + "Unexpected size for vertex coordinates in AMS setup!"); + for (int i = 0; i < mesh.GetNV(); i++) + { + x_coord(i) = mesh.GetVertex(i)[0]; + if (space_dim > 1) + { + y_coord(i) = mesh.GetVertex(i)[1]; + } + if (space_dim > 2) + { + z_coord(i) = mesh.GetVertex(i)[2]; + } + } + } + x.reset(x_coord.ParallelProject()); + x->HypreReadWrite(); + if (space_dim > 1) + { + y.reset(y_coord.ParallelProject()); + y->HypreReadWrite(); + } + if (space_dim > 2) + { + z.reset(z_coord.ParallelProject()); + z->HypreReadWrite(); + } + } + else + { + // XX TODO: Partial assembly option? + h1d_fespace = std::make_unique( + &mesh, h1_fespace.FEColl(), space_dim, mfem::Ordering::byVDIM); + auto pi = + std::make_unique(h1d_fespace.get(), &nd_fespace); + pi->AddDomainInterpolator(new mfem::IdentityInterpolator); + pi->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + pi->Assemble(); + pi->Finalize(); + Pi = std::make_unique(std::move(pi), *h1d_fespace, nd_fespace, true); + if (cycle_type >= 10) + { + // Get blocks of Pi corresponding to each component, and free Pi. 
+ mfem::Array2D Pi_blocks(1, h1d_fespace->GetVDim()); + Pi->ParallelAssemble().GetBlocks( + Pi_blocks, false, h1d_fespace->GetOrdering() == mfem::Ordering::byVDIM); + Pix.reset(Pi_blocks(0, 0)); + if (space_dim > 1) + { + Piy.reset(Pi_blocks(0, 1)); + } + if (space_dim > 2) + { + Piz.reset(Pi_blocks(0, 2)); + } + Pi.reset(); + } + } +} + +void HypreAmsSolver::InitializeSolver() { // Create the Hypre solver object. HYPRE_AMSCreate(&ams); - HYPRE_AMSSetDimension(ams, sdim); + HYPRE_AMSSetDimension(ams, space_dim); HYPRE_AMSSetCycleType(ams, cycle_type); + // Control printing and number of iterations for use as a preconditioner. + HYPRE_AMSSetPrintLevel(ams, print); + HYPRE_AMSSetMaxIter(ams, ams_it); + // HYPRE_AMSSetTol(ams, 1.0e-16); // Avoid issues with zero RHS + + // Set this option when solving a curl-curl problem with zero mass term. + if (ams_singular) + { + HYPRE_AMSSetBetaPoissonMatrix(ams, nullptr); + } + // Set additional AMS options. int coarsen_type = 10; // 10 = HMIS, 8 = PMIS, 6 = Falgout, 0 = CLJP double theta = 0.5; // AMG strength parameter = 0.25 is 2D optimal (0.5-0.8 for 3D) @@ -57,111 +164,59 @@ void HypreAmsSolver::Initialize() // 18 = l1-Jacobi, 16 = Chebyshev int interp_type = 6; // 6 = Extended+i, 0 = Classical, 13 = FF1 int Pmax = 4; // Interpolation width - int relax_type = 2; // 2 = l1-SSOR, 4 = trunc. l1-SSOR, - // 1 = l1-Jacobi, 16 = Chebyshev - int relax_sweeps = ams_smooth_it; + int relax_type = 2; // 2 = l1-SSOR, 4 = trunc. 
l1-SSOR, 1 = l1-Jacobi, 16 = Chebyshev double weight = 1.0; double omega = 1.0; - HYPRE_AMSSetSmoothingOptions(ams, relax_type, relax_sweeps, weight, omega); - HYPRE_AMSSetAlphaAMGOptions(ams, coarsen_type, agg_levels, amg_relax_type, theta, + HYPRE_AMSSetSmoothingOptions(ams, relax_type, ams_smooth_it, weight, omega); + HYPRE_AMSSetAlphaAMGOptions(ams, coarsen_type, amg_agg_levels, amg_relax_type, theta, interp_type, Pmax); - HYPRE_AMSSetBetaAMGOptions(ams, coarsen_type, agg_levels, amg_relax_type, theta, + HYPRE_AMSSetBetaAMGOptions(ams, coarsen_type, amg_agg_levels, amg_relax_type, theta, interp_type, Pmax); // int coarse_relax_type = 8; // Default, l1-symm. GS // HYPRE_AMSSetAlphaAMGCoarseRelaxType(ams, coarse_relax_type); // HYPRE_AMSSetBetaAMGCoarseRelaxType(ams, coarse_relax_type); - // Control printing and number of iterations for use as a preconditioner. - HYPRE_AMSSetPrintLevel(ams, print); - HYPRE_AMSSetMaxIter(ams, ams_it); - HYPRE_AMSSetTol(ams, 0.0); - // HYPRE_AMSSetTol(ams, 1.0e-16); // Avoid issues with zero RHS + // Set the discrete gradient matrix. + HYPRE_AMSSetDiscreteGradient(ams, G->ParallelAssemble()); - // Set this option when solving a curl-curl problem with zero mass term. - if (ams_singular) + // Set the mesh vertex coordinates or Nedelec interpolation matrix or matrices. + if (x) { - HYPRE_AMSSetBetaPoissonMatrix(ams, nullptr); - } -} - -void HypreAmsSolver::ConstructAuxiliaryMatrices(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace *h1_fespace) -{ - // Set up the auxiliary spaces for the preconditioner. Mostly the same as MFEM's - // HypreAMS:Init. - mfem::FiniteElementCollection *h1_fec = nullptr; - if (!h1_fespace) - { - - // XX TODO REUSE FROM INPUT... - - h1_fec = new mfem::H1_FECollection(nd_fespace.GetMaxElementOrder(), - nd_fespace.GetParMesh()->Dimension()); - h1_fespace = new mfem::ParFiniteElementSpace(nd_fespace.GetParMesh(), h1_fec); + HYPRE_ParVector HY_X = (x) ? 
(HYPRE_ParVector)*x : nullptr; + HYPRE_ParVector HY_Y = (y) ? (HYPRE_ParVector)*y : nullptr; + HYPRE_ParVector HY_Z = (z) ? (HYPRE_ParVector)*z : nullptr; + HYPRE_AMSSetCoordinateVectors(ams, HY_X, HY_Y, HY_Z); } + else { - mfem::ParDiscreteLinearOperator grad(h1_fespace, &nd_fespace); - grad.AddDomainInterpolator(new mfem::GradientInterpolator); - // grad.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - grad.Assemble(); - grad.Finalize(); - G.reset(grad.ParallelAssemble()); - } - { - mfem::ParFiniteElementSpace h1d_fespace(h1_fespace->GetParMesh(), h1_fespace->FEColl(), - sdim, mfem::Ordering::byVDIM); - mfem::ParDiscreteLinearOperator id_ND(&h1d_fespace, &nd_fespace); - id_ND.AddDomainInterpolator(new mfem::IdentityInterpolator); - // id_ND.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - id_ND.Assemble(); - id_ND.Finalize(); - if (cycle_type < 10) - { - Pi.reset(id_ND.ParallelAssemble()); - } - else - { - mfem::Array2D Pi_blocks; - id_ND.GetParBlocks(Pi_blocks); - Pix.reset(Pi_blocks(0, 0)); - if (sdim >= 2) - { - Piy.reset(Pi_blocks(0, 1)); - } - if (sdim == 3) - { - Piz.reset(Pi_blocks(0, 2)); - } - } - } - if (h1_fec) - { - delete h1_fespace; - delete h1_fec; + HYPRE_ParCSRMatrix HY_Pi = (Pi) ? (HYPRE_ParCSRMatrix)Pi->ParallelAssemble() : nullptr; + HYPRE_ParCSRMatrix HY_Pix = (Pix) ? (HYPRE_ParCSRMatrix)*Pix : nullptr; + HYPRE_ParCSRMatrix HY_Piy = (Piy) ? (HYPRE_ParCSRMatrix)*Piy : nullptr; + HYPRE_ParCSRMatrix HY_Piz = (Piz) ? (HYPRE_ParCSRMatrix)*Piz : nullptr; + HYPRE_AMSSetInterpolations(ams, HY_Pi, HY_Pix, HY_Piy, HY_Piz); } } -void HypreAmsSolver::SetOperator(const mfem::Operator &op) +void HypreAmsSolver::SetOperator(const Operator &op) { // When the operator changes, we need to rebuild the AMS solver but can use the unchanged // auxiliary space matrices. 
- bool first = true; - if (setup_called) + if (A) { HYPRE_AMSDestroy(ams); - Initialize(); - first = false; + InitializeSolver(); } - setup_called = 0; + + auto *PtAP = const_cast(dynamic_cast(&op)); + MFEM_VERIFY(PtAP, "HypreAmsSolver requires a ParOperator operator!"); + A = &PtAP->ParallelAssemble(); + height = A->Height(); + width = A->Width(); // From mfem::HypreAMS: Update HypreSolver base class. - height = op.Height(); - width = op.Width(); - auto *new_A = dynamic_cast(&op); - MFEM_VERIFY(new_A, "AMS solver requires a HypreParMatrix operator!"); - A = const_cast(new_A); + setup_called = 0; delete X; delete B; B = X = nullptr; @@ -169,51 +224,6 @@ void HypreAmsSolver::SetOperator(const mfem::Operator &op) auxB.Reset(); auxX.Delete(); auxX.Reset(); - - // Eliminate Dirichlet BCs in G, Pi matrices before RAP. - if (first) - { - constexpr double tol = 1.0e-9; - mfem::Array dbc_rows; - mfem::Vector diag(A->Height()), rowsums(A->Height()); - A->AssembleDiagonal(diag); - hypre::hypreParCSRRowSums(*A, rowsums); - for (int i = 0; i < A->Height(); i++) - { - if (std::abs(rowsums(i) - diag(i)) < tol * std::abs(diag(i))) - { - dbc_rows.Append(i); - } - } - if (G) - { - G->EliminateRows(dbc_rows); - } - if (Pi) - { - Pi->EliminateRows(dbc_rows); - } - if (Pix) - { - Pix->EliminateRows(dbc_rows); - } - if (Piy) - { - Piy->EliminateRows(dbc_rows); - } - if (Piz) - { - Piz->EliminateRows(dbc_rows); - } - } - - // Set the discrete gradient and Nedelec interpolation matrices. - HYPRE_ParCSRMatrix HY_Pi = (Pi) ? (HYPRE_ParCSRMatrix)*Pi : nullptr; - HYPRE_ParCSRMatrix HY_Pix = (Pix) ? (HYPRE_ParCSRMatrix)*Pix : nullptr; - HYPRE_ParCSRMatrix HY_Piy = (Piy) ? (HYPRE_ParCSRMatrix)*Piy : nullptr; - HYPRE_ParCSRMatrix HY_Piz = (Piz) ? 
(HYPRE_ParCSRMatrix)*Piz : nullptr; - HYPRE_AMSSetDiscreteGradient(ams, *G); - HYPRE_AMSSetInterpolations(ams, HY_Pi, HY_Pix, HY_Piy, HY_Piz); } } // namespace palace diff --git a/palace/linalg/ams.hpp b/palace/linalg/ams.hpp index 69a276c43..dc923a2fe 100644 --- a/palace/linalg/ams.hpp +++ b/palace/linalg/ams.hpp @@ -4,7 +4,9 @@ #ifndef PALACE_LINALG_AMS_HPP #define PALACE_LINALG_AMS_HPP +#include #include +#include "linalg/operator.hpp" #include "utils/iodata.hpp" namespace palace @@ -19,32 +21,38 @@ class HypreAmsSolver : public mfem::HypreSolver // The Hypre solver object. HYPRE_Solver ams; - // Discrete gradient matrix. - std::unique_ptr G; - - // Nedelec interpolation matrix and its components (used even for p = 1). - std::unique_ptr Pi, Pix, Piy, Piz; - // Parameters used for preconditioner construction. - const int cycle_type, sdim, ams_it, ams_smooth_it, agg_levels; + const int cycle_type, space_dim, ams_it, ams_smooth_it, amg_agg_levels; const bool ams_singular; // Control print level for debugging. const int print; - // Helper functions to construct the AMS solver and required auxiliary space matrices. - void Initialize(); + // Discrete gradient matrix. + std::unique_ptr G; + + // Nedelec interpolation matrix and its components, or, for p = 1, the mesh vertex + // coordinates. + std::unique_ptr h1d_fespace; + std::unique_ptr Pi; + std::unique_ptr Pix, Piy, Piz; + std::unique_ptr x, y, z; + + // Helper function to set up the auxiliary objects required by the AMS solver. void ConstructAuxiliaryMatrices(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace *h1_fespace = nullptr); + mfem::ParFiniteElementSpace &h1_fespace); + + // Helper function to construct and configure the AMS solver. + void InitializeSolver(); public: // Constructor requires the ND space, but will construct the H1 and (H1)ᵈ spaces // internally as needed. 
HypreAmsSolver(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace *h1_fespace, int cycle_it, int smooth_it, + mfem::ParFiniteElementSpace &h1_fespace, int cycle_it, int smooth_it, int agg_coarsen, bool vector_interp, bool op_singular, int print_lvl); HypreAmsSolver(const IoData &iodata, mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace *h1_fespace, int print_lvl) + mfem::ParFiniteElementSpace &h1_fespace, int print_lvl) : HypreAmsSolver(nd_fespace, h1_fespace, iodata.solver.linear.mat_gmg ? 1 : iodata.solver.linear.mg_cycle_it, iodata.solver.linear.mg_smooth_it, @@ -59,10 +67,8 @@ class HypreAmsSolver : public mfem::HypreSolver } ~HypreAmsSolver() override; - // Sets matrix associated with the AMS solver. - void SetOperator(const mfem::Operator &op) override; + void SetOperator(const Operator &op) override; - // The typecast to HYPRE_Solver returns the internal ams object. operator HYPRE_Solver() const override { return ams; } HYPRE_PtrToParSolverFcn SetupFcn() const override diff --git a/palace/linalg/arpack.cpp b/palace/linalg/arpack.cpp index 2297adb86..37ddaf752 100644 --- a/palace/linalg/arpack.cpp +++ b/palace/linalg/arpack.cpp @@ -3,6 +3,8 @@ #include "arpack.hpp" +#if 0 // XX TODO DISABLE ARPACK FOR NOW + #if defined(PALACE_WITH_ARPACK) #if defined(__GNUC__) && defined(__clang__) @@ -971,3 +973,5 @@ void ArpackPEPSolver::RestoreBlocks(PetscScalar *pv, petsc::PetscParVector &v, #endif #endif + +#endif diff --git a/palace/linalg/arpack.hpp b/palace/linalg/arpack.hpp index dc818a7d0..d959f983c 100644 --- a/palace/linalg/arpack.hpp +++ b/palace/linalg/arpack.hpp @@ -4,6 +4,8 @@ #ifndef PALACE_LINALG_ARPACK_HPP #define PALACE_LINALG_ARPACK_HPP +#if 0 // XX TODO DISABLE FEAST FOR NOW + #if defined(PALACE_WITH_ARPACK) #include "linalg/petsc.hpp" @@ -268,4 +270,6 @@ class ArpackPEPSolver : public ArpackEigenSolver #endif +#endif + #endif // PALACE_LINALG_ARPACK_HPP diff --git a/palace/linalg/chebyshev.cpp 
b/palace/linalg/chebyshev.cpp index 565629a1f..5278ed8bf 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ -5,102 +5,35 @@ #include #include -#include "linalg/operator.hpp" namespace palace { -// XX TODO REPLACE CHEBYSHEV ARRAYMULT WITH MULT/MULT TRANSPOSE (NO NEED FOR ARRAY MULT...) - -// namespace -// { - -// using mfem::ForallWrap; // XX TODO NEEDED? - -// class SymmetricScaledOperator : public mfem::Operator -// { -// private: -// const mfem::Operator &A; -// const mfem::Vector &d; -// mutable mfem::Vector z; - -// public: -// SymmetricScaledOperator(const mfem::Operator &op, const mfem::Vector &v) -// : mfem::Operator(op.Height()), A(op), d(v), z(v.Size()) -// { -// } - -// void Mult(const mfem::Vector &x, mfem::Vector &y) const override -// { -// A.Mult(x, z); -// { -// const int N = height; -// const auto *D = d.Read(); -// const auto *Z = z.Read(); -// auto *Y = y.Write(); -// mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { Y[i] = D[i] * Z[i]; }); -// } -// } - -// void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override -// { -// { -// const int N = height; -// const auto *D = d.Read(); -// const auto *X = x.Read(); -// auto *Z = z.Write(); -// mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { Z[i] = D[i] * X[i]; }); -// } -// A.Mult(z, y); -// } -// }; - -// } // namespace - -ChebyshevSmoother::ChebyshevSmoother(MPI_Comm c, const mfem::Array &tdof_list, - int smooth_it, int poly_order) - : comm(c), A(nullptr), dbc_tdof_list(tdof_list), pc_it(smooth_it), order(poly_order) +ChebyshevSmoother::ChebyshevSmoother(int smooth_it, int poly_order) + : mfem::Solver(), pc_it(smooth_it), order(poly_order), A(nullptr) { } void ChebyshevSmoother::SetOperator(const mfem::Operator &op) { A = &op; + height = A->Height(); width = A->Width(); r.SetSize(height); d.SetSize(height); - - // XX TODO: AS FOR TIME OPERATOR, TDOF_LIST SHOULD NOT BE NEEDED AS WE HAVE IT IN THE - // OPERATOR DIAGONAL ALREADY?? 
Can just use AssembleDiagonal and .Reciprocal() - - // Configure symmetric diagonal scaling. - const int N = height; - dinv.SetSize(N); - mfem::Vector diag(N); - A->AssembleDiagonal(diag); - const auto *D = diag.Read(); - auto *DI = dinv.Write(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - MFEM_ASSERT_KERNEL(D[i] != 0.0, - "Zero diagonal entry in Chebyshev smoother!"); - DI[i] = 1.0 / D[i]; - }); - const auto *I = dbc_tdof_list.Read(); - mfem::forall(dbc_tdof_list.Size(), - [=] MFEM_HOST_DEVICE(int i) - { - DI[I[i]] = 1.0; // Assumes operator DiagonalPolicy::ONE - }); + dinv.SetSize(height); + A->AssembleDiagonal(dinv); + // dinv.Reciprocal(); //XX TODO NEED MFEM PATCH // Set up Chebyshev coefficients using the computed maximum eigenvalue estimate. See // mfem::OperatorChebyshevSmoother or Adams et al., Parallel multigrid smoothing: // polynomial versus Gauss-Seidel, JCP (2003). + const auto *PtAP = dynamic_cast(A); + MFEM_VERIFY(PtAP, "ChebyshevSmoother requires a ParOperator operator!"); DiagonalOperator Dinv(dinv); SymmetricProductOperator DinvA(Dinv, *A); - lambda_max = 1.1 * linalg::SpectralNorm(comm, DinvA, false); + lambda_max = 1.1 * linalg::SpectralNorm(PtAP->GetComm(), DinvA, false); } void ChebyshevSmoother::Mult(const mfem::Vector &x, mfem::Vector &y) const diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp index dbf066809..7364c4acd 100644 --- a/palace/linalg/chebyshev.hpp +++ b/palace/linalg/chebyshev.hpp @@ -5,6 +5,8 @@ #define PALACE_LINALG_CHEBYSHEV_SMOOTHER_HPP #include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { @@ -19,32 +21,29 @@ namespace palace class ChebyshevSmoother : public mfem::Solver { private: - // System matrix (not owned), its communicator, and list of eliminated degrees of freedom. - MPI_Comm comm; - const mfem::Operator *A; - const mfem::Array dbc_tdof_list; - // Number of smoother iterations and polynomial order. 
const int pc_it, order; + // System matrix (not owned). + const Operator *A; + // Inverse diagonal scaling of the operator. - mfem::Vector dinv; + Vector dinv; // Maximum operator eigenvalue for Chebyshev polynomial smoothing. double lambda_max; // Temporary vectors for smoother application. - mutable mfem::Vector r, d; + mutable Vector r, d; public: - ChebyshevSmoother(MPI_Comm c, const mfem::Array &tdof_list, int smooth_it, - int poly_order); + ChebyshevSmoother(int smooth_it, int poly_order); - void SetOperator(const mfem::Operator &op) override; + void SetOperator(const Operator &op) override; - void Mult(const mfem::Vector &x, mfem::Vector &y) const override; + void Mult(const Vector &x, Vector &y) const override; - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override + void MultTranspose(const Vector &x, Vector &y) const override { Mult(x, y); // Assumes operator symmetry } diff --git a/palace/linalg/complex.cpp b/palace/linalg/complex.cpp index 95593abcf..34e71e281 100644 --- a/palace/linalg/complex.cpp +++ b/palace/linalg/complex.cpp @@ -108,10 +108,10 @@ void ComplexVector::AXPY(std::complex alpha, const ComplexVector &y) const int N = Size(); const double ar = alpha.real(); const double ai = alpha.imag(); - auto *XR = Real().ReadWrite(); - auto *XI = Imag().ReadWrite(); const auto *YR = y.Real().Read(); const auto *YI = y.Imag().Read(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { @@ -130,10 +130,10 @@ void ComplexVector::AXPBY(std::complex alpha, const ComplexVector &y, const double ai = alpha.imag(); const double br = beta.real(); const double bi = beta.imag(); - auto *XR = Real().ReadWrite(); - auto *XI = Imag().ReadWrite(); const auto *YR = y.Real().Read(); const auto *YI = y.Imag().Read(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { @@ -156,12 +156,12 @@ void ComplexVector::AXPBYPCZ(std::complex 
alpha, const ComplexVector &y, const double bi = beta.imag(); const double gr = gamma.real(); const double gi = gamma.imag(); - auto *XR = Real().ReadWrite(); - auto *XI = Imag().ReadWrite(); const auto *YR = y.Real().Read(); const auto *YI = y.Imag().Read(); const auto *ZR = z.Real().Read(); const auto *ZI = z.Imag().Read(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { @@ -328,14 +328,14 @@ void ComplexParOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, test_fespace_.GetRestrictionMatrix()->Mult(lyi_, tyi_); } { - const int N = test_dbc_tdof_list_->Size(); - auto idx = test_dbc_tdof_list_->Read(); - auto TYR = tyr_.ReadWrite(); - auto TYI = tyi_.ReadWrite(); if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) { + const int N = test_dbc_tdof_list_->Size(); + const auto *idx = test_dbc_tdof_list_->Read(); const auto *XR = xr.Read(); const auto *XI = xi.Read(); + auto *TYR = tyr_.ReadWrite(); + auto *TYI = tyi_.ReadWrite(); mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { @@ -346,12 +346,8 @@ void ComplexParOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, } else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) { - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - TYR[id] = TYI[id] = 0.0; - }); + tyr_.SetSubVector(*test_dbc_tdof_list_, 0.0); + tyi_.SetSubVector(*test_dbc_tdof_list_, 0.0); } else { @@ -425,14 +421,14 @@ void ComplexParOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Ve trial_fespace_.GetProlongationMatrix()->MultTranspose(lxr_, txr_); trial_fespace_.GetProlongationMatrix()->MultTranspose(lxi_, txi_); { - const int N = trial_dbc_tdof_list_->Size(); - auto idx = trial_dbc_tdof_list_->Read(); - auto TXR = txr_.ReadWrite(); - auto TXI = txi_.ReadWrite(); if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) { + const int N = trial_dbc_tdof_list_->Size(); + const auto *idx = 
trial_dbc_tdof_list_->Read(); const auto *XR = xr.Read(); const auto *XI = xi.Read(); + auto *TXR = txr_.ReadWrite(); + auto *TXI = txi_.ReadWrite(); mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { @@ -443,12 +439,8 @@ void ComplexParOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Ve } else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) { - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - TXR[id] = TXI[id] = 0.0; - }); + txr_.SetSubVector(*trial_dbc_tdof_list_, 0.0); + txi_.SetSubVector(*trial_dbc_tdof_list_, 0.0); } else { @@ -515,14 +507,14 @@ void ComplexParOperator::AddMultHermitianTranspose(const Vector &xr, const Vecto trial_fespace_.GetProlongationMatrix()->MultTranspose(lxr_, txr_); trial_fespace_.GetProlongationMatrix()->MultTranspose(lxi_, txi_); { - const int N = trial_dbc_tdof_list_->Size(); - auto idx = trial_dbc_tdof_list_->Read(); - auto TXR = txr_.ReadWrite(); - auto TXI = txi_.ReadWrite(); if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) { + const int N = trial_dbc_tdof_list_->Size(); + const auto *idx = trial_dbc_tdof_list_->Read(); const auto *XR = xr.Read(); const auto *XI = xi.Read(); + auto *TXR = txr_.ReadWrite(); + auto *TXI = txi_.ReadWrite(); mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { @@ -533,12 +525,8 @@ void ComplexParOperator::AddMultHermitianTranspose(const Vector &xr, const Vecto } else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) { - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - TXR[id] = TXI[id] = 0.0; - }); + txr_.SetSubVector(*trial_dbc_tdof_list_, 0.0); + txi_.SetSubVector(*trial_dbc_tdof_list_, 0.0); } else { @@ -685,10 +673,10 @@ void ComplexWrapperOperator::AddMult(const Vector &xr, const Vector &xi, Vector const int N = height; const double ar = a.real(); const double ai = a.imag(); - auto *YR = yr.ReadWrite(); - auto *YI = yi.ReadWrite(); const auto *TYR = tyr_.Read(); const auto *TYI = 
tyi_.Read(); + auto *YR = yr.ReadWrite(); + auto *YI = yi.ReadWrite(); mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { @@ -759,10 +747,10 @@ void ComplexWrapperOperator::AddMultTranspose(const Vector &xr, const Vector &xi const int N = width; const double ar = a.real(); const double ai = a.imag(); - auto *YR = yr.ReadWrite(); - auto *YI = yi.ReadWrite(); const auto *TXR = txr_.Read(); const auto *TXI = txi_.Read(); + auto *YR = yr.ReadWrite(); + auto *YI = yi.ReadWrite(); mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { @@ -833,10 +821,10 @@ void ComplexWrapperOperator::AddMultHermitianTranspose(const Vector &xr, const V const int N = width; const double ar = a.real(); const double ai = a.imag(); - auto *YR = yr.ReadWrite(); - auto *YI = yi.ReadWrite(); const auto *TXR = txr_.Read(); const auto *TXI = txi_.Read(); + auto *YR = yr.ReadWrite(); + auto *YI = yi.ReadWrite(); mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { diff --git a/palace/linalg/complex.hpp b/palace/linalg/complex.hpp index 316a71381..943fb2b98 100644 --- a/palace/linalg/complex.hpp +++ b/palace/linalg/complex.hpp @@ -233,9 +233,9 @@ class ComplexParOperator : public ComplexOperator bool test_restrict = false); // Get access to the underlying local (L-vector) operator. - const ComplexOperator &GetOperator() const + const ComplexOperator &LocalOperator() const { - MFEM_VERIFY(A_, "No local matrix available for ComplexParOperator::GetOperator!"); + MFEM_VERIFY(A_, "No local matrix available for ComplexParOperator::LocalOperator!"); return *A_; } @@ -251,25 +251,25 @@ class ComplexParOperator : public ComplexOperator } // Set essential boundary condition true dofs for rectangular operators. 
- void SetEssentialTrueDofs(const mfem::Array &trial_dbc_tdof_list, - const mfem::Array &test_dbc_tdof_list, + void SetEssentialTrueDofs(const mfem::Array *trial_dbc_tdof_list, + const mfem::Array *test_dbc_tdof_list, DiagonalPolicy diag_policy) { MFEM_VERIFY(diag_policy == DiagonalPolicy::DIAG_ZERO, "Essential boundary condition true dof elimination for rectangular " - "ComplexParOperator only supports DiagonalPolicy::DIAG_ZERO!"); - trial_dbc_tdof_list_ = &trial_dbc_tdof_list; - test_dbc_tdof_list_ = &test_dbc_tdof_list; + "ComplexParOperator only supports DiagonalPolicy::DIAG_ZERO!"); + trial_dbc_tdof_list_ = trial_dbc_tdof_list; + test_dbc_tdof_list_ = test_dbc_tdof_list; diag_policy_ = diag_policy; } // Get the essential boundary condition true dofs associated with the operator. May be // nullptr. - void GetEssentialTrueDofs(const mfem::Array *&trial_dbc_tdof_list, - const mfem::Array *&test_dbc_tdof_list) + const mfem::Array *GetEssentialTrueDofs() const { - trial_dbc_tdof_list = trial_dbc_tdof_list_; - test_dbc_tdof_list = test_dbc_tdof_list_; + MFEM_VERIFY(trial_dbc_tdof_list_ == test_dbc_tdof_list_ && height == width, + "GetEssentialTrueDofs should only be used for square ComplexParOperator!"); + return trial_dbc_tdof_list_; } // Set the diagonal policy for the operator. diff --git a/palace/linalg/curlcurl.cpp b/palace/linalg/curlcurl.cpp index 61dfab6da..1bfaa6788 100644 --- a/palace/linalg/curlcurl.cpp +++ b/palace/linalg/curlcurl.cpp @@ -58,15 +58,19 @@ CurlCurlMassSolver::CurlCurlMassSolver(const MaterialOperator &mat_op, } } - // The system matrix for the projection is real and SPD. For the coarse-level AMG solve, - // we don't use an exact solve on the coarsest level.
- auto ams = std::make_unique(nd_fespaces.GetFESpaceAtLevel(0), - &h1_fespaces.GetFESpaceAtLevel(0), 1, 1, 1, - false, false, 0); - auto gmg = std::make_unique(std::move(ams), dbc_marker, - nd_fespaces, &h1_fespaces, 1, 1, 2); - gmg->SetOperator(A, &AuxA); - pc = std::move(gmg); + // XX TODO VISIT + + // // The system matrix for the projection is real and SPD. For the coarse-level AMG + // solve, + // // we don't use an exact solve on the coarsest level. + // auto ams = std::make_unique(nd_fespaces.GetFESpaceAtLevel(0), + // &h1_fespaces.GetFESpaceAtLevel(0), 1, 1, 1, + // false, false, 0); + // auto gmg = std::make_unique(std::move(ams), dbc_marker, + // nd_fespaces, &h1_fespaces, 1, 1, + // 2); + // gmg->SetOperator(A, &AuxA); + // pc = std::move(gmg); ksp = std::make_unique(nd_fespaces.GetFinestFESpace().GetComm()); ksp->SetRelTol(tol); diff --git a/palace/linalg/curlcurl.hpp b/palace/linalg/curlcurl.hpp index 65820825e..5a1d1b60a 100644 --- a/palace/linalg/curlcurl.hpp +++ b/palace/linalg/curlcurl.hpp @@ -35,10 +35,8 @@ class CurlCurlMassSolver : public mfem::Solver mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, double tol, int max_it, int print); - // Operator is set in constructor. void SetOperator(const mfem::Operator &op) override {} - // Application of the solver. 
void Mult(const mfem::Vector &x, mfem::Vector &y) const override { ksp->Mult(x, y); } void Mult(const petsc::PetscParVector &x, petsc::PetscParVector &y) const { diff --git a/palace/linalg/distrelaxation.cpp b/palace/linalg/distrelaxation.cpp index 06f09c222..5239ab2ef 100644 --- a/palace/linalg/distrelaxation.cpp +++ b/palace/linalg/distrelaxation.cpp @@ -3,6 +3,7 @@ #include "distrelaxation.hpp" +#include #include "linalg/chebyshev.hpp" namespace palace @@ -10,46 +11,99 @@ namespace palace DistRelaxationSmoother::DistRelaxationSmoother(mfem::ParFiniteElementSpace &nd_fespace, mfem::ParFiniteElementSpace &h1_fespace, - const mfem::Array &dbc_marker, int smooth_it, int cheby_smooth_it, int cheby_order) - : mfem::Solver(), A(nullptr), A_G(nullptr), pc_it(smooth_it) + : mfem::Solver(), pc_it(smooth_it), A(nullptr), A_G(nullptr), dbc_tdof_list_G(nullptr) { // Construct discrete gradient matrix for the auxiliary space. { - mfem::ParDiscreteLinearOperator grad(&h1_fespace, &nd_fespace); - grad.AddDomainInterpolator(new mfem::GradientInterpolator); - // grad.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - grad.Assemble(); - grad.Finalize(); - G.reset(grad.ParallelAssemble()); + // XX TODO: Partial assembly option? + auto grad = std::make_unique(&h1_fespace, &nd_fespace); + grad->AddDomainInterpolator(new mfem::GradientInterpolator); + grad->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + grad->Assemble(); + grad->Finalize(); + G = std::make_unique(std::move(grad), h1_fespace, nd_fespace, true); } // Initialize smoothers. 
- mfem::Array nd_dbc_tdof_list; - nd_fespace.GetEssentialTrueDofs(dbc_marker, nd_dbc_tdof_list); - h1_fespace.GetEssentialTrueDofs(dbc_marker, h1_dbc_tdof_list); - B = std::make_unique(nd_fespace.GetComm(), nd_dbc_tdof_list, - cheby_smooth_it, cheby_order); - B_G = std::make_unique(h1_fespace.GetComm(), h1_dbc_tdof_list, - cheby_smooth_it, cheby_order); + B = std::make_unique(cheby_smooth_it, cheby_order); + B_G = std::make_unique(cheby_smooth_it, cheby_order); B_G->iterative_mode = false; } -void DistRelaxationSmoother::SetOperator(const mfem::Operator &op, - const mfem::Operator &op_G) +void DistRelaxationSmoother::SetOperator(const Operator &op, const Operator &op_G) { A = &op; A_G = &op_G; MFEM_VERIFY(A->Height() == G->Height() && A->Width() == G->Height() && A_G->Height() == G->Width() && A_G->Width() == G->Width(), "Invalid operator sizes for DistRelaxationSmoother!"); + + const auto *PtAP_G = dynamic_cast(&op_G); + MFEM_VERIFY(PtAP_G, "DistRelaxationSmoother requires ParOperator operators!"); + dbc_tdof_list_G = PtAP_G->GetEssentialTrueDofs(); + height = A->Height(); width = A->Width(); + r.SetSize(height); + x_G.SetSize(A_G->Height()); + y_G.SetSize(A_G->Height()); // Set up smoothers for A and A_G. B->SetOperator(*A); B_G->SetOperator(*A_G); } +void DistRelaxationSmoother::Mult(const Vector &x, Vector &y) const +{ + // Apply smoother. + for (int it = 0; it < pc_it; it++) + { + // y = y + B (x - A y) + B->iterative_mode = (iterative_mode || it > 0); + B->Mult(x, y); + + // y = y + G B_G Gᵀ (x - A y) + A->Mult(y, r); + subtract(x, r, r); + G->MultTranspose(r, x_G); + if (dbc_tdof_list_G) + { + x_G.SetSubVector(*dbc_tdof_list_G, 0.0); + } + B_G->Mult(x_G, y_G); + G->AddMult(y_G, y, 1.0); + } +} + +void DistRelaxationSmoother::MultTranspose(const Vector &x, Vector &y) const +{ + // Apply transpose. 
+ B->iterative_mode = true; + for (int it = 0; it < pc_it; it++) + { + // y = y + G B_Gᵀ Gᵀ (x - A y) + if (iterative_mode || it > 0) + { + A->Mult(y, r); + subtract(x, r, r); + G->MultTranspose(r, x_G); + } + else + { + y = 0.0; + G->MultTranspose(x, x_G); // Zero initial guess: the residual is x itself + } + if (dbc_tdof_list_G) + { + x_G.SetSubVector(*dbc_tdof_list_G, 0.0); + } + B_G->MultTranspose(x_G, y_G); + G->AddMult(y_G, y, 1.0); + // y = y + Bᵀ (x - A y) + B->MultTranspose(x, y); + } +} + +} // namespace palace diff --git a/palace/linalg/distrelaxation.hpp b/palace/linalg/distrelaxation.hpp index 2616bd17e..a248eb150 100644 --- a/palace/linalg/distrelaxation.hpp +++ b/palace/linalg/distrelaxation.hpp @@ -7,6 +7,8 @@ #include #include #include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { @@ -20,161 +22,165 @@ namespace palace class DistRelaxationSmoother : public mfem::Solver { private: + // Number of smoother iterations. + const int pc_it; + // System matrix and its projection G^T A G (not owned). - const mfem::Operator *A, *A_G; + const Operator *A, *A_G; + + // Dirichlet boundary conditions in the auxiliary space (not owned). + const mfem::Array *dbc_tdof_list_G; // Discrete gradient matrix. - std::unique_ptr G; + std::unique_ptr G; // Point smoother objects for each matrix. mutable std::unique_ptr B; std::unique_ptr B_G; // Temporary vectors for smoother application. - mutable mfem::Vector r, x_G, y_G; - - // Dirichlet boundary conditions in the auxiliary space. - mfem::Array h1_dbc_tdof_list; - - // Number of smoother iterations.
- const int pc_it; + mutable Vector r, x_G, y_G; public: DistRelaxationSmoother(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace, - const mfem::Array &dbc_marker, int smooth_it, + mfem::ParFiniteElementSpace &h1_fespace, int smooth_it, int cheby_smooth_it, int cheby_order); - void SetOperator(const mfem::Operator &op) override + void SetOperator(const Operator &op) override { MFEM_ABORT("SetOperator with a single operator is not implemented for " "DistRelaxationSmoother, use the two argument signature instead!"); } - void SetOperator(const mfem::Operator &op, const mfem::Operator &op_G); - - void Mult(const mfem::Vector &x, mfem::Vector &y) const override - { - mfem::Array X(1); - mfem::Array Y(1); - X[0] = &x; - Y[0] = &y; - ArrayMult(X, Y); - } - - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override - { - mfem::Array X(1); - mfem::Array Y(1); - X[0] = &x; - Y[0] = &y; - ArrayMultTranspose(X, Y); - } - - void ArrayMult(const mfem::Array &X, - mfem::Array &Y) const override - { - // Initialize. - const int nrhs = X.Size(); - mfem::Array R(nrhs), X_G(nrhs), Y_G(nrhs); - std::vector rrefs(nrhs), xgrefs(nrhs), ygrefs(nrhs); - if (nrhs * height != r.Size()) - { - r.SetSize(nrhs * height); - x_G.SetSize(nrhs * A_G->Height()); - y_G.SetSize(nrhs * A_G->Height()); - } - for (int j = 0; j < nrhs; j++) - { - rrefs[j].MakeRef(r, j * height, height); - xgrefs[j].MakeRef(x_G, j * A_G->Height(), A_G->Height()); - ygrefs[j].MakeRef(y_G, j * A_G->Height(), A_G->Height()); - R[j] = &rrefs[j]; - X_G[j] = &xgrefs[j]; - Y_G[j] = &ygrefs[j]; - } - - // Apply smoother. 
- for (int it = 0; it < pc_it; it++) - { - // y = y + B (x - A y) - B->iterative_mode = (iterative_mode || it > 0); - B->ArrayMult(X, Y); - - // y = y + G B_G Gᵀ (x - A y) - A->ArrayMult(Y, R); - for (int j = 0; j < nrhs; j++) - { - subtract(*X[j], *R[j], *R[j]); - } - G->ArrayMultTranspose(R, X_G); - for (int j = 0; j < nrhs; j++) - { - X_G[j]->SetSubVector(h1_dbc_tdof_list, 0.0); - } - B_G->ArrayMult(X_G, Y_G); - G->ArrayAddMult(Y_G, Y, 1.0); - } - } - - void ArrayMultTranspose(const mfem::Array &X, - mfem::Array &Y) const override - { - // Initialize. - const int nrhs = X.Size(); - mfem::Array R(nrhs), X_G(nrhs), Y_G(nrhs); - std::vector rrefs(nrhs), xgrefs(nrhs), ygrefs(nrhs); - if (nrhs * height != r.Size()) - { - r.SetSize(nrhs * height); - x_G.SetSize(nrhs * A_G->Height()); - y_G.SetSize(nrhs * A_G->Height()); - } - for (int j = 0; j < nrhs; j++) - { - rrefs[j].MakeRef(r, j * height, height); - xgrefs[j].MakeRef(x_G, j * A_G->Height(), A_G->Height()); - ygrefs[j].MakeRef(y_G, j * A_G->Height(), A_G->Height()); - R[j] = &rrefs[j]; - X_G[j] = &xgrefs[j]; - Y_G[j] = &ygrefs[j]; - } - - // Apply transpose. 
- B->iterative_mode = true; - for (int it = 0; it < pc_it; it++) - { - // y = y + G B_Gᵀ Gᵀ (x - A y) - if (iterative_mode || it > 0) - { - A->ArrayMult(Y, R); - for (int j = 0; j < nrhs; j++) - { - subtract(*X[j], *R[j], *R[j]); - } - G->ArrayMultTranspose(R, X_G); - for (int j = 0; j < nrhs; j++) - { - X_G[j]->SetSubVector(h1_dbc_tdof_list, 0.0); - } - B_G->ArrayMultTranspose(X_G, Y_G); - G->ArrayAddMult(Y_G, Y, 1.0); - } - else - { - G->ArrayMultTranspose(X, X_G); - for (int j = 0; j < nrhs; j++) - { - X_G[j]->SetSubVector(h1_dbc_tdof_list, 0.0); - } - B_G->ArrayMultTranspose(X_G, Y_G); - G->ArrayMult(Y_G, Y); - } - - // y = y + Bᵀ (x - A y) - B->ArrayMultTranspose(X, Y); - } - } + void SetOperator(const Operator &op, const Operator &op_G); + + void Mult(const Vector &x, Vector &y) const override; + + void MultTranspose(const Vector &x, Vector &y) const override; + + // //XX TODO REMOVE... + // void Mult(const mfem::Vector &x, mfem::Vector &y) const override + // { + // mfem::Array X(1); + // mfem::Array Y(1); + // X[0] = &x; + // Y[0] = &y; + // ArrayMult(X, Y); + // } + + // void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override + // { + // mfem::Array X(1); + // mfem::Array Y(1); + // X[0] = &x; + // Y[0] = &y; + // ArrayMultTranspose(X, Y); + // } + + // void ArrayMult(const mfem::Array &X, + // mfem::Array &Y) const override + // { + // // Initialize. 
+ // const int nrhs = X.Size(); + // mfem::Array R(nrhs), X_G(nrhs), Y_G(nrhs); + // std::vector rrefs(nrhs), xgrefs(nrhs), ygrefs(nrhs); + // if (nrhs * height != r.Size()) + // { + // r.SetSize(nrhs * height); + // x_G.SetSize(nrhs * A_G->Height()); + // y_G.SetSize(nrhs * A_G->Height()); + // } + // for (int j = 0; j < nrhs; j++) + // { + // rrefs[j].MakeRef(r, j * height, height); + // xgrefs[j].MakeRef(x_G, j * A_G->Height(), A_G->Height()); + // ygrefs[j].MakeRef(y_G, j * A_G->Height(), A_G->Height()); + // R[j] = &rrefs[j]; + // X_G[j] = &xgrefs[j]; + // Y_G[j] = &ygrefs[j]; + // } + + // // Apply smoother. + // for (int it = 0; it < pc_it; it++) + // { + // // y = y + B (x - A y) + // B->iterative_mode = (iterative_mode || it > 0); + // B->ArrayMult(X, Y); + + // // y = y + G B_G Gᵀ (x - A y) + // A->ArrayMult(Y, R); + // for (int j = 0; j < nrhs; j++) + // { + // subtract(*X[j], *R[j], *R[j]); + // } + // G->ArrayMultTranspose(R, X_G); + // for (int j = 0; j < nrhs; j++) + // { + // X_G[j]->SetSubVector(h1_dbc_tdof_list, 0.0); + // } + // B_G->ArrayMult(X_G, Y_G); + // G->ArrayAddMult(Y_G, Y, 1.0); + // } + // } + + // void ArrayMultTranspose(const mfem::Array &X, + // mfem::Array &Y) const override + // { + // // Initialize. + // const int nrhs = X.Size(); + // mfem::Array R(nrhs), X_G(nrhs), Y_G(nrhs); + // std::vector rrefs(nrhs), xgrefs(nrhs), ygrefs(nrhs); + // if (nrhs * height != r.Size()) + // { + // r.SetSize(nrhs * height); + // x_G.SetSize(nrhs * A_G->Height()); + // y_G.SetSize(nrhs * A_G->Height()); + // } + // for (int j = 0; j < nrhs; j++) + // { + // rrefs[j].MakeRef(r, j * height, height); + // xgrefs[j].MakeRef(x_G, j * A_G->Height(), A_G->Height()); + // ygrefs[j].MakeRef(y_G, j * A_G->Height(), A_G->Height()); + // R[j] = &rrefs[j]; + // X_G[j] = &xgrefs[j]; + // Y_G[j] = &ygrefs[j]; + // } + + // // Apply transpose. 
+ // B->iterative_mode = true; + // for (int it = 0; it < pc_it; it++) + // { + // // y = y + G B_Gᵀ Gᵀ (x - A y) + // if (iterative_mode || it > 0) + // { + // A->ArrayMult(Y, R); + // for (int j = 0; j < nrhs; j++) + // { + // subtract(*X[j], *R[j], *R[j]); + // } + // G->ArrayMultTranspose(R, X_G); + // for (int j = 0; j < nrhs; j++) + // { + // X_G[j]->SetSubVector(h1_dbc_tdof_list, 0.0); + // } + // B_G->ArrayMultTranspose(X_G, Y_G); + // G->ArrayAddMult(Y_G, Y, 1.0); + // } + // else + // { + // G->ArrayMultTranspose(X, X_G); + // for (int j = 0; j < nrhs; j++) + // { + // X_G[j]->SetSubVector(h1_dbc_tdof_list, 0.0); + // } + // B_G->ArrayMultTranspose(X_G, Y_G); + // G->ArrayMult(Y_G, Y); + // } + + // // y = y + Bᵀ (x - A y) + // B->ArrayMultTranspose(X, Y); + // } + // } }; } // namespace palace diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp index 3d31cf804..c3dbe6384 100644 --- a/palace/linalg/divfree.cpp +++ b/palace/linalg/divfree.cpp @@ -64,14 +64,16 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, } h1_fespaces.GetFinestFESpace().GetEssentialTrueDofs(bdr_marker, h1_bdr_tdof_list); - // The system matrix for the projection is real and SPD. For the coarse-level AMG solve, - // we don't use an exact solve on the coarsest level. - auto amg = std::make_unique(); - amg->SetCoarseRelaxType(8); - auto gmg = std::make_unique(std::move(amg), bdr_marker, - h1_fespaces, nullptr, 1, 1, 2); - gmg->SetOperator(M); - pc = std::move(gmg); + // XX TODO VISIT + + // // The system matrix for the projection is real and SPD. For the coarse-level AMG + // solve, + // // we don't use an exact solve on the coarsest level. 
+ // auto amg = std::make_unique(); + // auto gmg = std::make_unique(std::move(amg), bdr_marker, + // h1_fespaces, nullptr, 1, 1, 2); + // gmg->SetOperator(M); + // pc = std::move(gmg); ksp = std::make_unique(h1_fespaces.GetFinestFESpace().GetComm()); ksp->SetRelTol(tol); diff --git a/palace/linalg/divfree.hpp b/palace/linalg/divfree.hpp index 69e2b3693..a1635a978 100644 --- a/palace/linalg/divfree.hpp +++ b/palace/linalg/divfree.hpp @@ -41,7 +41,6 @@ class DivFreeSolver : public mfem::Solver mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, double tol, int max_it, int print); - // Operator is set in constructor. void SetOperator(const mfem::Operator &op) override {} // Given a vector of Nedelec dofs for an arbitrary vector field, compute the Nedelec dofs diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index e71c19a29..9e2a5ef54 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -10,93 +10,147 @@ namespace palace { GeometricMultigridSolver::GeometricMultigridSolver( - std::unique_ptr &&coarse_solver, const mfem::Array &dbc_marker, + std::unique_ptr &&coarse_solver, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces, int cycle_it, int smooth_it, int cheby_order) - : mfem::Solver(), fespaces_(fespaces), pc_it(cycle_it) + : mfem::Solver(), pc_it(cycle_it) { - // Read configuration file parameters used to set up the preconditioner. The default MG - // parameters are for a V-cycle with a single pre/post smoothing iteration. - MFEM_VERIFY(GetNumLevels() > 0, - "Empty finite element space hierarchy during multigrid solver setup!"); - // Configure levels of geometric coarsening. Multigrid vectors will be configured at first // call to Mult. The multigrid operator size is set based on the finest space dimension. 
- const int m = GetNumLevels(); - A_.resize(m, nullptr); - x_.resize(m, mfem::Vector()); - y_.resize(m, mfem::Vector()); - r_.resize(m, mfem::Vector()); - X_.resize(m, mfem::Array()); - Y_.resize(m, mfem::Array()); - R_.resize(m, mfem::Array()); - xrefs_.resize(m, std::vector()); - yrefs_.resize(m, std::vector()); - rrefs_.resize(m, std::vector()); + const int n_levels = fespaces.GetNumLevels(); + MFEM_VERIFY(n_levels > 0, + "Empty finite element space hierarchy during multigrid solver setup!"); + A_.resize(n_levels, nullptr); + dbc_tdof_lists_.resize(n_levels, nullptr); + x_.resize(n_levels, Vector()); + y_.resize(n_levels, Vector()); + r_.resize(n_levels, Vector()); + xrefs_.resize(n_levels, std::vector()); + yrefs_.resize(n_levels, std::vector()); + rrefs_.resize(n_levels, std::vector()); + X_.resize(n_levels, mfem::Array()); + Y_.resize(n_levels, mfem::Array()); + R_.resize(n_levels, mfem::Array()); + + // Configure prolongation operators between levels l and l + 1 (n_levels - 1 in total). + P_.reserve(n_levels - 1); + for (int l = 0; l < n_levels - 1; l++) + { + P_.push_back(fespaces.GetProlongationAtLevel(l)); + } // Use the supplied level 0 (coarse) solver. - B_.reserve(m); + B_.reserve(n_levels); B_.push_back(std::move(coarse_solver)); // Configure level smoothers. Use distributive relaxation smoothing if an auxiliary // finite element space was provided.
if (aux_fespaces) { - int cheby_smooth_it = 1; - for (int l = 1; l < m; l++) + for (int l = 1; l < n_levels; l++) { B_.push_back(std::make_unique( - fespaces.GetFESpaceAtLevel(l), aux_fespaces->GetFESpaceAtLevel(l), dbc_marker, - smooth_it, cheby_smooth_it, cheby_order)); + fespaces.GetFESpaceAtLevel(l), aux_fespaces->GetFESpaceAtLevel(l), smooth_it, 1, + cheby_order)); } } else { - for (int l = 1; l < m; l++) + for (int l = 1; l < n_levels; l++) { - mfem::Array dbc_tdof_list_l; - fespaces.GetFESpaceAtLevel(l).GetEssentialTrueDofs(dbc_marker, dbc_tdof_list_l); - B_.push_back( - std::make_unique(fespaces.GetFESpaceAtLevel(l).GetComm(), - dbc_tdof_list_l, smooth_it, cheby_order)); + B_.push_back(std::make_unique(smooth_it, cheby_order)); } } } void GeometricMultigridSolver::SetOperator( - const std::vector> &ops, - const std::vector> *aux_ops) + const std::vector> &ops, + const std::vector> *aux_ops) { - const int m = GetNumLevels(); - MFEM_VERIFY(ops.size() == static_cast(m) && - (!aux_ops || aux_ops->size() == static_cast(m)), + const int n_levels = static_cast(A_.size()); + MFEM_VERIFY(static_cast(ops.size()) == n_levels && + (!aux_ops || static_cast(aux_ops->size()) == n_levels), "Invalid number of levels for operators in multigrid solver setup!"); - for (int l = 0; l < m; l++) + for (int l = 0; l < n_levels; l++) { A_[l] = ops[l].get(); auto *dist_smoother = dynamic_cast(B_[l].get()); if (dist_smoother) { MFEM_VERIFY(aux_ops, "Distributive relaxation smoother relies on both primary space " - "and auxiliary space operators for geometric multigrid!") + "and auxiliary space operators for multigrid smoothing!"); dist_smoother->SetOperator(*ops[l], *(*aux_ops)[l]); } else { B_[l]->SetOperator(*ops[l]); } + + // Configure lists of essential boundary condition true dofs. 
+ const auto *PtAP_l = dynamic_cast(ops[l].get()); + MFEM_VERIFY(PtAP_l, "GeometricMultigridSolver requires ParOperator operators!"); + dbc_tdof_lists_[l] = PtAP_l->GetEssentialTrueDofs(); } + + // Operator size is given by the fine level dimensions. height = A_.back()->Height(); width = A_.back()->Width(); } +void GeometricMultigridSolver::ArrayMult(const mfem::Array &X, + mfem::Array &Y) const +{ + // Initialize. + const int n_levels = static_cast(A_.size()), n_rhs = X.Size(); + MFEM_ASSERT(!iterative_mode, "Geometric multigrid solver does not use iterative_mode!"); + MFEM_ASSERT(n_levels > 1 || pc_it == 1, + "Single-level geometric multigrid will not work with multiple iterations!"); + if (n_rhs * height != x_.back().Size()) + { + for (int l = 0; l < n_levels; l++) + { + MFEM_ASSERT(A_[l], "Missing operator for geometric multigrid level " << l << "!"); + x_[l].SetSize(n_rhs * A_[l]->Height()); + y_[l].SetSize(n_rhs * A_[l]->Height()); + r_[l].SetSize(n_rhs * A_[l]->Height()); + xrefs_[l].resize(n_rhs); + yrefs_[l].resize(n_rhs); + rrefs_[l].resize(n_rhs); + X_[l].SetSize(n_rhs); + Y_[l].SetSize(n_rhs); + R_[l].SetSize(n_rhs); + for (int j = 0; j < n_rhs; j++) + { + xrefs_[l][j].MakeRef(x_[l], j * A_[l]->Height(), A_[l]->Height()); + yrefs_[l][j].MakeRef(y_[l], j * A_[l]->Height(), A_[l]->Height()); + rrefs_[l][j].MakeRef(r_[l], j * A_[l]->Height(), A_[l]->Height()); + X_[l][j] = &xrefs_[l][j]; + Y_[l][j] = &yrefs_[l][j]; + R_[l][j] = &rrefs_[l][j]; + } + } + } + + // Apply V-cycle. X_ and Y_ on the finest level just point to X and Y to avoid an extra + // copy. + for (int j = 0; j < n_rhs; j++) + { + X_.back()[j] = const_cast(X[j]); + Y_.back()[j] = Y[j]; + } + for (int it = 0; it < pc_it; it++) + { + VCycle(n_levels - 1, (it > 0)); + } +} + void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const { // Pre-smooth, with zero initial guess (Y = 0 set inside). This is the coarse solve at // level 0. 
Important to note that the smoothers must respect the iterative_mode flag // correctly (given X, Y, compute Y <- Y + B (X - A Y)) . - const int nrhs = X_[l].Size(); + const int n_rhs = X_[l].Size(); B_[l]->iterative_mode = initial_guess; B_[l]->ArrayMult(X_[l], Y_[l]); if (l == 0) @@ -104,22 +158,27 @@ void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const return; } - // Compute residual and restrict. + // Compute residual. A_[l]->ArrayMult(Y_[l], R_[l]); - for (int j = 0; j < nrhs; j++) + for (int j = 0; j < n_rhs; j++) { subtract(*X_[l][j], *R_[l][j], *R_[l][j]); } - GetProlongationAtLevel(l - 1).ArrayMultTranspose(R_[l], X_[l - 1]); - - // XX TODO FIX DIRICHLET BCS HERE LIKE FOR DIST RELAXATION... // Coarse grid correction. + P_[l - 1]->ArrayMultTranspose(R_[l], X_[l - 1]); + if (dbc_tdof_lists_[l - 1]) + { + for (int j = 0; j < n_rhs; j++) + { + X_[l - 1][j]->SetSubVector(*dbc_tdof_lists_[l - 1], 0.0); + } + } VCycle(l - 1, false); // Prolongate and add. - GetProlongationAtLevel(l - 1).ArrayMult(Y_[l - 1], R_[l]); - for (int j = 0; j < nrhs; j++) + P_[l - 1]->ArrayMult(Y_[l - 1], R_[l]); + for (int j = 0; j < n_rhs; j++) { *Y_[l][j] += *R_[l][j]; } diff --git a/palace/linalg/gmg.hpp b/palace/linalg/gmg.hpp index c17b4fe03..4d8998fb3 100644 --- a/palace/linalg/gmg.hpp +++ b/palace/linalg/gmg.hpp @@ -7,13 +7,13 @@ #include #include #include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" #include "utils/iodata.hpp" namespace palace { -// XX TODO STORE DBC TDOFS AND APPLY AFTER PROLONGATION TRANSPOSE - // // Geometric multigrid preconditioner using a given coarse solver for the provided // hierarchy of finite element spaces. Optionally can be configured to use auxiliary space @@ -22,125 +22,62 @@ namespace palace class GeometricMultigridSolver : public mfem::Solver { private: - // Reference to the underlying finite element space hierarchy used to construct the - // multilevel preconditioner. 
- const mfem::ParFiniteElementSpaceHierarchy &fespaces_; + // Number of V-cycles per preconditioner application. + const int pc_it; + + // System matrices at each multigrid level and prolongation operators (not owned). + std::vector A_, P_; - // System matrices at each multigrid level (not owned). - std::vector A_; + // Essential Dirichlet boundary conditions at each level (not owned). + std::vector *> dbc_tdof_lists_; // Smoothers for each level. Coarse level solver is B_[0]. std::vector> B_; // Temporary vectors for preconditioner application. The type of these is dictated by the // MFEM Operator interface for multiple RHS. - mutable std::vector x_, y_, r_; - mutable std::vector> X_, Y_, R_; - mutable std::vector> xrefs_, yrefs_, rrefs_; - - // Number of V-cycles per preconditioner application. - const int pc_it; - - // Returns prolongation operator at given level. - const mfem::Operator &GetProlongationAtLevel(int l) const - { - return *fespaces_.GetProlongationAtLevel(l); - } - - // Returns the number of levels. - int GetNumLevels() const { return fespaces_.GetNumLevels(); } + mutable std::vector x_, y_, r_; + mutable std::vector> xrefs_, yrefs_, rrefs_; + mutable std::vector> X_, Y_, R_; // Internal function to perform a single V-cycle iteration. 
void VCycle(int l, bool initial_guess) const; public: GeometricMultigridSolver(std::unique_ptr &&coarse_solver, - const mfem::Array &dbc_marker, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces, int cycle_it, int smooth_it, int cheby_order); GeometricMultigridSolver(const IoData &iodata, std::unique_ptr &&coarse_solver, - const mfem::Array &dbc_marker, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) - : GeometricMultigridSolver(std::move(coarse_solver), dbc_marker, fespaces, aux_fespaces, + : GeometricMultigridSolver(std::move(coarse_solver), fespaces, aux_fespaces, iodata.solver.linear.mg_cycle_it, iodata.solver.linear.mg_smooth_it, iodata.solver.linear.mg_smooth_order) { } - // Sets the matrices from which to contruct a multilevel preconditioner. void SetOperator(const Operator &op) override { MFEM_ABORT("SetOperator with a single operator is not implemented for " "GeometricMultigridSolver, use the other signature instead!"); } - void SetOperator(const std::vector> &ops, - const std::vector> *aux_ops = nullptr); + void SetOperator(const std::vector> &ops, + const std::vector> *aux_ops = nullptr); - // Application of the solver. - void Mult(const mfem::Vector &x, mfem::Vector &y) const override + void Mult(const Vector &x, Vector &y) const override { - mfem::Array X(1); - mfem::Array Y(1); + mfem::Array X(1); + mfem::Array Y(1); X[0] = &x; Y[0] = &y; ArrayMult(X, Y); } - void ArrayMult(const mfem::Array &X, - mfem::Array &Y) const override - { - // Initialize. 
- const int m = GetNumLevels(), nrhs = X.Size(); - MFEM_VERIFY(!iterative_mode, "Geometric multigrid solver does not use iterative_mode!"); - MFEM_VERIFY(m > 1 || pc_it == 1, - "Single-level geometric multigrid will not work with multiple iterations!"); - if (nrhs * height != x_[m - 1].Size()) - { - for (int l = 0; l < m; l++) - { - MFEM_VERIFY(A_[l], "Missing operator for geometric multigrid level " << l << "!"); - x_[l].SetSize(nrhs * A_[l]->Height()); - y_[l].SetSize(nrhs * A_[l]->Height()); - r_[l].SetSize(nrhs * A_[l]->Height()); - } - } - for (int l = 0; l < m; l++) - { - xrefs_[l].resize(nrhs); - yrefs_[l].resize(nrhs); - rrefs_[l].resize(nrhs); - X_[l].SetSize(nrhs); - Y_[l].SetSize(nrhs); - R_[l].SetSize(nrhs); - for (int j = 0; j < nrhs; j++) - { - xrefs_[l][j].MakeRef(x_[l], j * A_[l]->Height(), A_[l]->Height()); - yrefs_[l][j].MakeRef(y_[l], j * A_[l]->Height(), A_[l]->Height()); - rrefs_[l][j].MakeRef(r_[l], j * A_[l]->Height(), A_[l]->Height()); - X_[l][j] = &xrefs_[l][j]; - Y_[l][j] = &yrefs_[l][j]; - R_[l][j] = &rrefs_[l][j]; - } - } - - // Apply V-cycle. 
- for (int j = 0; j < nrhs; j++) - { - *X_[m - 1][j] = *X[j]; - } - for (int it = 0; it < pc_it; it++) - { - VCycle(m - 1, (it > 0)); - } - for (int j = 0; j < nrhs; j++) - { - *Y[j] = *Y_[m - 1][j]; - } - } + void ArrayMult(const mfem::Array &X, + mfem::Array &Y) const override; }; } // namespace palace diff --git a/palace/linalg/jacobi.cpp b/palace/linalg/jacobi.cpp index 61d610cd6..b4ccb86b1 100644 --- a/palace/linalg/jacobi.cpp +++ b/palace/linalg/jacobi.cpp @@ -8,13 +8,16 @@ namespace palace { -JacobiSmoother::JacobiSmoother(const mfem::Vector &diag) : mfem::Solver(diag.Size()) +void JacobiSmoother::SetOperator(const Operator &op) { - dinv = diag; + height = op.Height(); + width = op.Width(); + dinv.SetSize(height); + op.AssembleDiagonal(dinv); // dinv.Reciprocal(); //XX TODO NEED MFEM PATCH } -void JacobiSmoother::Mult(const mfem::Vector &x, mfem::Vector &y) const +void JacobiSmoother::Mult(const Vector &x, Vector &y) const { MFEM_ASSERT(!iterative_mode, "JacobiSmoother is not implemented for iterative_mode = true!"); diff --git a/palace/linalg/jacobi.hpp b/palace/linalg/jacobi.hpp index 433d5aa3d..2b240d3af 100644 --- a/palace/linalg/jacobi.hpp +++ b/palace/linalg/jacobi.hpp @@ -5,6 +5,8 @@ #define PALACE_LINALG_JACOBI_SMOOTHER_HPP #include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { @@ -18,16 +20,16 @@ class JacobiSmoother : public mfem::Solver { private: // Inverse diagonal scaling of the operator. 
- mfem::Vector dinv; + Vector dinv; public: - JacobiSmoother(const mfem::Vector &diag); + JacobiSmoother() : mfem::Solver() {} - void SetOperator(const mfem::Operator &op) override {} + void SetOperator(const Operator &op) override; - void Mult(const mfem::Vector &x, mfem::Vector &y) const override; + void Mult(const Vector &x, Vector &y) const override; - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override { Mult(x, y); } + void MultTranspose(const Vector &x, Vector &y) const override { Mult(x, y); } }; } // namespace palace diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp index 4a0946f72..6a0bb9b15 100644 --- a/palace/linalg/ksp.cpp +++ b/palace/linalg/ksp.cpp @@ -3,397 +3,702 @@ #include "ksp.hpp" -#include -#include -#include "linalg/pc.hpp" -#include "linalg/petsc.hpp" +#include "linalg/amg.hpp" +#include "linalg/ams.hpp" +#include "linalg/complex.hpp" +#include "linalg/gmg.hpp" +#include "linalg/mumps.hpp" +#include "linalg/strumpack.hpp" +#include "linalg/superlu.hpp" #include "utils/communication.hpp" #include "utils/iodata.hpp" namespace palace { -KspSolver::KspSolver(MPI_Comm comm, const IoData &iodata, const std::string &prefix) - : clcustom(false), print(iodata.problem.verbose), print_opts(true), check_final(true), - solve(0) +namespace { - PalacePetscCall(KSPCreate(comm, &ksp)); - PalacePetscCall(KSPSetOptionsPrefix(ksp, prefix.c_str())); - Configure(iodata); - ConfigureVerbose(print, prefix); -} -KspSolver::KspSolver(MPI_Comm comm, int print_lvl, const std::string &prefix) - : clcustom(false), print(print_lvl), print_opts(true), check_final(true), solve(0) +std::unique_ptr ConfigureKrylovSolver(MPI_Comm comm, + const IoData &iodata) { - PalacePetscCall(KSPCreate(comm, &ksp)); - PalacePetscCall(KSPSetOptionsPrefix(ksp, prefix.c_str())); - ConfigureVerbose(print, prefix); -} + // Configure solver settings as needed based on inputs. 
+ config::LinearSolverData::KspType type = iodata.solver.linear.ksp_type; + if (type == config::LinearSolverData::KspType::DEFAULT) + { + if (iodata.problem.type == config::ProblemData::Type::ELECTROSTATIC || + iodata.problem.type == config::ProblemData::Type::MAGNETOSTATIC || + iodata.problem.type == config::ProblemData::Type::TRANSIENT) + { + type = config::LinearSolverData::KspType::CG; + } + else + { + type = config::LinearSolverData::KspType::GMRES; + } + } + mfem::IterativeSolver::PrintLevel print = + mfem::IterativeSolver::PrintLevel().Warnings().Errors(); + if (iodata.problem.verbose > 0) + { + print.Summary(); + if (iodata.problem.verbose > 1) + { + print.Iterations(); + if (iodata.problem.verbose > 2) + { + print.All(); + } + } + } -KspSolver::~KspSolver() -{ - MPI_Comm comm; - PalacePetscCall(PetscObjectGetComm(reinterpret_cast(ksp), &comm)); - PalacePetscCall(KSPDestroy(&ksp)); -} + // XX TODO: We may want to replace the MFEM Krylov solvers with Hypre ones for performance + // (for examples, Hypre has a COGMRES solver which uses CGS (or CGS2) for + // orthogonalization). These will require some wrappers to allow operability with + // an mfem::Operator operator and mfem::Solver preconditioner. -void KspSolver::Configure(const IoData &iodata) -{ - // Configure the Krylov solver. GMRES is the default solver for frequency domain - // problems. - switch (iodata.solver.linear.ksp_type) + // Create the solver. 
+ std::unique_ptr ksp; + switch (type) { case config::LinearSolverData::KspType::CG: - SetType(Type::CG); - break; - case config::LinearSolverData::KspType::CGSYM: - SetType(Type::CGSYM); - break; - case config::LinearSolverData::KspType::FCG: - SetType(Type::FCG); + ksp = std::make_unique(comm); break; case config::LinearSolverData::KspType::MINRES: - SetType(Type::MINRES); + ksp = std::make_unique(comm); break; case config::LinearSolverData::KspType::GMRES: - case config::LinearSolverData::KspType::DEFAULT: - SetType(Type::GMRES); - SetGMRESOptions(iodata.solver.linear.max_size, iodata.solver.linear.orthog_mgs, - iodata.solver.linear.orthog_cgs2); + { + auto gmres = std::make_unique(comm); + gmres->SetKDim(iodata.solver.linear.max_size); + ksp = std::move(gmres); + } break; case config::LinearSolverData::KspType::FGMRES: - SetType(Type::FGMRES); - SetGMRESOptions(iodata.solver.linear.max_size, iodata.solver.linear.orthog_mgs, - iodata.solver.linear.orthog_cgs2); - break; - case config::LinearSolverData::KspType::BCGS: - SetType(Type::BCGS); - break; - case config::LinearSolverData::KspType::BCGSL: - SetType(Type::BCGSL); - break; - case config::LinearSolverData::KspType::FBCGS: - SetType(Type::FBCGS); - break; - case config::LinearSolverData::KspType::QMRCGS: - SetType(Type::QMRCGS); + { + auto fgmres = std::make_unique(comm); + fgmres->SetKDim(iodata.solver.linear.max_size); + ksp = std::move(fgmres); + } break; - case config::LinearSolverData::KspType::TFQMR: - SetType(Type::TFQMR); + case config::LinearSolverData::KspType::BICGSTAB: + ksp = std::make_unique(comm); break; + case config::LinearSolverData::KspType::DEFAULT: case config::LinearSolverData::KspType::INVALID: - MFEM_ABORT("Unexpected type for KspSolver configuration!"); + MFEM_ABORT("Unexpected solver type for Krylov solver configuration!"); break; } - SetTol(iodata.solver.linear.tol); - SetMaxIter(iodata.solver.linear.max_it); - - // Reuse previous solution as guess for later solves if desired. 
- SetNonzeroInitialGuess(iodata.solver.linear.ksp_initial_guess); - - // Optionally use left or right preconditioning (otherwise use PETSc default for the given - // solver). - if (iodata.solver.linear.pc_side_type == config::LinearSolverData::SideType::LEFT) - { - PalacePetscCall(KSPSetPCSide(ksp, PC_LEFT)); - } - else if (iodata.solver.linear.pc_side_type == config::LinearSolverData::SideType::RIGHT) - { - PalacePetscCall(KSPSetPCSide(ksp, PC_RIGHT)); - } + ksp->iterative_mode = iodata.solver.linear.ksp_initial_guess; + ksp->SetRelTol(iodata.solver.linear.tol); + ksp->SetMaxIter(iodata.solver.linear.max_it); + ksp->SetPrintLevel(print); + return ksp; } -void KspSolver::ConfigureVerbose(int print, const std::string &prefix) +std::unique_ptr +ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, + mfem::ParFiniteElementSpaceHierarchy &fespaces, + mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) { - // Manage debugging output. - if (print > 0) + // Configure solver settings as needed based on inputs. 
+ config::LinearSolverData::Type type = iodata.solver.linear.type; + if (type == config::LinearSolverData::Type::DEFAULT) { - std::string opts = "-ksp_converged_reason"; - if (print > 1) - { - opts.append(" -ksp_monitor"); - } - if (print > 3) + if (iodata.problem.type == config::ProblemData::Type::ELECTROSTATIC || + (iodata.problem.type == config::ProblemData::Type::TRANSIENT && + iodata.solver.transient.type == config::TransientSolverData::Type::CENTRAL_DIFF)) { - opts.append(" -ksp_view"); + type = config::LinearSolverData::Type::BOOMER_AMG; } - if (prefix.length() > 0) + else if (iodata.problem.type == config::ProblemData::Type::MAGNETOSTATIC || + iodata.problem.type == config::ProblemData::Type::TRANSIENT) { - PetscOptionsPrefixPush(nullptr, prefix.c_str()); + type = config::LinearSolverData::Type::AMS; } - PetscOptionsInsertString(nullptr, opts.c_str()); - if (prefix.length() > 0) + else { - PetscOptionsPrefixPop(nullptr); + // Prefer sparse direct solver for frequency domain problems if available. +#if defined(MFEM_USE_SUPERLU) + type = config::LinearSolverData::Type::SUPERLU; +#elif defined(MFEM_USE_STRUMPACK) + type = config::LinearSolverData::Type::STRUMPACK; +#elif defined(MFEM_USE_MUMPS) + type = config::LinearSolverData::Type::MUMPS; +#else + type = config::LinearSolverData::Type::AMS; +#endif } } -} + int print = iodata.problem.verbose - 1; -void KspSolver::SetType(KspSolver::Type type, bool piped) -{ + // Create the solver. + std::unique_ptr pc; switch (type) { - case Type::CG: - PalacePetscCall((piped) ? KSPSetType(ksp, KSPPIPECG) : KSPSetType(ksp, KSPCG)); - PalacePetscCall(KSPCGSetType(ksp, KSP_CG_HERMITIAN)); - break; - case Type::CGSYM: - PalacePetscCall((piped) ? KSPSetType(ksp, KSPPIPECG) : KSPSetType(ksp, KSPCG)); - PalacePetscCall(KSPCGSetType(ksp, KSP_CG_SYMMETRIC)); - break; - case Type::FCG: - PalacePetscCall(KSPSetType(ksp, KSPFCG)); - break; - case Type::GMRES: - PalacePetscCall((piped) ? 
KSPSetType(ksp, KSPPGMRES) : KSPSetType(ksp, KSPGMRES)); - break; - case Type::FGMRES: - PalacePetscCall((piped) ? KSPSetType(ksp, KSPPIPEFGMRES) - : KSPSetType(ksp, KSPFGMRES)); - break; - case Type::MINRES: - PalacePetscCall(KSPSetType(ksp, KSPMINRES)); + case config::LinearSolverData::Type::AMS: + // Can either be the coarse solve for geometric multigrid or the solver at the finest + // space (in which case fespaces.GetNumLevels() == 1). + MFEM_VERIFY(aux_fespaces, "AMS solver relies on both primary space " + "and auxiliary spaces for construction!"); + pc = std::make_unique(iodata, fespaces.GetFESpaceAtLevel(0), + aux_fespaces->GetFESpaceAtLevel(0), print); break; - case Type::BCGS: - PalacePetscCall(KSPSetType(ksp, KSPBCGS)); + case config::LinearSolverData::Type::BOOMER_AMG: + pc = std::make_unique(iodata, print); break; - case Type::BCGSL: - PalacePetscCall(KSPSetType(ksp, KSPBCGSL)); - PalacePetscCall(KSPBCGSLSetEll(ksp, 2)); // PETSc default - break; - case Type::FBCGS: - PalacePetscCall(KSPSetType(ksp, KSPFBCGS)); + case config::LinearSolverData::Type::SUPERLU: +#if defined(MFEM_USE_SUPERLU) + pc = std::make_unique(comm, iodata, print); +#else + MFEM_ABORT("Solver was not built with SuperLU_DIST support, please choose a " + "different solver!"); +#endif break; - case Type::QMRCGS: - PalacePetscCall(KSPSetType(ksp, KSPQMRCGS)); + case config::LinearSolverData::Type::STRUMPACK: +#if defined(MFEM_USE_STRUMPACK) + pc = std::make_unique(comm, iodata, print); break; - case Type::TFQMR: - PalacePetscCall(KSPSetType(ksp, KSPTFQMR)); +#endif + case config::LinearSolverData::Type::STRUMPACK_MP: +#if defined(MFEM_USE_STRUMPACK) && \ + (STRUMPACK_VERSION_MAJOR >= 6 && STRUMPACK_VERSION_MINOR >= 3 && \ + STRUMPACK_VERSION_PATCH > 1) + pc = std::make_unique(comm, iodata, print); +#else + MFEM_ABORT("Solver was not built with STRUMPACK support or uses STRUMPACK older than " + "6.3.1 which does not include mixed-precision support, please choose a " + "different 
solver!"); +#endif break; - case Type::CHOLESKY: - { - PC pc; - PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); - PalacePetscCall(KSPGetPC(ksp, &pc)); - PalacePetscCall(PCSetType(pc, PCCHOLESKY)); - SetCheckFinal(false); - } + case config::LinearSolverData::Type::MUMPS: +#if defined(MFEM_USE_MUMPS) + pc = std::make_unique(comm, iodata, print); +#else + MFEM_ABORT( + "Solver was not built with MUMPS support, please choose a different solver!"); +#endif break; - case Type::LU: - { - PC pc; - PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); - PalacePetscCall(KSPGetPC(ksp, &pc)); - PalacePetscCall(PCSetType(pc, PCLU)); - SetCheckFinal(false); - } + default: + MFEM_ABORT("Unexpected solver type for preconditioner configuration!"); break; } -} - -void KspSolver::SetTol(PetscReal tol) -{ - PalacePetscCall(KSPSetTolerances(ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT)); -} - -void KspSolver::SetAbsTol(PetscReal tol) -{ - PalacePetscCall(KSPSetTolerances(ksp, PETSC_DEFAULT, tol, PETSC_DEFAULT, PETSC_DEFAULT)); -} - -void KspSolver::SetMaxIter(PetscInt maxits) -{ - PalacePetscCall( - KSPSetTolerances(ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, maxits)); -} - -void KspSolver::SetGMRESOptions(PetscInt maxsize, bool mgs, bool cgs2) -{ - PalacePetscCall(KSPGMRESSetRestart(ksp, maxsize)); - if (mgs) + if (iodata.solver.linear.mat_gmg) { - PalacePetscCall( - KSPGMRESSetOrthogonalization(ksp, KSPGMRESModifiedGramSchmidtOrthogonalization)); + // This will construct the multigrid hierarchy using pc as the coarse solver + // (ownership of pc is transfered to the GeometricMultigridSolver). When a special + // auxiliary space smoother for pre-/post-smoothing is not desired, the auxiliary + // space is a nullptr here. 
+ return std::make_unique(iodata, std::move(pc), fespaces, + aux_fespaces); } - else if (cgs2) + else { - PalacePetscCall(KSPGMRESSetCGSRefinementType(ksp, KSP_GMRES_CGS_REFINE_ALWAYS)); + return pc; } } -void KspSolver::SetTabLevel(PetscInt l) +class ComplexBlockDiagonalSolver : public mfem::Solver { - PalacePetscCall(PetscObjectSetTabLevel(reinterpret_cast(ksp), l)); -} +private: + std::unique_ptr op_; -void KspSolver::SetNonzeroInitialGuess(bool guess) -{ - PalacePetscCall(KSPSetInitialGuessNonzero(ksp, guess ? PETSC_TRUE : PETSC_FALSE)); -} +public: + ComplexBlockDiagonalSolver(std::unique_ptr &&op) + : mfem::Solver(2 * op->Height(), 2 * op->Width()), op_(std::move(op)) + { + } -void KspSolver::SetOperator(const petsc::PetscParMatrix &A, bool copy_prefix) -{ - // If A is the same as before, PETSc will reuse things like symbolic factorizations - // automatically. - PalacePetscCall(KSPSetOperators(ksp, A, A)); - if (copy_prefix) + void SetOperator(const Operator &op) override {} + + void Mult(const Vector &x, Vector &y) const override { - // Set Mat prefix to be the same as KSP to enable setting command-line options. 
- const char *prefix; - PalacePetscCall(KSPGetOptionsPrefix(ksp, &prefix)); - PalacePetscCall(MatSetOptionsPrefix(A, prefix)); + MFEM_ASSERT(x.Size() == 2 * op_->Width() && y.Size() == 2 * op_->Height(), + "Incompatible dimensions for ComplexBlockDiagonalSolver::Mult!"); + mfem::Array X(2); + mfem::Array Y(2); + Vector xr, xi, yr, yi; + xr.MakeRef(const_cast(x), 0, op_->Width()); + xi.MakeRef(const_cast(x), op_->Width(), op_->Width()); + yr.MakeRef(y, 0, op_->Height()); + yi.MakeRef(y, op_->Height(), op_->Height()); + op_->ArrayMult(X, Y); + yr.SyncAliasMemory(y); + yi.SyncAliasMemory(y); } -} +}; + +} // namespace -void KspSolver::SetPreconditioner(const KspPreconditioner &op) +KspSolver::KspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, + mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) + : mfem::Solver(), ksp_mult(0), ksp_mult_it(0) { - // The PETSc shell preconditioner does not take ownership of the preconditioner object. - PC pc; - PalacePetscCall(KSPGetPC(ksp, &pc)); - PalacePetscCall(PCSetType(pc, PCSHELL)); - PalacePetscCall(PCShellSetContext(pc, (void *)&op)); - PalacePetscCall(PCShellSetSetUp(pc, KspPreconditioner::PCSetUp)); - PalacePetscCall(PCShellSetApply(pc, KspPreconditioner::PCApply)); - PalacePetscCall(PCShellSetDestroy(pc, KspPreconditioner::PCDestroy)); + MFEM_VERIFY(fespaces.GetNumLevels() > 0, + "Empty finite element space hierarchy linear solver setup!"); + MPI_Comm comm = fespaces.GetFESpaceAtLevel(0).GetComm(); + pc_ = ConfigurePreconditionerSolver(comm, iodata, fespaces, aux_fespaces); + ksp_ = ConfigureKrylovSolver(comm, iodata); + ksp_->SetPreconditioner(*pc_); } -void KspSolver::Customize() const +void KspSolver::SetOperator(const Operator &op, const Operator &pc_op) { - if (!clcustom) - { - PalacePetscCall(KSPSetFromOptions(ksp)); - if (print > 0 && print_opts) - { - PetscOptionsView(nullptr, PETSC_VIEWER_STDOUT_(GetComm())); - Mpi::Print(GetComm(), "\n"); - } - clcustom = true; - } + // Unset the 
preconditioner before so that IterativeSolver::SetOperator does not set the + // preconditioner operator again. + pc_->SetOperator(pc_op); + // ksp_->SetPreconditioner(nullptr); //XX TODO WAITING MFEM PATCH + ksp_->SetOperator(op); + ksp_->SetPreconditioner(*pc_); + height = op.Height(); + width = op.Width(); } -void KspSolver::Mult(const petsc::PetscParVector &b, petsc::PetscParVector &x) const +void KspSolver::SetOperator(const Operator &op, + const std::vector> &pc_ops, + const std::vector> *aux_pc_ops) { - KSPConvergedReason reason; - PetscReal norm0 = 1.0, norm; - if (check_final) + auto *gmg = dynamic_cast(pc_.get()); + if (gmg) { - norm0 = b.Norml2(); + // Unset the preconditioner before so that IterativeSolver::SetOperator does not set the + // preconditioner operator again. + gmg->SetOperator(pc_ops, aux_pc_ops); + // ksp_->SetPreconditioner(nullptr); //XX TODO WAITING MFEM PATCH + ksp_->SetOperator(op); + ksp_->SetPreconditioner(*pc_); } - Customize(); - PalacePetscCall(KSPSolve(ksp, b, x)); - PalacePetscCall(KSPGetConvergedReason(ksp, &reason)); - if (check_final && reason < 0) + else { - Mat A; - Vec r; - PalacePetscCall(VecDuplicate(b, &r)); - PalacePetscCall(KSPGetOperators(ksp, &A, nullptr)); - PalacePetscCall(MatMult(A, x, r)); - PalacePetscCall(VecAXPY(r, -1.0, b)); - PalacePetscCall(VecNorm(r, NORM_2, &norm)); - PalacePetscCall(VecDestroy(&r)); - Mpi::Warning(GetComm(), - "Linear solver did not converge, " - "norm(Ax-b)/norm(b) = {:.3e} (norm(b) = {:.3e})!\n", - norm / norm0, norm0); + SetOperator(op, *pc_ops.back()); } - solve++; } -void KspSolver::Reset() +void KspSolver::Mult(const Vector &x, Vector &y) const { - PalacePetscCall(KSPReset(ksp)); + ksp_->Mult(x, y); + if (!ksp_->GetConverged()) + { + Mpi::Warning( + ksp_->GetComm(), + "Linear solver did not converge, norm(Ax-b)/norm(b) = {:.3e} (norm(b) = {:.3e})!\n", + ksp_->GetFinalRelNorm(), ksp_->GetInitialNorm()); + } + ksp_mult++; + ksp_mult_it += ksp_->GetNumIterations(); } -PetscInt 
KspSolver::GetTotalNumMult() const +ComplexKspSolver::ComplexKspSolver(const IoData &iodata, + mfem::ParFiniteElementSpaceHierarchy &fespaces, + mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) + : KspSolver() { - return solve; + MFEM_VERIFY(fespaces.GetNumLevels() > 0, + "Empty finite element space hierarchy linear solver setup!"); + MPI_Comm comm = fespaces.GetFESpaceAtLevel(0).GetComm(); + auto pcr = ConfigurePreconditionerSolver(comm, iodata, fespaces, aux_fespaces); + pc_ = std::make_unique(std::move(pcr)); + ksp_ = ConfigureKrylovSolver(comm, iodata); + ksp_->SetPreconditioner(*pc_); } -PetscInt KspSolver::GetNumIter() const +void ComplexKspSolver::Mult(const ComplexVector &x, ComplexVector &y) const { - PetscInt num_it; - PalacePetscCall(KSPGetIterationNumber(ksp, &num_it)); - return num_it; + KspSolver::Mult(x, y); // XX TODO TEST THIS... } -PetscInt KspSolver::GetTotalNumIter() const -{ - PetscInt num_it; - PalacePetscCall(KSPGetTotalIterations(ksp, &num_it)); - return num_it; -} +// XX TODO REMOVE -MPI_Comm KspSolver::GetComm() const -{ - return ksp ? PetscObjectComm(reinterpret_cast(ksp)) : MPI_COMM_NULL; -} +// KspSolver::KspSolver(MPI_Comm comm, const IoData &iodata, const std::string &prefix) +// : clcustom(false), print(iodata.problem.verbose), print_opts(true), check_final(true), +// solve(0) +// { +// PalacePetscCall(KSPCreate(comm, &ksp)); +// PalacePetscCall(KSPSetOptionsPrefix(ksp, prefix.c_str())); +// Configure(iodata); +// ConfigureVerbose(print, prefix); +// } -void KspSolver::SolveJacobi(const petsc::PetscParMatrix &A, const petsc::PetscParVector &b, - petsc::PetscParVector &x, PetscInt sym, PetscReal tol, - PetscInt max_it) -{ - MPI_Comm comm; - KSP ksp; - PC pc; - KSPConvergedReason reason; - - comm = A.GetComm(); - PalacePetscCall(KSPCreate(comm, &ksp)); - PalacePetscCall(KSPSetOperators(ksp, A, A)); - PalacePetscCall(KSPSetType(ksp, (sym == 1) ? 
KSPCG : KSPGMRES)); - PalacePetscCall(KSPGetPC(ksp, &pc)); - PalacePetscCall(PCSetType(pc, PCJACOBI)); - PalacePetscCall(PCJacobiSetFixDiagonal(pc, PETSC_TRUE)); - PalacePetscCall(KSPSetTolerances(ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT, max_it)); - // std::string opts = "-ksp_converged_reason -ksp_monitor"; - // PetscOptionsInsertString(nullptr, opts.c_str()); - // PalacePetscCall(KSPSetFromOptions(ksp)); - x.SetZero(); - PalacePetscCall(KSPSolve(ksp, b, x)); - PalacePetscCall(KSPGetConvergedReason(ksp, &reason)); - MFEM_VERIFY(reason > 0, "PETSc KSP did not converge!"); - PalacePetscCall(KSPDestroy(&ksp)); -} +// KspSolver::KspSolver(MPI_Comm comm, int print_lvl, const std::string &prefix) +// : clcustom(false), print(print_lvl), print_opts(true), check_final(true), solve(0) +// { +// PalacePetscCall(KSPCreate(comm, &ksp)); +// PalacePetscCall(KSPSetOptionsPrefix(ksp, prefix.c_str())); +// ConfigureVerbose(print, prefix); +// } -void KspSolver::SolveDirect(const petsc::PetscParMatrix &A, const petsc::PetscParVector &b, - petsc::PetscParVector &x, PetscInt sym) -{ - MPI_Comm comm; - KSP ksp; - PC pc; - KSPConvergedReason reason; - - comm = A.GetComm(); - PalacePetscCall(KSPCreate(comm, &ksp)); - PalacePetscCall(KSPSetOperators(ksp, A, A)); - PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); - PalacePetscCall(KSPGetPC(ksp, &pc)); -#if defined(PETSC_HAVE_MUMPS) || defined(PETSC_HAVE_SUPERLU_DIST) - PalacePetscCall(PCSetType(pc, (sym > 0) ? PCCHOLESKY : PCLU)); -#if defined(PETSC_HAVE_MUMPS) - PalacePetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS)); -#elif defined(PETSC_HAVE_SUPERLU_DIST) - PalacePetscCall(PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU_DIST)); -#endif -#else - // Use PETSc default serial direct solver. 
- PalacePetscCall(PCSetType(pc, PCREDUNDANT)); - PalacePetscCall(PCRedundantSetNumber(pc, Mpi::Size(comm))); - { - KSP ksp_in; - PC pc_in; - PalacePetscCall(PCRedundantGetKSP(pc, &ksp_in)); - PalacePetscCall(KSPGetPC(ksp_in, &pc_in)); - PalacePetscCall(PCSetType(pc_in, (sym > 0) ? PCCHOLESKY : PCLU)); - } -#endif - x.SetZero(); - PalacePetscCall(KSPSolve(ksp, b, x)); - PalacePetscCall(KSPGetConvergedReason(ksp, &reason)); - MFEM_VERIFY(reason > 0, "PETSc KSP did not converge!"); - PalacePetscCall(KSPDestroy(&ksp)); -} +// KspSolver::~KspSolver() +// { +// MPI_Comm comm; +// PalacePetscCall(PetscObjectGetComm(reinterpret_cast(ksp), &comm)); +// PalacePetscCall(KSPDestroy(&ksp)); +// } + +// void KspSolver::Configure(const IoData &iodata) +// { +// // Configure the Krylov solver. GMRES is the default solver for frequency domain +// // problems. +// switch (iodata.solver.linear.ksp_type) +// { +// case config::LinearSolverData::KspType::CG: +// SetType(Type::CG); +// break; +// case config::LinearSolverData::KspType::CGSYM: +// SetType(Type::CGSYM); +// break; +// case config::LinearSolverData::KspType::FCG: +// SetType(Type::FCG); +// break; +// case config::LinearSolverData::KspType::MINRES: +// SetType(Type::MINRES); +// break; +// case config::LinearSolverData::KspType::GMRES: +// case config::LinearSolverData::KspType::DEFAULT: +// SetType(Type::GMRES); +// SetGMRESOptions(iodata.solver.linear.max_size, iodata.solver.linear.orthog_mgs, +// iodata.solver.linear.orthog_cgs2); +// break; +// case config::LinearSolverData::KspType::FGMRES: +// SetType(Type::FGMRES); +// SetGMRESOptions(iodata.solver.linear.max_size, iodata.solver.linear.orthog_mgs, +// iodata.solver.linear.orthog_cgs2); +// break; +// case config::LinearSolverData::KspType::BCGS: +// SetType(Type::BCGS); +// break; +// case config::LinearSolverData::KspType::BCGSL: +// SetType(Type::BCGSL); +// break; +// case config::LinearSolverData::KspType::FBCGS: +// SetType(Type::FBCGS); +// break; +// case 
config::LinearSolverData::KspType::QMRCGS: +// SetType(Type::QMRCGS); +// break; +// case config::LinearSolverData::KspType::TFQMR: +// SetType(Type::TFQMR); +// break; +// default: +// MFEM_ABORT("Unexpected type for KspSolver configuration!"); +// break; +// } +// SetTol(iodata.solver.linear.tol); +// SetMaxIter(iodata.solver.linear.max_it); + +// // Reuse previous solution as guess for later solves if desired. +// SetNonzeroInitialGuess(iodata.solver.linear.ksp_initial_guess); + +// // Optionally use left or right preconditioning (otherwise use PETSc default for the +// given +// // solver). +// if (iodata.solver.linear.pc_side_type == config::LinearSolverData::SideType::LEFT) +// { +// PalacePetscCall(KSPSetPCSide(ksp, PC_LEFT)); +// } +// else if (iodata.solver.linear.pc_side_type == +// config::LinearSolverData::SideType::RIGHT) +// { +// PalacePetscCall(KSPSetPCSide(ksp, PC_RIGHT)); +// } +// } + +// void KspSolver::ConfigureVerbose(int print, const std::string &prefix) +// { +// // Manage debugging output. +// if (print > 0) +// { +// std::string opts = "-ksp_converged_reason"; +// if (print > 1) +// { +// opts.append(" -ksp_monitor"); +// } +// if (print > 3) +// { +// opts.append(" -ksp_view"); +// } +// if (prefix.length() > 0) +// { +// PetscOptionsPrefixPush(nullptr, prefix.c_str()); +// } +// PetscOptionsInsertString(nullptr, opts.c_str()); +// if (prefix.length() > 0) +// { +// PetscOptionsPrefixPop(nullptr); +// } +// } +// } + +// void KspSolver::SetType(KspSolver::Type type, bool piped) +// { +// switch (type) +// { +// case Type::CG: +// PalacePetscCall((piped) ? KSPSetType(ksp, KSPPIPECG) : KSPSetType(ksp, KSPCG)); +// PalacePetscCall(KSPCGSetType(ksp, KSP_CG_HERMITIAN)); +// break; +// case Type::CGSYM: +// PalacePetscCall((piped) ? 
KSPSetType(ksp, KSPPIPECG) : KSPSetType(ksp, KSPCG)); +// PalacePetscCall(KSPCGSetType(ksp, KSP_CG_SYMMETRIC)); +// break; +// case Type::FCG: +// PalacePetscCall(KSPSetType(ksp, KSPFCG)); +// break; +// case Type::GMRES: +// PalacePetscCall((piped) ? KSPSetType(ksp, KSPPGMRES) : KSPSetType(ksp, KSPGMRES)); +// break; +// case Type::FGMRES: +// PalacePetscCall((piped) ? KSPSetType(ksp, KSPPIPEFGMRES) +// : KSPSetType(ksp, KSPFGMRES)); +// break; +// case Type::MINRES: +// PalacePetscCall(KSPSetType(ksp, KSPMINRES)); +// break; +// case Type::BCGS: +// PalacePetscCall(KSPSetType(ksp, KSPBCGS)); +// break; +// case Type::BCGSL: +// PalacePetscCall(KSPSetType(ksp, KSPBCGSL)); +// PalacePetscCall(KSPBCGSLSetEll(ksp, 2)); // PETSc default +// break; +// case Type::FBCGS: +// PalacePetscCall(KSPSetType(ksp, KSPFBCGS)); +// break; +// case Type::QMRCGS: +// PalacePetscCall(KSPSetType(ksp, KSPQMRCGS)); +// break; +// case Type::TFQMR: +// PalacePetscCall(KSPSetType(ksp, KSPTFQMR)); +// break; +// case Type::CHOLESKY: +// { +// PC pc; +// PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); +// PalacePetscCall(KSPGetPC(ksp, &pc)); +// PalacePetscCall(PCSetType(pc, PCCHOLESKY)); +// SetCheckFinal(false); +// } +// break; +// case Type::LU: +// { +// PC pc; +// PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); +// PalacePetscCall(KSPGetPC(ksp, &pc)); +// PalacePetscCall(PCSetType(pc, PCLU)); +// SetCheckFinal(false); +// } +// break; +// default: +// MFEM_ABORT("Unexpected type for KspSolver!"); +// break; +// } +// } + +// void KspSolver::SetTol(PetscReal tol) +// { +// PalacePetscCall(KSPSetTolerances(ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT, +// PETSC_DEFAULT)); +// } + +// void KspSolver::SetAbsTol(PetscReal tol) +// { +// PalacePetscCall(KSPSetTolerances(ksp, PETSC_DEFAULT, tol, PETSC_DEFAULT, +// PETSC_DEFAULT)); +// } + +// void KspSolver::SetMaxIter(PetscInt maxits) +// { +// PalacePetscCall( +// KSPSetTolerances(ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, maxits)); +// } + 
+// void KspSolver::SetGMRESOptions(PetscInt maxsize, bool mgs, bool cgs2) +// { +// PalacePetscCall(KSPGMRESSetRestart(ksp, maxsize)); +// if (mgs) +// { +// PalacePetscCall( +// KSPGMRESSetOrthogonalization(ksp, KSPGMRESModifiedGramSchmidtOrthogonalization)); +// } +// else if (cgs2) +// { +// PalacePetscCall(KSPGMRESSetCGSRefinementType(ksp, KSP_GMRES_CGS_REFINE_ALWAYS)); +// } +// } + +// void KspSolver::SetTabLevel(PetscInt l) +// { +// PalacePetscCall(PetscObjectSetTabLevel(reinterpret_cast(ksp), l)); +// } + +// void KspSolver::SetNonzeroInitialGuess(bool guess) +// { +// PalacePetscCall(KSPSetInitialGuessNonzero(ksp, guess ? PETSC_TRUE : PETSC_FALSE)); +// } + +// void KspSolver::SetOperator(const petsc::PetscParMatrix &A, bool copy_prefix) +// { +// // If A is the same as before, PETSc will reuse things like symbolic factorizations +// // automatically. +// PalacePetscCall(KSPSetOperators(ksp, A, A)); +// if (copy_prefix) +// { +// // Set Mat prefix to be the same as KSP to enable setting command-line options. +// const char *prefix; +// PalacePetscCall(KSPGetOptionsPrefix(ksp, &prefix)); +// PalacePetscCall(MatSetOptionsPrefix(A, prefix)); +// } +// } + +// void KspSolver::SetPreconditioner(const KspPreconditioner &op) +// { +// // The PETSc shell preconditioner does not take ownership of the preconditioner object. 
+// PC pc; +// PalacePetscCall(KSPGetPC(ksp, &pc)); +// PalacePetscCall(PCSetType(pc, PCSHELL)); +// PalacePetscCall(PCShellSetContext(pc, (void *)&op)); +// PalacePetscCall(PCShellSetSetUp(pc, KspPreconditioner::PCSetUp)); +// PalacePetscCall(PCShellSetApply(pc, KspPreconditioner::PCApply)); +// PalacePetscCall(PCShellSetDestroy(pc, KspPreconditioner::PCDestroy)); +// } + +// void KspSolver::Customize() const +// { +// if (!clcustom) +// { +// PalacePetscCall(KSPSetFromOptions(ksp)); +// if (print > 0 && print_opts) +// { +// PetscOptionsView(nullptr, PETSC_VIEWER_STDOUT_(GetComm())); +// Mpi::Print(GetComm(), "\n"); +// } +// clcustom = true; +// } +// } + +// void KspSolver::Mult(const petsc::PetscParVector &b, petsc::PetscParVector &x) const +// { +// KSPConvergedReason reason; +// PetscReal norm0 = 1.0, norm; +// if (check_final) +// { +// norm0 = b.Norml2(); +// } +// Customize(); +// PalacePetscCall(KSPSolve(ksp, b, x)); +// PalacePetscCall(KSPGetConvergedReason(ksp, &reason)); +// if (check_final && reason < 0) +// { +// Mat A; +// Vec r; +// PalacePetscCall(VecDuplicate(b, &r)); +// PalacePetscCall(KSPGetOperators(ksp, &A, nullptr)); +// PalacePetscCall(MatMult(A, x, r)); +// PalacePetscCall(VecAXPY(r, -1.0, b)); +// PalacePetscCall(VecNorm(r, NORM_2, &norm)); +// PalacePetscCall(VecDestroy(&r)); +// Mpi::Warning(GetComm(), +// "Linear solver did not converge, " +// "norm(Ax-b)/norm(b) = {:.3e} (norm(b) = {:.3e})!\n", +// norm / norm0, norm0); +// } +// solve++; +// } + +// void KspSolver::Reset() +// { +// PalacePetscCall(KSPReset(ksp)); +// } + +// PetscInt KspSolver::GetTotalNumMult() const +// { +// return solve; +// } + +// PetscInt KspSolver::GetNumIter() const +// { +// PetscInt num_it; +// PalacePetscCall(KSPGetIterationNumber(ksp, &num_it)); +// return num_it; +// } + +// PetscInt KspSolver::GetTotalNumIter() const +// { +// PetscInt num_it; +// PalacePetscCall(KSPGetTotalIterations(ksp, &num_it)); +// return num_it; +// } + +// MPI_Comm 
KspSolver::GetComm() const +// { +// return ksp ? PetscObjectComm(reinterpret_cast(ksp)) : MPI_COMM_NULL; +// } + +// void KspSolver::SolveJacobi(const petsc::PetscParMatrix &A, const petsc::PetscParVector +// &b, +// petsc::PetscParVector &x, PetscInt sym, PetscReal tol, +// PetscInt max_it) +// { +// MPI_Comm comm; +// KSP ksp; +// PC pc; +// KSPConvergedReason reason; + +// comm = A.GetComm(); +// PalacePetscCall(KSPCreate(comm, &ksp)); +// PalacePetscCall(KSPSetOperators(ksp, A, A)); +// PalacePetscCall(KSPSetType(ksp, (sym == 1) ? KSPCG : KSPGMRES)); +// PalacePetscCall(KSPGetPC(ksp, &pc)); +// PalacePetscCall(PCSetType(pc, PCJACOBI)); +// PalacePetscCall(PCJacobiSetFixDiagonal(pc, PETSC_TRUE)); +// PalacePetscCall(KSPSetTolerances(ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT, max_it)); +// // std::string opts = "-ksp_converged_reason -ksp_monitor"; +// // PetscOptionsInsertString(nullptr, opts.c_str()); +// // PalacePetscCall(KSPSetFromOptions(ksp)); +// x.SetZero(); +// PalacePetscCall(KSPSolve(ksp, b, x)); +// PalacePetscCall(KSPGetConvergedReason(ksp, &reason)); +// MFEM_VERIFY(reason > 0, "PETSc KSP did not converge!"); +// PalacePetscCall(KSPDestroy(&ksp)); +// } + +// void KspSolver::SolveDirect(const petsc::PetscParMatrix &A, const petsc::PetscParVector +// &b, +// petsc::PetscParVector &x, PetscInt sym) +// { +// MPI_Comm comm; +// KSP ksp; +// PC pc; +// KSPConvergedReason reason; + +// comm = A.GetComm(); +// PalacePetscCall(KSPCreate(comm, &ksp)); +// PalacePetscCall(KSPSetOperators(ksp, A, A)); +// PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); +// PalacePetscCall(KSPGetPC(ksp, &pc)); +// #if defined(PETSC_HAVE_MUMPS) || defined(PETSC_HAVE_SUPERLU_DIST) +// PalacePetscCall(PCSetType(pc, (sym > 0) ? 
PCCHOLESKY : PCLU)); +// #if defined(PETSC_HAVE_MUMPS) +// PalacePetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS)); +// #elif defined(PETSC_HAVE_SUPERLU_DIST) +// PalacePetscCall(PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU_DIST)); +// #endif +// #else +// // Use PETSc default serial direct solver. +// PalacePetscCall(PCSetType(pc, PCREDUNDANT)); +// PalacePetscCall(PCRedundantSetNumber(pc, Mpi::Size(comm))); +// { +// KSP ksp_in; +// PC pc_in; +// PalacePetscCall(PCRedundantGetKSP(pc, &ksp_in)); +// PalacePetscCall(KSPGetPC(ksp_in, &pc_in)); +// PalacePetscCall(PCSetType(pc_in, (sym > 0) ? PCCHOLESKY : PCLU)); +// } +// #endif +// x.SetZero(); +// PalacePetscCall(KSPSolve(ksp, b, x)); +// PalacePetscCall(KSPGetConvergedReason(ksp, &reason)); +// MFEM_VERIFY(reason > 0, "PETSc KSP did not converge!"); +// PalacePetscCall(KSPDestroy(&ksp)); +// } } // namespace palace diff --git a/palace/linalg/ksp.hpp b/palace/linalg/ksp.hpp index ef236afe7..321a080d1 100644 --- a/palace/linalg/ksp.hpp +++ b/palace/linalg/ksp.hpp @@ -4,143 +4,201 @@ #ifndef PALACE_LINALG_KSP_SOLVER_HPP #define PALACE_LINALG_KSP_SOLVER_HPP -#include -#include "linalg/petsc.hpp" +#include +#include +#include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { +class ComplexVector; class IoData; -class KspPreconditioner; -namespace petsc +class KspSolver : public mfem::Solver { +protected: + // The actual solver and preconditioner objects. + std::unique_ptr ksp_; + std::unique_ptr pc_; -class PetscParMatrix; -class PetscParVector; +private: + // Counters for number of calls to Mult method for linear solves, and cumulative number + // of iterations. 
+ mutable int ksp_mult, ksp_mult_it; + +protected: + KspSolver() : ksp_(nullptr), pc_(nullptr), ksp_mult(0), ksp_mult_it(0) {} + +public: + KspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, + mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr); + KspSolver(std::unique_ptr &&ksp, + std::unique_ptr &&pc) + : ksp_(std::move(ksp)), pc_(std::move(pc)), ksp_mult(0), ksp_mult_it(0) + { + } -} // namespace petsc + int NumTotalMult() const { return ksp_mult; } + int NumTotalMultIter() const { return ksp_mult_it; } -// -// A wrapper of PETSc's KSP class for solving linear systems. -// -class KspSolver + void SetOperator(const Operator &op) override { SetOperator(op, op); } + void SetOperator(const Operator &op, const Operator &pc_op); + void SetOperator(const Operator &op, const std::vector> &pc_ops, + const std::vector> *pc_aux_ops = nullptr); + + void Mult(const Vector &x, Vector &y) const override; +}; + +class ComplexKspSolver : public KspSolver { public: - enum class Type + ComplexKspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, + mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr); + + void Mult(const Vector &x, Vector &y) const override { - CG, - CGSYM, - FCG, - MINRES, - GMRES, - FGMRES, - BCGS, - BCGSL, - FBCGS, - QMRCGS, - TFQMR, - CHOLESKY, - LU - }; + MFEM_ABORT("Mult with a real-valued vector is not implemented for " + "ComplexKspSolver, use the complex-valued signature instead!"); + } + void Mult(const ComplexVector &x, ComplexVector &y) const; +}; -private: - // The actual PETSc object. - KSP ksp; +// XX TODO REMOVE - // Boolean to handle SetFromOptions calls. - mutable bool clcustom; +// class IoData; +// class KspPreconditioner; - // Control print level for debugging. - int print; +// namespace petsc +// { - // Print PETSc options database prior to solve. - bool print_opts; +// class PetscParMatrix; +// class PetscParVector; - // Check for final residual if not converged. 
Defaults to true. - bool check_final; +// } // namespace petsc - // Counter for number of calls to Mult method for a linear solve. - mutable PetscInt solve; +// // +// // A wrapper of PETSc's KSP class for solving linear systems. +// // +// class KspSolver +// { +// public: +// enum class Type +// { +// CG, +// CGSYM, +// FCG, +// MINRES, +// GMRES, +// FGMRES, +// BCGS, +// BCGSL, +// FBCGS, +// QMRCGS, +// TFQMR, +// CHOLESKY, +// LU +// }; - // Set up debugging output and configure the solver based on user specified parameters. - void Configure(const IoData &iodata); - void ConfigureVerbose(int print, const std::string &prefix); +// private: +// // The actual PETSc object. +// KSP ksp; - // Customize object with command line options set. - void Customize() const; +// // Boolean to handle SetFromOptions calls. +// mutable bool clcustom; -public: - // Calls PETSc's KSPCreate. - KspSolver(MPI_Comm comm, const IoData &iodata, const std::string &prefix = std::string()); - KspSolver(MPI_Comm comm, int print_lvl, const std::string &prefix = std::string()); +// // Control print level for debugging. +// int print; - // Calls PETSc's KSPDestroy. - ~KspSolver(); +// // Print PETSc options database prior to solve. +// bool print_opts; - // Sets the solver type. - void SetType(Type type, bool piped = false); +// // Check for final residual if not converged. Defaults to true. +// bool check_final; - // Set solver tolerance. - void SetTol(PetscReal tol); +// // Counter for number of calls to Mult method for a linear solve. +// mutable PetscInt solve; - // Set solver tolerance. - void SetAbsTol(PetscReal tol); +// // Set up debugging output and configure the solver based on user specified parameters. +// void Configure(const IoData &iodata); +// void ConfigureVerbose(int print, const std::string &prefix); - // Set maximum number of iterations. - void SetMaxIter(PetscInt maxits); +// // Customize object with command line options set. 
+// void Customize() const; - // Set options specific to GMRES and FGMRES solvers. - void SetGMRESOptions(PetscInt maxsize, bool mgs, bool cgs2); +// public: +// // Calls PETSc's KSPCreate. +// KspSolver(MPI_Comm comm, const IoData &iodata, const std::string &prefix = +// std::string()); KspSolver(MPI_Comm comm, int print_lvl, const std::string &prefix = +// std::string()); - // Sets the tab level for KSP output. - void SetTabLevel(PetscInt l); +// // Calls PETSc's KSPDestroy. +// ~KspSolver(); - // Set flag to print PETSc options database at start of solve. - void SetPrintOptions(bool opts) { print_opts = opts; } +// // Sets the solver type. +// void SetType(Type type, bool piped = false); - // Set flag to check final residual if unconverged. - void SetCheckFinal(bool check) { check_final = check; } +// // Set solver tolerance. +// void SetTol(PetscReal tol); - // Set an initial vector for the solution subspace. - void SetNonzeroInitialGuess(bool guess); +// // Set solver tolerance. +// void SetAbsTol(PetscReal tol); - // Sets the MVP and preconditioner matrix. - void SetOperator(const petsc::PetscParMatrix &A, bool copy_prefix = true); +// // Set maximum number of iterations. +// void SetMaxIter(PetscInt maxits); - // Configures a shell preconditioner based on the given preconditioner object. - void SetPreconditioner(const KspPreconditioner &op); +// // Set options specific to GMRES and FGMRES solvers. +// void SetGMRESOptions(PetscInt maxsize, bool mgs, bool cgs2); - // Application of the solver. - void Mult(const petsc::PetscParVector &b, petsc::PetscParVector &x) const; +// // Sets the tab level for KSP output. +// void SetTabLevel(PetscInt l); - // Call KSPReset, for example if the operator dimension has changed. - void Reset(); +// // Set flag to print PETSc options database at start of solve. +// void SetPrintOptions(bool opts) { print_opts = opts; } - // Get number of solver calls. 
- PetscInt GetTotalNumMult() const; +// // Set flag to check final residual if unconverged. +// void SetCheckFinal(bool check) { check_final = check; } - // Get number of solver iterations. - PetscInt GetNumIter() const; - PetscInt GetTotalNumIter() const; +// // Set an initial vector for the solution subspace. +// void SetNonzeroInitialGuess(bool guess); - // Get the associated MPI communicator. - MPI_Comm GetComm() const; +// // Sets the MVP and preconditioner matrix. +// void SetOperator(const petsc::PetscParMatrix &A, bool copy_prefix = true); - // Conversion function to PETSc's KSP type. - operator KSP() const { return ksp; } +// // Configures a shell preconditioner based on the given preconditioner object. +// void SetPreconditioner(const KspPreconditioner &op); - // Typecasting to PETSc object. - operator PetscObject() const { return reinterpret_cast(ksp); } +// // Application of the solver. +// void Mult(const petsc::PetscParVector &b, petsc::PetscParVector &x) const; - // Simple static linear solve methods. The sym variable defines the matrix type: 0 for - // general, 1 for SPD, 2 for symmetric indefinite (definitions from MUMPS). - static void SolveJacobi(const petsc::PetscParMatrix &A, const petsc::PetscParVector &b, - petsc::PetscParVector &x, PetscInt sym, double PetscReal = 1.0e-9, - PetscInt max_it = 5000); - static void SolveDirect(const petsc::PetscParMatrix &A, const petsc::PetscParVector &b, - petsc::PetscParVector &x, PetscInt sym); -}; +// // Call KSPReset, for example if the operator dimension has changed. +// void Reset(); + +// // Get number of solver calls. +// PetscInt GetTotalNumMult() const; + +// // Get number of solver iterations. +// PetscInt GetNumIter() const; +// PetscInt GetTotalNumIter() const; + +// // Get the associated MPI communicator. +// MPI_Comm GetComm() const; + +// // Conversion function to PETSc's KSP type. +// operator KSP() const { return ksp; } + +// // Typecasting to PETSc object. 
+// operator PetscObject() const { return reinterpret_cast(ksp); } + +// // Simple static linear solve methods. The sym variable defines the matrix type: 0 for +// // general, 1 for SPD, 2 for symmetric indefinite (definitions from MUMPS). +// static void SolveJacobi(const petsc::PetscParMatrix &A, const petsc::PetscParVector &b, +// petsc::PetscParVector &x, PetscInt sym, double PetscReal +// = 1.0e-9, PetscInt max_it = 5000); +// static void SolveDirect(const petsc::PetscParMatrix &A, const petsc::PetscParVector &b, +// petsc::PetscParVector &x, PetscInt sym); +// }; } // namespace palace diff --git a/palace/linalg/mumps.cpp b/palace/linalg/mumps.cpp index 548643c8c..479ed5755 100644 --- a/palace/linalg/mumps.cpp +++ b/palace/linalg/mumps.cpp @@ -43,6 +43,13 @@ MumpsSolver::MumpsSolver(MPI_Comm comm, mfem::MUMPSSolver::MatType sym, } } +void MumpsSolver::SetOperator(const Operator &op) +{ + auto *PtAP = const_cast(dynamic_cast(&op)); + MFEM_VERIFY(PtAP, "MumpsSolver requires a ParOperator operator!"); + mfem::MUMPSSolver::SetOperator(PtAP->ParallelAssemble()); +} + } // namespace palace #endif diff --git a/palace/linalg/mumps.hpp b/palace/linalg/mumps.hpp index fa10193f8..00469a400 100644 --- a/palace/linalg/mumps.hpp +++ b/palace/linalg/mumps.hpp @@ -8,6 +8,7 @@ #if defined(MFEM_USE_MUMPS) +#include "linalg/operator.hpp" #include "utils/iodata.hpp" namespace palace @@ -37,6 +38,8 @@ class MumpsSolver : public mfem::MUMPSSolver print) { } + + void SetOperator(const Operator &op) override; }; } // namespace palace diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp index 6aa33ab1c..280af7af1 100644 --- a/palace/linalg/operator.cpp +++ b/palace/linalg/operator.cpp @@ -64,12 +64,12 @@ void ParOperator::EliminateRHS(const Vector &x, Vector &b) const test_dbc_tdof_list_ = b_test_dbc_tdof_list_; { - const int N = test_dbc_tdof_list_->Size(); - auto idx = test_dbc_tdof_list_->Read(); - auto B = b.ReadWrite(); if (diag_policy_ == DiagonalPolicy::DIAG_ONE 
&& height == width) { + const int N = test_dbc_tdof_list_->Size(); + const auto *idx = test_dbc_tdof_list_->Read(); const auto *X = x.Read(); + auto *B = b.ReadWrite(); mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { @@ -79,12 +79,7 @@ void ParOperator::EliminateRHS(const Vector &x, Vector &b) const } else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) { - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - B[id] = 0.0; - }); + b.SetSubVector(*test_dbc_tdof_list_, 0.0); } else { @@ -118,26 +113,13 @@ void ParOperator::AssembleDiagonal(Vector &diag) const if (test_dbc_tdof_list_) { - const int N = test_dbc_tdof_list_->Size(); - const auto *idx = test_dbc_tdof_list_->Read(); - auto *D = diag.ReadWrite(); if (diag_policy_ == DiagonalPolicy::DIAG_ONE) { - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - D[id] = 1.0; - }); + diag.SetSubVector(*test_dbc_tdof_list_, 1.0); } else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO) { - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - D[id] = 0.0; - }); + diag.SetSubVector(*test_dbc_tdof_list_, 0.0); } else { @@ -148,6 +130,11 @@ void ParOperator::AssembleDiagonal(Vector &diag) const mfem::HypreParMatrix &ParOperator::ParallelAssemble() { + + // XX TODO: For mfem::AssemblyLevel::PARTIAL, we cannot use CeedOperatorFullAssemble for + // a ND space with p > 1. We should throw an error here that the user needs to + // use AssemblyLevel::LEGACY in this case. + if (!RAP_) { auto *bfA = dynamic_cast(A_.get()); @@ -233,9 +220,15 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble() else { // Rectangular elimination sets all eliminated rows/columns to zero. 
- mfem::HypreParMatrix *RAPe = RAP_->EliminateCols(*trial_dbc_tdof_list_); - RAP_->EliminateRows(*test_dbc_tdof_list_); - delete RAPe; + if (test_dbc_tdof_list_) + { + RAP_->EliminateRows(*test_dbc_tdof_list_); + } + if (trial_dbc_tdof_list_) + { + mfem::HypreParMatrix *RAPe = RAP_->EliminateCols(*trial_dbc_tdof_list_); + delete RAPe; + } } } } @@ -271,12 +264,12 @@ void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const { test_fespace_.GetRestrictionMatrix()->Mult(ly_, ty_); } - const int N = test_dbc_tdof_list_->Size(); - auto idx = test_dbc_tdof_list_->Read(); - auto TY = ty_.ReadWrite(); if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) { + const int N = test_dbc_tdof_list_->Size(); + const auto *idx = test_dbc_tdof_list_->Read(); const auto *X = x.Read(); + auto *TY = ty_.ReadWrite(); mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { @@ -286,12 +279,7 @@ void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const } else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) { - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - TY[id] = 0.0; - }); + ty_.SetSubVector(*test_dbc_tdof_list_, 0.0); } else { @@ -341,12 +329,12 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c if (trial_dbc_tdof_list_) { trial_fespace_.GetProlongationMatrix()->MultTranspose(lx_, tx_); - const int N = trial_dbc_tdof_list_->Size(); - auto idx = trial_dbc_tdof_list_->Read(); - auto TX = tx_.ReadWrite(); if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) { + const int N = trial_dbc_tdof_list_->Size(); + const auto *idx = trial_dbc_tdof_list_->Read(); const auto *X = x.Read(); + auto *TX = tx_.ReadWrite(); mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { @@ -356,12 +344,7 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c } else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) { - mfem::forall(N, - [=] 
MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - TX[id] = 0.0; - }); + tx_.SetSubVector(*trial_dbc_tdof_list_, 0.0); } else { diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp index 202160945..c6f35ece6 100644 --- a/palace/linalg/operator.hpp +++ b/palace/linalg/operator.hpp @@ -49,9 +49,9 @@ class ParOperator : public Operator const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict = false); // Get access to the underlying local (L-vector) operator. - const Operator &GetOperator() const + const Operator &LocalOperator() const { - MFEM_VERIFY(A_, "No local matrix available for ParOperator::GetOperator!"); + MFEM_VERIFY(A_, "No local matrix available for ParOperator::LocalOperator!"); return *A_; } @@ -67,25 +67,25 @@ class ParOperator : public Operator } // Set essential boundary condition true dofs for rectangular operators. - void SetEssentialTrueDofs(const mfem::Array &trial_dbc_tdof_list, - const mfem::Array &test_dbc_tdof_list, + void SetEssentialTrueDofs(const mfem::Array *trial_dbc_tdof_list, + const mfem::Array *test_dbc_tdof_list, DiagonalPolicy diag_policy) { MFEM_VERIFY(diag_policy == DiagonalPolicy::DIAG_ZERO, "Essential boundary condition true dof elimination for rectangular " "ParOperator only supports DiagonalPolicy::DIAG_ZERO!"); - trial_dbc_tdof_list_ = &trial_dbc_tdof_list; - test_dbc_tdof_list_ = &test_dbc_tdof_list; + trial_dbc_tdof_list_ = trial_dbc_tdof_list; + test_dbc_tdof_list_ = test_dbc_tdof_list; diag_policy_ = diag_policy; } // Get the essential boundary condition true dofs associated with the operator. May be // nullptr.
- void GetEssentialTrueDofs(const mfem::Array *&trial_dbc_tdof_list, - const mfem::Array *&test_dbc_tdof_list) + const mfem::Array *GetEssentialTrueDofs() const { - trial_dbc_tdof_list = trial_dbc_tdof_list_; - test_dbc_tdof_list = test_dbc_tdof_list_; + MFEM_VERIFY(trial_dbc_tdof_list_ == test_dbc_tdof_list_ && height == width, + "GetEssentialTrueDofs should only be used for square ParOperator!"); + return trial_dbc_tdof_list_; } // Eliminate essential true dofs from the RHS vector b, using the essential boundary @@ -95,7 +95,8 @@ class ParOperator : public Operator // Assemble the diagonal for the parallel operator. void AssembleDiagonal(Vector &diag) const override; - // Assemble the operator as a parallel sparse matrix. + // Assemble the operator as a parallel sparse matrix. This frees the memory associated + // with the local operator. mfem::HypreParMatrix &ParallelAssemble(); // Get the associated MPI communicator. @@ -232,4 +233,22 @@ double SpectralNorm(MPI_Comm comm, const ComplexOperator &A, bool herm = false, } // namespace palace +namespace mfem +{ + +// A symmetric bilinear form operator which replaces *MultTranspose with *Mult. +class SymmetricBilinearForm : public BilinearForm +{ +public: + using BilinearForm::BilinearForm; + + void MultTranspose(const Vector &x, Vector &y) const override { Mult(x, y); } + void AddMultTranspose(const Vector &x, Vector &y, double c = 1.0) const override + { + AddMult(x, y, c); + } +}; + +} // namespace mfem + #endif // PALACE_LINALG_OPERATOR_HPP diff --git a/palace/linalg/pc.hpp b/palace/linalg/pc.hpp index dbb49a388..252d3d15f 100644 --- a/palace/linalg/pc.hpp +++ b/palace/linalg/pc.hpp @@ -14,6 +14,9 @@ namespace palace class IoData; +// XX TODO REFACTOR INTO KSP.HPP/CPP FOR REAL-VALUED LINEAR SOLVER! KspSolver, +// ComplexKspSolver... + // Global method for preconditioner configuration and construction. 
std::unique_ptr ConfigurePreconditioner(const IoData &iodata, const mfem::Array &dbc_marker, @@ -49,12 +52,10 @@ class KspPreconditioner } } - // Sets the matrix from which to contruct a preconditioner. void SetOperator(const mfem::Operator &op); void SetOperator(const std::vector> &ops, const std::vector> *aux_ops = nullptr); - // Application of the preconditioner. void Mult(const petsc::PetscParVector &x, petsc::PetscParVector &y) const; // Wrapper functions for PETSc PCSHELL. diff --git a/palace/linalg/petsc.cpp b/palace/linalg/petsc.cpp index 6b4271b4b..d4596a9fb 100644 --- a/palace/linalg/petsc.cpp +++ b/palace/linalg/petsc.cpp @@ -695,9 +695,9 @@ PetscReal PetscParMatrix::Norm2(PetscReal tol, PetscInt maxits) const { maxits = 100; } -#if defined(PALACE_WITH_SLEPC) - return slepc::GetMaxSingularValue(*this, tol, maxits); -#else + // #if defined(PALACE_WITH_SLEPC) + // return slepc::GetMaxSingularValue(*this, tol, maxits); + // #else // Power iteration loop: ||A||₂² = λₙ(Aᴴ A) . PetscInt it = 0; PetscReal res = 0.0; @@ -736,7 +736,7 @@ PetscReal PetscParMatrix::Norm2(PetscReal tol, PetscInt maxits) const it, res, l); } return GetHermitian() ? 
l : PetscSqrtReal(l); -#endif + // #endif } void PetscParMatrix::Scale(PetscScalar s) diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp index 8b21a9bbe..7dbd8d777 100644 --- a/palace/linalg/slepc.cpp +++ b/palace/linalg/slepc.cpp @@ -3,6 +3,8 @@ #include "slepc.hpp" +#if 0 // XX TODO DISABLE SLEPC FOR NOW + #if defined(PALACE_WITH_SLEPC) #include @@ -1524,3 +1526,5 @@ PetscErrorCode __pc_apply_PEP(PC pc, Vec x, Vec y) } #endif + +#endif diff --git a/palace/linalg/slepc.hpp b/palace/linalg/slepc.hpp index ba266662b..1c9ff5b79 100644 --- a/palace/linalg/slepc.hpp +++ b/palace/linalg/slepc.hpp @@ -4,6 +4,8 @@ #ifndef PALACE_LINALG_SLEPC_HPP #define PALACE_LINALG_SLEPC_HPP +#if 0 // XX TODO DISABLE SLEPC FOR NOW + #if defined(PALACE_WITH_SLEPC) #include "linalg/petsc.hpp" @@ -26,7 +28,7 @@ namespace palace { class DivFreeSolver; -class KspSolver; +class KspSolver; // XX TODO WORKING namespace petsc { @@ -502,4 +504,6 @@ class SlepcPEPSolver : public SlepcPEPSolverBase #endif +#endif + #endif // PALACE_LINALG_SLEPC_HPP diff --git a/palace/linalg/strumpack.cpp b/palace/linalg/strumpack.cpp index d74a18bb7..5689ebf9a 100644 --- a/palace/linalg/strumpack.cpp +++ b/palace/linalg/strumpack.cpp @@ -104,12 +104,13 @@ StrumpackSolverBase::StrumpackSolverBase( } template -void StrumpackSolverBase::SetOperator(const mfem::Operator &op) +void StrumpackSolverBase::SetOperator(const Operator &op) { - // Convert the input operator to a distributed STRUMPACK matrix (always use - // symmetric sparsity pattern). Safe to delete the matrix since STRUMPACK - // copies it on input. - mfem::STRUMPACKRowLocMatrix A(op, true); + // Convert the input operator to a distributed STRUMPACK matrix (always assume a symmetric + // sparsity pattern). Safe to delete the matrix since STRUMPACK copies it on input.
+ auto *PtAP = const_cast(dynamic_cast(&op)); + MFEM_VERIFY(PtAP, "StrumpackSolver requires a ParOperator operator!"); + mfem::STRUMPACKRowLocMatrix A(PtAP->ParallelAssemble(), true); // Set up base class. StrumpackSolverType::SetOperator(A); diff --git a/palace/linalg/strumpack.hpp b/palace/linalg/strumpack.hpp index 3ffe46e1a..081fa794e 100644 --- a/palace/linalg/strumpack.hpp +++ b/palace/linalg/strumpack.hpp @@ -8,6 +8,7 @@ #if defined(MFEM_USE_STRUMPACK) +#include "linalg/operator.hpp" #include "utils/iodata.hpp" namespace palace @@ -36,8 +37,7 @@ class StrumpackSolverBase : public StrumpackSolverType { } - // Sets matrix associated with the STRUMPACK solver. - void SetOperator(const mfem::Operator &op) override; + void SetOperator(const Operator &op) override; }; using StrumpackSolver = StrumpackSolverBase; diff --git a/palace/linalg/superlu.cpp b/palace/linalg/superlu.cpp index ba601c0bc..f4201e34e 100644 --- a/palace/linalg/superlu.cpp +++ b/palace/linalg/superlu.cpp @@ -5,7 +5,6 @@ #if defined(MFEM_USE_SUPERLU) -#include "linalg/petsc.hpp" #include "utils/communication.hpp" namespace palace @@ -81,7 +80,9 @@ void SuperLUSolver::SetOperator(const mfem::Operator &op) { solver.SetFact(mfem::superlu::SamePattern_SameRowPerm); } - A = std::make_unique(op); + auto *PtAP = const_cast(dynamic_cast(&op)); + MFEM_VERIFY(PtAP, "SuperLUSolver requires a ParOperator operator!"); + A = std::make_unique(PtAP->ParallelAssemble()); // Set up base class. solver.SetOperator(*A); diff --git a/palace/linalg/superlu.hpp b/palace/linalg/superlu.hpp index 1daf86631..74e857423 100644 --- a/palace/linalg/superlu.hpp +++ b/palace/linalg/superlu.hpp @@ -9,6 +9,8 @@ #if defined(MFEM_USE_SUPERLU) #include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" #include "utils/iodata.hpp" namespace palace @@ -33,22 +35,20 @@ class SuperLUSolver : public mfem::Solver { } - // Sets matrix associated with the SuperLU solver. 
- void SetOperator(const mfem::Operator &op) override; + void SetOperator(const Operator &op) override; - // Application of the solver. - void Mult(const mfem::Vector &x, mfem::Vector &y) const override { solver.Mult(x, y); } - void ArrayMult(const mfem::Array &X, - mfem::Array &Y) const override + void Mult(const Vector &x, Vector &y) const override { solver.Mult(x, y); } + void ArrayMult(const mfem::Array &X, + mfem::Array &Y) const override { solver.ArrayMult(X, Y); } - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override + void MultTranspose(const Vector &x, Vector &y) const override { solver.MultTranspose(x, y); } - void ArrayMultTranspose(const mfem::Array &X, - mfem::Array &Y) const override + void ArrayMultTranspose(const mfem::Array &X, + mfem::Array &Y) const override { solver.ArrayMultTranspose(X, Y); } diff --git a/palace/main.cpp b/palace/main.cpp index d6344dbda..76e280025 100644 --- a/palace/main.cpp +++ b/palace/main.cpp @@ -133,9 +133,9 @@ int main(int argc, char *argv[]) // Initialize Hypre and PETSc, and optionally SLEPc. mfem::Hypre::Init(); petsc::Initialize(argc, argv, nullptr, nullptr); -#if defined(PALACE_WITH_SLEPC) - slepc::Initialize(); -#endif + // #if defined(PALACE_WITH_SLEPC) //XX TODO WORKING... + // slepc::Initialize(); + // #endif if (PETSC_COMM_WORLD != world_comm) { Mpi::Print(world_comm, "Error: Problem during MPI initialization!\n\n"); @@ -187,9 +187,9 @@ int main(int argc, char *argv[]) Mpi::Print(world_comm, "\n"); // Finalize PETSc. -#if defined(PALACE_WITH_SLEPC) - slepc::Finalize(); -#endif + // #if defined(PALACE_WITH_SLEPC) //XX TODO WORKING... 
+ // slepc::Finalize(); + // #endif petsc::Finalize(); return 0; diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp index f5e085c3a..04d1f4ae8 100644 --- a/palace/models/curlcurloperator.cpp +++ b/palace/models/curlcurloperator.cpp @@ -75,15 +75,20 @@ CurlCurlOperator::CurlCurlOperator(const IoData &iodata, dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), nd_fecs(utils::ConstructFECollections( pc_gmg, false, iodata.solver.order, mesh.back()->Dimension())), - h1_fec(iodata.solver.order, mesh.back()->Dimension()), + h1_fecs(utils::ConstructFECollections( + pc_gmg, false, iodata.solver.order, mesh.back()->Dimension())), rt_fec(iodata.solver.order - 1, mesh.back()->Dimension()), nd_fespaces(pc_gmg ? utils::ConstructFiniteElementSpaceHierarchy( mesh, nd_fecs, &dbc_marker, &dbc_tdof_lists) : utils::ConstructFiniteElementSpaceHierarchy( *mesh.back(), *nd_fecs.back(), &dbc_marker, &dbc_tdof_lists.emplace_back())), - h1_fespace(mesh.back().get(), &h1_fec), rt_fespace(mesh.back().get(), &rt_fec), - mat_op(iodata, *mesh.back()), surf_j_op(iodata, GetH1Space()) + h1_fespaces(pc_gmg ? utils::ConstructFiniteElementSpaceHierarchy( + mesh, h1_fecs) + : utils::ConstructFiniteElementSpaceHierarchy( + *mesh.back(), *h1_fecs.back())), + rt_fespace(mesh.back().get(), &rt_fec), mat_op(iodata, *mesh.back()), + surf_j_op(iodata, GetH1Space()) { // Finalize setup. 
CheckBoundaryProperties(); @@ -123,7 +128,7 @@ void CurlCurlOperator::GetStiffnessMatrix(std::vector muinv_func(mat_op); - auto k = std::make_unique(&nd_fespace_l); + auto k = std::make_unique(&nd_fespace_l); k->AddDomainIntegrator(new mfem::CurlCurlIntegrator(muinv_func)); k->SetAssemblyLevel(assembly_level); k->Assemble(skip_zeros); diff --git a/palace/models/curlcurloperator.hpp b/palace/models/curlcurloperator.hpp index aab302661..f51400e7f 100644 --- a/palace/models/curlcurloperator.hpp +++ b/palace/models/curlcurloperator.hpp @@ -39,10 +39,10 @@ class CurlCurlOperator // (Nedelec) and magnetic flux density (Raviart-Thomas) on the given mesh. The H1 spaces // are used for various purposes throughout the code including postprocessing. std::vector> nd_fecs; - mfem::H1_FECollection h1_fec; + std::vector> h1_fecs; mfem::RT_FECollection rt_fec; - mfem::ParFiniteElementSpaceHierarchy nd_fespaces; - mfem::ParFiniteElementSpace h1_fespace, rt_fespace; + mfem::ParFiniteElementSpaceHierarchy nd_fespaces, h1_fespaces; + mfem::ParFiniteElementSpace rt_fespace; // Operator for domain material properties. MaterialOperator mat_op; @@ -63,7 +63,8 @@ class CurlCurlOperator // Return the parallel finite element space objects. 
auto &GetNDSpaces() { return nd_fespaces; } auto &GetNDSpace() { return nd_fespaces.GetFinestFESpace(); } - auto &GetH1Space() { return h1_fespace; } + auto &GetH1Spaces() { return h1_fespaces; } + auto &GetH1Space() { return h1_fespaces.GetFinestFESpace(); } auto &GetRTSpace() { return rt_fespace; } // Construct and return system matrix representing discretized curl-curl operator for diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp index 054d25cc9..9f409ff54 100644 --- a/palace/models/laplaceoperator.cpp +++ b/palace/models/laplaceoperator.cpp @@ -151,8 +151,8 @@ void LaplaceOperator::GetStiffnessMatrix(std::vector epsilon_func(mat_op); - auto k = std::make_unique(&h1_fespace_l); - k->AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); + auto k = std::make_unique(&h1_fespace_l); + k->AddDomainIntegrator(new mfem::DiffusionIntegrator(epsilon_func)); k->SetAssemblyLevel(assembly_level); k->Assemble(skip_zeros); k->Finalize(skip_zeros); diff --git a/palace/models/postoperator.cpp b/palace/models/postoperator.cpp index 3b635fba2..d886c8ed5 100644 --- a/palace/models/postoperator.cpp +++ b/palace/models/postoperator.cpp @@ -278,36 +278,6 @@ void PostOperator::InitializeDataCollection(const IoData &iodata) } } -// //XX TODO REMOVE THESE -// void PostOperator::GetBField(std::complex omega, -// const petsc::PetscParMatrix &NegCurl, -// const petsc::PetscParVector &e, petsc::PetscParVector &b) -// { -// // Compute B = -1/(iω) ∇ x E on the true dofs. -// MFEM_VERIFY(e.GetSize() == NegCurl.Width() && b.GetSize() == NegCurl.Height(), -// "Size mismatch error computing B-field in PostOperator!"); -// NegCurl.Mult(e, b); -// b.Scale(1.0 / (1i * omega)); -// } - -// void PostOperator::GetBField(const mfem::Operator &Curl, const mfem::Vector &a, -// mfem::Vector &b) -// { -// // Compute B = ∇ x A on the true dofs. 
-// MFEM_VERIFY(a.Size() == Curl.Width() && b.Size() == Curl.Height(), -// "Size mismatch error computing B-field in PostOperator!"); -// Curl.Mult(a, b); -// } - -// void PostOperator::GetEField(const mfem::Operator &NegGrad, const mfem::Vector &v, -// mfem::Vector &e) -// { -// // Compute E = -∇V on the true dofs. -// MFEM_VERIFY(v.Size() == NegGrad.Width() && e.Size() == NegGrad.Height(), -// "Size mismatch error computing E-field in PostOperator!"); -// NegGrad.Mult(v, e); -// } - void PostOperator::SetEGridFunction(const petsc::PetscParVector &e) { MFEM_VERIFY( diff --git a/palace/models/postoperator.hpp b/palace/models/postoperator.hpp index 9d8fa341e..d71ff7c2f 100644 --- a/palace/models/postoperator.hpp +++ b/palace/models/postoperator.hpp @@ -86,24 +86,6 @@ class PostOperator bool HasE() const { return E.has_value(); } bool HasB() const { return B.has_value(); } - // XX TODO REMOVE THESE - // // Compute the magnetic flux density B in RT space from electric field solution E - // solution - // // in ND space for the time-harmonic case: B = -1/(iω) ∇ x E. - // static void GetBField(std::complex omega, const petsc::PetscParMatrix - // &NegCurl, - // const petsc::PetscParVector &e, petsc::PetscParVector &b); - - // // Compute the magnetic flux density B in RT space from the magnetic vector potential - // // solution A in ND space: B = ∇ x A. - // static void GetBField(const mfem::Operator &Curl, const mfem::Vector &a, mfem::Vector - // &b); - - // // Compute the electric field E in ND space from the scalar potential solution V in H1 - // // space: E = -∇V. - // static void GetEField(const mfem::Operator &NegGrad, const mfem::Vector &v, - // mfem::Vector &e); - // Populate the grid function solutions for the E- and B-field using the solution vectors // on the true dofs. For the real-valued overload, the electric scalar potential can be // specified too for electrostatic simulations. 
The output mesh and fields are diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index 95e35573f..156528cb1 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -237,7 +237,7 @@ SpaceOperator::GetSystemMatrix(SpaceOperator::OperatorType type, { return {}; } - auto a = std::make_unique(&GetNDSpace()); + auto a = std::make_unique(&GetNDSpace()); AddIntegrators(*a, df, f, dfb, fb); a->SetAssemblyLevel(assembly_level); a->Assemble(skip_zeros); @@ -288,11 +288,11 @@ SpaceOperator::GetComplexSystemMatrix(SpaceOperator::OperatorType type, double o break; } bool has_real = false, has_imag = false; - std::unique_ptr ar, ai; + std::unique_ptr ar, ai; if (!dfr.empty() || !fr.empty() || !dfbr.empty() || !fbr.empty()) { has_real = true; - ar = std::make_unique(&GetNDSpace()); + ar = std::make_unique(&GetNDSpace()); AddIntegrators(*ar, dfr, fr, dfbr, fbr); ar->SetAssemblyLevel(assembly_level); ar->Assemble(skip_zeros); @@ -301,7 +301,7 @@ SpaceOperator::GetComplexSystemMatrix(SpaceOperator::OperatorType type, double o if (!dfi.empty() || !fi.empty() || !dfbi.empty() || !fbi.empty()) { has_imag = true; - ai = std::make_unique(&GetNDSpace()); + ai = std::make_unique(&GetNDSpace()); AddIntegrators(*ai, dfi, fi, dfbi, fbi); ai->SetAssemblyLevel(assembly_level); ai->Assemble(skip_zeros); @@ -344,15 +344,15 @@ std::unique_ptr SpaceOperator::GetSystemMatrix(double a0, double a1 auto sum = std::make_unique(height, width); if (K && a0 != 0.0) { - sum->AddOperator(K->GetOperator(), a0); + sum->AddOperator(K->LocalOperator(), a0); } if (C && a1 != 0.0) { - sum->AddOperator(C->GetOperator(), a1); + sum->AddOperator(C->LocalOperator(), a1); } if (M && a2 != 0.0) { - sum->AddOperator(M->GetOperator(), a2); + sum->AddOperator(M->LocalOperator(), a2); } auto A = std::make_unique(std::move(sum), GetNDSpace(), GetNDSpace()); A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), Operator::DiagonalPolicy::DIAG_ONE); @@ -390,19 +390,19 @@ 
std::unique_ptr SpaceOperator::GetComplexSystemMatrix( auto sum = std::make_unique(height, width); if (K && a0 != 0.0) { - sum->AddOperator(K->GetOperator(), a0); + sum->AddOperator(K->LocalOperator(), a0); } if (C && a1 != 0.0) { - sum->AddOperator(C->GetOperator(), a1); + sum->AddOperator(C->LocalOperator(), a1); } if (M && a2 != 0.0) { - sum->AddOperator(M->GetOperator(), a2); + sum->AddOperator(M->LocalOperator(), a2); } if (A2) { - sum->AddOperator(A2->GetOperator(), 1.0); + sum->AddOperator(A2->LocalOperator(), 1.0); } auto A = std::make_unique(std::move(sum), GetNDSpace(), GetNDSpace()); A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), Operator::DiagonalPolicy::DIAG_ONE); @@ -437,7 +437,7 @@ void SpaceOperator::GetPreconditionerMatrix(double a0, double a1, double a2, dou AddRealMassCoefficients( pc_shifted ? std::abs(a2) : a2, f, fb); AddExtraSystemBdrCoefficients(a3, dfb, dfb, fb, fb); - auto b = std::make_unique(&fespace_l); + auto b = std::make_unique(&fespace_l); if (s == 0) { AddIntegrators(*b, df, f, dfb, fb); diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp index 841e3a3ff..7814092cf 100644 --- a/palace/models/timeoperator.cpp +++ b/palace/models/timeoperator.cpp @@ -20,9 +20,6 @@ namespace class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOperator { private: - // MPI communicator for the parallel operators. - MPI_Comm comm; - // System matrices and excitation RHS. std::unique_ptr K, M, C; Vector NegJ; @@ -71,7 +68,7 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera std::function &djcoef, double t0, mfem::TimeDependentOperator::Type type) : mfem::SecondOrderTimeDependentOperator(spaceop.GetNDSpace().GetTrueVSize(), t0, type), - comm(spaceop.GetNDSpace().GetComm()), dJcoef(djcoef) + dJcoef(djcoef) { // Construct the system matrices defining the linear operator. 
PEC boundaries are // handled simply by setting diagonal entries of the mass matrix for the corresponding @@ -90,17 +87,11 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera // Set up linear solvers. { // PCG with a simple Jacobi preconditioner for mass matrix systems. - Vector diag(M->Height()); - M->AssembleDiagonal(diag); - - // XX TODO: Should not need DBC TDOF LIST as the diagonal is already 1 upon - // assembly... (see ParOperator) - // Maybe avoid MFEM's JAcobi smoother and write our own like in Chebyshev?? - // pcM = std::make_unique(diag, - // spaceop.GetDbcTDofList()); - pcM = std::make_unique(diag); + auto jac = std::make_unique(); + jac->SetOperator(*M); + pcM = std::move(jac); - auto pcg = std::make_unique(comm); + auto pcg = std::make_unique(M->GetComm()); pcg->iterative_mode = iodata.solver.linear.ksp_initial_guess; pcg->SetRelTol(iodata.solver.linear.tol); pcg->SetMaxIter(iodata.solver.linear.max_it); @@ -202,7 +193,7 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera // } // Construct and return the linear solver. 
- auto pcg = std::make_unique(this->comm); + auto pcg = std::make_unique(this->M->GetComm()); pcg->iterative_mode = iterative_mode; pcg->SetRelTol(tol); pcg->SetMaxIter(max_it); @@ -215,12 +206,11 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera kspM_mult = kspA_mult = kspM_it = kspA_it = 0; } - MPI_Comm GetComm() const { return comm; } const ParOperator &GetK() const { return *K; } const ParOperator &GetM() const { return *M; } const ParOperator &GetC() const { return *C; } - int GetNumMult() const { return kspM_mult; } + int GetNumMult() const { return kspM_mult; } // XX TODO REVISIT WITH KspSolver int GetNumMultIter() const { return kspM_it; } int GetNumImplicitSolve() const { return kspA_mult; } int GetNumImplicitSolveIter() const { return kspA_it; } @@ -342,39 +332,38 @@ TimeOperator::TimeOperator(const IoData &iodata, SpaceOperator &spaceop, int TimeOperator::GetTotalKspMult() const { - const auto &curlcurl = dynamic_cast(*op); + const auto &curlcurl = dynamic_cast(*op); return curlcurl.GetNumMult() + curlcurl.GetNumImplicitSolve(); } int TimeOperator::GetTotalKspIter() const { - const auto &curlcurl = dynamic_cast(*op); + const auto &curlcurl = dynamic_cast(*op); return curlcurl.GetNumMultIter() + curlcurl.GetNumImplicitSolveIter(); } double TimeOperator::GetMaxTimeStep() const { - const auto &curlcurl = dynamic_cast(*op); + const auto &curlcurl = dynamic_cast(*op); const ParOperator &M = curlcurl.GetM(); const ParOperator &K = curlcurl.GetK(); // Solver for M⁻¹. 
constexpr double lin_tol = 1.0e-9; constexpr int max_lin_it = 500; - mfem::CGSolver pcg(curlcurl.GetComm()); + mfem::CGSolver pcg(M.GetComm()); pcg.SetRelTol(lin_tol); pcg.SetMaxIter(max_lin_it); pcg.SetPrintLevel(0); pcg.SetOperator(M); - Vector diag(M.Height()); - M.AssembleDiagonal(diag); - JacobiSmoother prec(diag); - pcg.SetPreconditioner(prec); + JacobiSmoother jac; + jac.SetOperator(M); + pcg.SetPreconditioner(jac); // Power iteration to estimate largest eigenvalue of undamped system matrix M⁻¹ K. SymmetricProductOperator op(pcg, K); - double lam = linalg::SpectralNorm(curlcurl.GetComm(), op, false); + double lam = linalg::SpectralNorm(M.GetComm(), op, false); MFEM_VERIFY(lam > 0.0, "Error during power iteration, λ = " << lam << "!"); return 2.0 / std::sqrt(lam); } diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp index 59ccd433e..e90cd33bb 100644 --- a/palace/models/waveportoperator.cpp +++ b/palace/models/waveportoperator.cpp @@ -431,53 +431,57 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera // Define the linear solver to be used for solving systems associated with the // generalized eigenvalue problem. We use PETSc's sequential sparse solvers. int print = 0; - ksp = std::make_unique(A->GetComm(), print, "port_"); - ksp->SetType(KspSolver::Type::CHOLESKY); // Symmetric indefinite factorization - ksp->SetOperator(*B); - - // Define the eigenvalue solver. 
- config::EigenSolverData::Type type = config::EigenSolverData::Type::DEFAULT; -#if defined(PALACE_WITH_ARPACK) && defined(PALACE_WITH_SLEPC) - if (type == config::EigenSolverData::Type::DEFAULT) - { - type = config::EigenSolverData::Type::SLEPC; - } -#elif defined(PALACE_WITH_ARPACK) - if (type == config::EigenSolverData::Type::SLEPC) - { - Mpi::Warning("SLEPc eigensolver not available, using ARPACK!\n"); - } - type = config::EigenSolverData::Type::ARPACK; -#elif defined(PALACE_WITH_SLEPC) - if (type == config::EigenSolverData::Type::ARPACK) - { - Mpi::Warning("ARPACK eigensolver not available, using SLEPc!\n"); - } - type = config::EigenSolverData::Type::SLEPC; -#else -#error "Wave port solver requires building with ARPACK or SLEPc!" -#endif - if (type == config::EigenSolverData::Type::ARPACK) - { -#if defined(PALACE_WITH_ARPACK) - eigen = std::unique_ptr(new arpack::ArpackEPSSolver(print)); -#endif - } - else // config::EigenSolverData::Type::SLEPC - { -#if defined(PALACE_WITH_SLEPC) - eigen = - std::unique_ptr(new slepc::SlepcEPSSolver(A->GetComm(), print)); - auto *slepc = dynamic_cast(eigen.get()); - slepc->SetProblemType(slepc::SlepcEigenSolver::ProblemType::GEN_NON_HERMITIAN); - slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); -#endif - } - constexpr double tol = 1.0e-6; - eigen->SetLinearSolver(*ksp); - eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::LARGEST_MAGNITUDE); - eigen->SetNumModes(mode_idx, std::max(2 * mode_idx + 1, 5)); - eigen->SetTol(tol); + + // XX TODO REVISIT + + // ksp = std::make_unique(A->GetComm(), print, "port_"); + // ksp->SetType(KspSolver::Type::CHOLESKY); // Symmetric indefinite factorization + // ksp->SetOperator(*B); + + // // Define the eigenvalue solver. 
+ // config::EigenSolverData::Type type = config::EigenSolverData::Type::DEFAULT; + // #if defined(PALACE_WITH_ARPACK) && defined(PALACE_WITH_SLEPC) + // if (type == config::EigenSolverData::Type::DEFAULT) + // { + // type = config::EigenSolverData::Type::SLEPC; + // } + // #elif defined(PALACE_WITH_ARPACK) + // if (type == config::EigenSolverData::Type::SLEPC) + // { + // Mpi::Warning("SLEPc eigensolver not available, using ARPACK!\n"); + // } + // type = config::EigenSolverData::Type::ARPACK; + // #elif defined(PALACE_WITH_SLEPC) + // if (type == config::EigenSolverData::Type::ARPACK) + // { + // Mpi::Warning("ARPACK eigensolver not available, using SLEPc!\n"); + // } + // type = config::EigenSolverData::Type::SLEPC; + // #else + // #error "Wave port solver requires building with ARPACK or SLEPc!" + // #endif + // if (type == config::EigenSolverData::Type::ARPACK) + // { + // #if defined(PALACE_WITH_ARPACK) + // eigen = std::unique_ptr(new arpack::ArpackEPSSolver(print)); + // #endif + // } + // else // config::EigenSolverData::Type::SLEPC + // { + // #if defined(PALACE_WITH_SLEPC) + // eigen = + // std::unique_ptr(new slepc::SlepcEPSSolver(A->GetComm(), + // print)); + // auto *slepc = dynamic_cast(eigen.get()); + // slepc->SetProblemType(slepc::SlepcEigenSolver::ProblemType::GEN_NON_HERMITIAN); + // slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); + // #endif + // } + // constexpr double tol = 1.0e-6; + // eigen->SetLinearSolver(*ksp); + // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::LARGEST_MAGNITUDE); + // eigen->SetNumModes(mode_idx, std::max(2 * mode_idx + 1, 5)); + // eigen->SetTol(tol); } } @@ -564,43 +568,47 @@ std::complex WavePortData::Solve(petsc::PetscParVector &y0, petsc::PetscScatter &scatter) { double eig[2]; - if (A) // Only on root - { - // The y0 and e0 vectors are still parallel vectors, but with all data on root. We want - // true sequential vectors. 
- PetscScalar *pe0 = e0.GetArray(); - petsc::PetscParVector e0s(e0.GetSize(), pe0); - - // Set starting vector. - { - PetscScalar *py0 = y0.GetArray(); - petsc::PetscParVector y0s(y0.GetSize(), py0); - eigen->SetInitialSpace(y0s); - y0.RestoreArray(py0); - } -#if 0 - // Alternatively, use B-orthogonal initial space. Probably want to call SetBMat for - // the eigensolver in this case. - { - PetscScalar *py0 = y0.GetArray(); - petsc::PetscParVector y0s(y0.GetSize(), py0); - petsc::PetscParVector v0s(y0s); - ksp->Mult(y0s, v0s); - eigen->SetInitialSpace(v0s); - y0.RestoreArray(py0); - } -#endif - - // Solve (operators have been set in constructor). - int num_conv = 0; - eigen->SetOperators(*A, *B, EigenSolverBase::ScaleType::NONE); - num_conv = eigen->Solve(); - MFEM_VERIFY(num_conv >= mode_idx, "Wave port eigensolver did not converge!"); - eigen->GetEigenvalue(mode_idx - 1, eig[0], eig[1]); - eigen->GetEigenvector(mode_idx - 1, e0s); - e0.RestoreArray(pe0); - } + // XX TODO REVISIT... + + // if (A) // Only on root + // { + // // The y0 and e0 vectors are still parallel vectors, but with all data on root. We + // want + // // true sequential vectors. + // PetscScalar *pe0 = e0.GetArray(); + // petsc::PetscParVector e0s(e0.GetSize(), pe0); + + // // Set starting vector. + // { + // PetscScalar *py0 = y0.GetArray(); + // petsc::PetscParVector y0s(y0.GetSize(), py0); + // eigen->SetInitialSpace(y0s); + // y0.RestoreArray(py0); + // } + + // #if 0 + // // Alternatively, use B-orthogonal initial space. Probably want to call SetBMat for + // // the eigensolver in this case. + // { + // PetscScalar *py0 = y0.GetArray(); + // petsc::PetscParVector y0s(y0.GetSize(), py0); + // petsc::PetscParVector v0s(y0s); + // ksp->Mult(y0s, v0s); + // eigen->SetInitialSpace(v0s); + // y0.RestoreArray(py0); + // } + // #endif + + // // Solve (operators have been set in constructor). 
+ // int num_conv = 0; + // eigen->SetOperators(*A, *B, EigenSolverBase::ScaleType::NONE); + // num_conv = eigen->Solve(); + // MFEM_VERIFY(num_conv >= mode_idx, "Wave port eigensolver did not converge!"); + // eigen->GetEigenvalue(mode_idx - 1, eig[0], eig[1]); + // eigen->GetEigenvector(mode_idx - 1, e0s); + // e0.RestoreArray(pe0); + // } // Scatter the result to all processors. scatter.Reverse(e0, e); diff --git a/palace/models/waveportoperator.hpp b/palace/models/waveportoperator.hpp index 9004c723a..ac6e46d9f 100644 --- a/palace/models/waveportoperator.hpp +++ b/palace/models/waveportoperator.hpp @@ -8,8 +8,8 @@ #include #include #include -#include "linalg/eigen.hpp" -#include "linalg/ksp.hpp" +// #include "linalg/eigen.hpp" +// #include "linalg/ksp.hpp" #include "linalg/petsc.hpp" namespace palace @@ -61,9 +61,9 @@ class WavePortData std::unique_ptr nxH0r_func, nxH0i_func; std::unique_ptr sr, si; - // Eigenvalue solver for boundary modes. - std::unique_ptr eigen; - std::unique_ptr ksp; + // // Eigenvalue solver for boundary modes. + // std::unique_ptr eigen; //XX TODO + // std::unique_ptr ksp; // Helper function to get true degrees of freedom on the port. 
void GetTrueDofs(const mfem::Array &dbc_marker, diff --git a/palace/utils/configfile.cpp b/palace/utils/configfile.cpp index 55d939738..2c52a41be 100644 --- a/palace/utils/configfile.cpp +++ b/palace/utils/configfile.cpp @@ -1505,16 +1505,10 @@ NLOHMANN_JSON_SERIALIZE_ENUM(LinearSolverData::Type, NLOHMANN_JSON_SERIALIZE_ENUM(LinearSolverData::KspType, {{LinearSolverData::KspType::INVALID, nullptr}, {LinearSolverData::KspType::CG, "CG"}, - {LinearSolverData::KspType::CGSYM, "CGSYM"}, - {LinearSolverData::KspType::FCG, "FCG"}, {LinearSolverData::KspType::MINRES, "MINRES"}, {LinearSolverData::KspType::GMRES, "GMRES"}, {LinearSolverData::KspType::FGMRES, "FGMRES"}, - {LinearSolverData::KspType::BCGS, "BCGS"}, - {LinearSolverData::KspType::BCGSL, "BCGSL"}, - {LinearSolverData::KspType::FBCGS, "FBCGS"}, - {LinearSolverData::KspType::QMRCGS, "QMRCGS"}, - {LinearSolverData::KspType::TFQMR, "TFQMR"}, + {LinearSolverData::KspType::BICGSTAB, "BiCGSTAB"}, {LinearSolverData::KspType::DEFAULT, "Default"}}) NLOHMANN_JSON_SERIALIZE_ENUM(LinearSolverData::SideType, {{LinearSolverData::SideType::INVALID, nullptr}, diff --git a/palace/utils/configfile.hpp b/palace/utils/configfile.hpp index 87ea8a5b8..b6e1e0685 100644 --- a/palace/utils/configfile.hpp +++ b/palace/utils/configfile.hpp @@ -698,7 +698,7 @@ struct TransientSolverData struct LinearSolverData { - // XX TODO REVISIT AVAILABLE OPTIONS FOR KSP AFTER HYPRE SWITCH... + // XX TODO REVISIT AVAILABLE OPTIONS FOR KSP AFTER HYPRE SWITCH... (ALSO ADD "DEFAULT") // XX TODO REVISIT OPTIONS FOR PA AND KEYWORDS... "GMG" "PA" CAN DO BETTER (RATEL?) public: @@ -720,16 +720,10 @@ struct LinearSolverData enum class KspType { CG, - CGSYM, - FCG, MINRES, GMRES, FGMRES, - BCGS, - BCGSL, - FBCGS, - QMRCGS, - TFQMR, + BICGSTAB, DEFAULT, INVALID = -1 }; @@ -753,7 +747,7 @@ struct LinearSolverData int ksp_initial_guess = -1; // Enable pipelined Krylov solver variants to reduce blocking communications. 
- bool ksp_piped = false; + bool ksp_piped = false; // XX TODO REMOVE.... // Enable partial assembly for operators. bool mat_pa = false; @@ -789,7 +783,7 @@ struct LinearSolverData DEFAULT, INVALID = -1 }; - SideType pc_side_type = SideType::DEFAULT; + SideType pc_side_type = SideType::DEFAULT; // XX TODO REMOVE... // Choose left or right preconditioning. enum class SymFactType From b559ac68c0d1790264ac16126cbcf304224020b4 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Fri, 12 May 2023 19:29:26 -0700 Subject: [PATCH 03/41] WIP: Updates to driver classes and users of linear solver APIs --- palace/drivers/electrostaticsolver.cpp | 2 + palace/drivers/magnetostaticsolver.cpp | 2 + palace/linalg/amg.cpp | 6 +- palace/linalg/amg.hpp | 6 +- palace/linalg/ams.cpp | 10 +- palace/linalg/ams.hpp | 6 +- palace/linalg/chebyshev.cpp | 6 +- palace/linalg/chebyshev.hpp | 8 +- palace/linalg/complex.cpp | 63 +++---- palace/linalg/complex.hpp | 13 +- palace/linalg/curlcurl.cpp | 90 ++++----- palace/linalg/curlcurl.hpp | 40 ++-- palace/linalg/distrelaxation.cpp | 16 +- palace/linalg/distrelaxation.hpp | 12 +- palace/linalg/divfree.cpp | 105 +++++------ palace/linalg/divfree.hpp | 64 +++---- palace/linalg/gmg.cpp | 26 +-- palace/linalg/gmg.hpp | 9 +- palace/linalg/jacobi.cpp | 2 +- palace/linalg/jacobi.hpp | 6 +- palace/linalg/ksp.cpp | 42 +++-- palace/linalg/ksp.hpp | 25 ++- palace/linalg/mumps.cpp | 6 +- palace/linalg/mumps.hpp | 6 +- palace/linalg/strumpack.cpp | 6 +- palace/linalg/strumpack.hpp | 6 +- palace/linalg/superlu.cpp | 7 +- palace/linalg/superlu.hpp | 6 +- palace/models/domainpostoperator.cpp | 244 +++++++------------------ palace/models/domainpostoperator.hpp | 51 +----- palace/models/spaceoperator.hpp | 14 +- palace/models/timeoperator.cpp | 2 + palace/models/waveportoperator.cpp | 12 +- palace/models/waveportoperator.hpp | 10 +- 34 files changed, 397 insertions(+), 532 deletions(-) diff --git a/palace/drivers/electrostaticsolver.cpp 
b/palace/drivers/electrostaticsolver.cpp index a8320d967..4f6cbca15 100644 --- a/palace/drivers/electrostaticsolver.cpp +++ b/palace/drivers/electrostaticsolver.cpp @@ -13,6 +13,8 @@ #include "utils/iodata.hpp" #include "utils/timer.hpp" +// XX TODO WORKING FOR MONDAY! + namespace palace { diff --git a/palace/drivers/magnetostaticsolver.cpp b/palace/drivers/magnetostaticsolver.cpp index 6be76a0a1..fc8118751 100644 --- a/palace/drivers/magnetostaticsolver.cpp +++ b/palace/drivers/magnetostaticsolver.cpp @@ -14,6 +14,8 @@ #include "utils/iodata.hpp" #include "utils/timer.hpp" +// XX TODO WORKING FOR MONDAY! + namespace palace { diff --git a/palace/linalg/amg.cpp b/palace/linalg/amg.cpp index ecaac4844..c8a1ea2ae 100644 --- a/palace/linalg/amg.cpp +++ b/palace/linalg/amg.cpp @@ -25,11 +25,9 @@ BoomerAmgSolver::BoomerAmgSolver(int cycle_it, int smooth_it, int print) // HYPRE_BoomerAMGSetCycleRelaxType(*this, coarse_relax_type, 3); } -void BoomerAmgSolver::SetOperator(const Operator &op) +void BoomerAmgSolver::SetOperator(const ParOperator &op) { - auto *PtAP = const_cast(dynamic_cast(&op)); - MFEM_VERIFY(PtAP, "BoomerAmgSolver requires a ParOperator operator!"); - mfem::HypreBoomerAMG::SetOperator(PtAP->ParallelAssemble()); + mfem::HypreBoomerAMG::SetOperator(const_cast(&op)->ParallelAssemble()); } } // namespace palace diff --git a/palace/linalg/amg.hpp b/palace/linalg/amg.hpp index b75d1d129..3b602a2b7 100644 --- a/palace/linalg/amg.hpp +++ b/palace/linalg/amg.hpp @@ -24,7 +24,11 @@ class BoomerAmgSolver : public mfem::HypreBoomerAMG { } - void SetOperator(const Operator &op) override; + void SetOperator(const Operator &op) override + { + MFEM_ABORT("BoomerAmgSolver requires a ParOperator operator!"); + } + void SetOperator(const ParOperator &op); }; } // namespace palace diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp index 1fb26870a..4966c4276 100644 --- a/palace/linalg/ams.cpp +++ b/palace/linalg/ams.cpp @@ -27,8 +27,8 @@ 
HypreAmsSolver::HypreAmsSolver(mfem::ParFiniteElementSpace &nd_fespace, ams_singular(op_singular), print((print_lvl > 1) ? print_lvl - 1 : 0) { // From MFEM: The AMS preconditioner may sometimes require inverting singular matrices - // with BoomerAMG, which are handled correctly in hypre's Solve method, but can produce - // hypre errors in the Setup (specifically in the row l1-norm computation). See the + // with BoomerAMG, which are handled correctly in Hypre's Solve method, but can produce + // Hypre errors in the Setup (specifically in the row l1-norm computation). See the // documentation of MFEM's SetErrorMode() for more details. error_mode = IGNORE_HYPRE_ERRORS; @@ -199,7 +199,7 @@ void HypreAmsSolver::InitializeSolver() } } -void HypreAmsSolver::SetOperator(const Operator &op) +void HypreAmsSolver::SetOperator(const ParOperator &op) { // When the operator changes, we need to rebuild the AMS solver but can use the unchanged // auxiliary space matrices. @@ -209,9 +209,7 @@ void HypreAmsSolver::SetOperator(const Operator &op) InitializeSolver(); } - auto *PtAP = const_cast(dynamic_cast(&op)); - MFEM_VERIFY(PtAP, "HypreAmsSolver requires a ParOperator operator!"); - A = &PtAP->ParallelAssemble(); + A = &const_cast(&op)->ParallelAssemble(); height = A->Height(); width = A->Width(); diff --git a/palace/linalg/ams.hpp b/palace/linalg/ams.hpp index dc923a2fe..a2ebbc328 100644 --- a/palace/linalg/ams.hpp +++ b/palace/linalg/ams.hpp @@ -67,7 +67,11 @@ class HypreAmsSolver : public mfem::HypreSolver } ~HypreAmsSolver() override; - void SetOperator(const Operator &op) override; + void SetOperator(const Operator &op) override + { + MFEM_ABORT("HypreAmsSolver requires a ParOperator operator!"); + } + void SetOperator(const ParOperator &op); operator HYPRE_Solver() const override { return ams; } diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp index 5278ed8bf..9fd975997 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ 
-14,7 +14,7 @@ ChebyshevSmoother::ChebyshevSmoother(int smooth_it, int poly_order) { } -void ChebyshevSmoother::SetOperator(const mfem::Operator &op) +void ChebyshevSmoother::SetOperator(const ParOperator &op) { A = &op; @@ -29,11 +29,9 @@ void ChebyshevSmoother::SetOperator(const mfem::Operator &op) // Set up Chebyshev coefficients using the computed maximum eigenvalue estimate. See // mfem::OperatorChebyshevSmoother or Adams et al., Parallel multigrid smoothing: // polynomial versus Gauss-Seidel, JCP (2003). - const auto *PtAP = dynamic_cast(A); - MFEM_VERIFY(PtAP, "ChebyshevSmoother requires a ParOperator operator!"); DiagonalOperator Dinv(dinv); SymmetricProductOperator DinvA(Dinv, *A); - lambda_max = 1.1 * linalg::SpectralNorm(PtAP->GetComm(), DinvA, false); + lambda_max = 1.1 * linalg::SpectralNorm(A->GetComm(), DinvA, false); } void ChebyshevSmoother::Mult(const mfem::Vector &x, mfem::Vector &y) const diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp index 7364c4acd..96d5117ca 100644 --- a/palace/linalg/chebyshev.hpp +++ b/palace/linalg/chebyshev.hpp @@ -25,7 +25,7 @@ class ChebyshevSmoother : public mfem::Solver const int pc_it, order; // System matrix (not owned). - const Operator *A; + const ParOperator *A; // Inverse diagonal scaling of the operator. 
Vector dinv; @@ -39,7 +39,11 @@ class ChebyshevSmoother : public mfem::Solver public: ChebyshevSmoother(int smooth_it, int poly_order); - void SetOperator(const Operator &op) override; + void SetOperator(const Operator &op) override + { + MFEM_ABORT("ChebyshevSmoother requires a ParOperator operator!"); + } + void SetOperator(const ParOperator &op); void Mult(const Vector &x, Vector &y) const override; diff --git a/palace/linalg/complex.cpp b/palace/linalg/complex.cpp index 34e71e281..bb7bc7b84 100644 --- a/palace/linalg/complex.cpp +++ b/palace/linalg/complex.cpp @@ -30,11 +30,6 @@ ComplexVector::ComplexVector(const Vector &xr, const Vector &xi) : Vector(2 * xr Set(xr, xi); } -int ComplexVector::Size() const -{ - return Vector::Size() / 2; -} - void ComplexVector::SetSize(int n) { Vector::SetSize(2 * n); @@ -68,7 +63,7 @@ ComplexVector &ComplexVector::operator*=(std::complex s) { if (s.imag() != 0.0) { - const int N = Size(); + const int N = Size() / 2; const double sr = s.real(); const double si = s.imag(); auto *XR = Real().ReadWrite(); @@ -105,7 +100,7 @@ std::complex ComplexVector::TransposeDot(const ComplexVector &y) const void ComplexVector::AXPY(std::complex alpha, const ComplexVector &y) { - const int N = Size(); + const int N = Size() / 2; const double ar = alpha.real(); const double ai = alpha.imag(); const auto *YR = y.Real().Read(); @@ -125,7 +120,7 @@ void ComplexVector::AXPY(std::complex alpha, const ComplexVector &y) void ComplexVector::AXPBY(std::complex alpha, const ComplexVector &y, std::complex beta) { - const int N = Size(); + const int N = Size() / 2; const double ar = alpha.real(); const double ai = alpha.imag(); const double br = beta.real(); @@ -149,7 +144,7 @@ void ComplexVector::AXPBYPCZ(std::complex alpha, const ComplexVector &y, std::complex beta, const ComplexVector &z, std::complex gamma) { - const int N = Size(); + const int N = Size() / 2; const double ar = alpha.real(); const double ai = alpha.imag(); const double br = beta.real(); 
@@ -176,7 +171,7 @@ void ComplexVector::AXPBYPCZ(std::complex alpha, const ComplexVector &y, void ComplexOperator::Mult(const Vector &x, Vector &y) const { - MFEM_ASSERT(x.Size() == 2 * width && y.Size() == 2 * height, + MFEM_ASSERT(x.Size() == width && y.Size() == height, "Incompatible dimensions for ComplexOperator::Mult!"); Vector xr, xi, yr, yi; xr.MakeRef(const_cast(x), 0, width / 2); @@ -190,7 +185,7 @@ void ComplexOperator::Mult(const Vector &x, Vector &y) const void ComplexOperator::MultTranspose(const Vector &x, Vector &y) const { - MFEM_ASSERT(x.Size() == 2 * height && y.Size() == 2 * width, + MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for ComplexOperator::MultTranspose!"); Vector xr, xi, yr, yi; xr.MakeRef(const_cast(x), 0, height / 2); @@ -204,7 +199,7 @@ void ComplexOperator::MultTranspose(const Vector &x, Vector &y) const void ComplexOperator::MultHermitianTranspose(const Vector &x, Vector &y) const { - MFEM_ASSERT(x.Size() == 2 * height && y.Size() == 2 * width, + MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for ComplexOperator::MultHermitianTranspose!"); Vector xr, xi, yr, yi; xr.MakeRef(const_cast(x), 0, height / 2); @@ -218,7 +213,7 @@ void ComplexOperator::MultHermitianTranspose(const Vector &x, Vector &y) const void ComplexOperator::AddMult(const Vector &x, Vector &y, const double a) const { - MFEM_ASSERT(x.Size() == 2 * width && y.Size() == 2 * height, + MFEM_ASSERT(x.Size() == width && y.Size() == height, "Incompatible dimensions for ComplexOperator::AddMult!"); Vector xr, xi, yr, yi; xr.MakeRef(const_cast(x), 0, width / 2); @@ -232,7 +227,7 @@ void ComplexOperator::AddMult(const Vector &x, Vector &y, const double a) const void ComplexOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) const { - MFEM_ASSERT(x.Size() == 2 * height && y.Size() == 2 * width, + MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for 
ComplexOperator::AddMultTranspose!"); Vector xr, xi, yr, yi; xr.MakeRef(const_cast(x), 0, height / 2); @@ -247,7 +242,7 @@ void ComplexOperator::AddMultTranspose(const Vector &x, Vector &y, const double void ComplexOperator::AddMultHermitianTranspose(const Vector &x, Vector &y, const double a) const { - MFEM_ASSERT(x.Size() == 2 * height && y.Size() == 2 * width, + MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for ComplexOperator::AddMultHermitianTranspose!"); Vector xr, xi, yr, yi; xr.MakeRef(const_cast(x), 0, height / 2); @@ -273,17 +268,17 @@ ComplexParOperator::ComplexParOperator(std::unique_ptr &&A, lxi_.SetSize(A_->Width()); lyr_.SetSize(A_->Height()); lyi_.SetSize(A_->Height()); - txr_.SetSize(width); - txi_.SetSize(width); + txr_.SetSize(width / 2); + txi_.SetSize(width / 2); if (height != width) { - tyr_.SetSize(height); - tyi_.SetSize(height); + tyr_.SetSize(height / 2); + tyi_.SetSize(height / 2); } else { - tyr_.MakeRef(txr_, 0, height); - tyi_.MakeRef(txi_, 0, height); + tyr_.MakeRef(txr_, 0, height / 2); + tyi_.MakeRef(txi_, 0, height / 2); } } @@ -291,8 +286,8 @@ void ComplexParOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, const std::complex a, bool zero_real, bool zero_imag) const { - MFEM_ASSERT(xr.Size() == width && xi.Size() == width && yr.Size() == height && - yi.Size() == height, + MFEM_ASSERT(xr.Size() == width / 2 && xi.Size() == width / 2 && yr.Size() == height / 2 && + yi.Size() == height / 2, "Incompatible dimensions for ComplexParOperator::AddMult!"); if (trial_dbc_tdof_list_) { @@ -376,8 +371,8 @@ void ComplexParOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Ve Vector &yi, const std::complex a, bool zero_real, bool zero_imag) const { - MFEM_ASSERT(xr.Size() == height && xi.Size() == height && yr.Size() == width && - yi.Size() == width, + MFEM_ASSERT(xr.Size() == height / 2 && xi.Size() == height / 2 && + yr.Size() == width / 2 && yi.Size() == width / 2, "Incompatible 
dimensions for ComplexParOperator::AddMultTranspose!"); if (test_dbc_tdof_list_) { @@ -462,8 +457,8 @@ void ComplexParOperator::AddMultHermitianTranspose(const Vector &xr, const Vecto const std::complex a, bool zero_real, bool zero_imag) const { - MFEM_ASSERT(xr.Size() == height && xi.Size() == height && yr.Size() == width && - yi.Size() == width, + MFEM_ASSERT(xr.Size() == height / 2 && xi.Size() == height / 2 && + yr.Size() == width / 2 && yi.Size() == width / 2, "Incompatible dimensions for ComplexParOperator::AddMultHermitianTranspose!"); if (test_dbc_tdof_list_) { @@ -552,17 +547,17 @@ ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&Ar, MFEM_VERIFY((!Ar_ || !Ai_) || (Ar_->Height() == Ai_->Height() && Ar_->Width() == Ai_->Width()), "Mismatch in dimension of real and imaginary matrix parts!"); - txr_.SetSize(width); - txi_.SetSize(width); + txr_.SetSize(width / 2); + txi_.SetSize(width / 2); if (height != width) { - tyr_.SetSize(height); - tyi_.SetSize(height); + tyr_.SetSize(height / 2); + tyi_.SetSize(height / 2); } else { - tyr_.MakeRef(txr_, 0, height); - tyi_.MakeRef(txi_, 0, height); + tyr_.MakeRef(txr_, 0, height / 2); + tyi_.MakeRef(txi_, 0, height / 2); } } @@ -670,7 +665,7 @@ void ComplexWrapperOperator::AddMult(const Vector &xr, const Vector &xi, Vector if (a.real() != 0.0 && a.imag() != 0.0) { Mult(xr, xi, tyr_, tyi_, zero_real, zero_imag); - const int N = height; + const int N = height / 2; const double ar = a.real(); const double ai = a.imag(); const auto *TYR = tyr_.Read(); diff --git a/palace/linalg/complex.hpp b/palace/linalg/complex.hpp index 943fb2b98..683c01c5f 100644 --- a/palace/linalg/complex.hpp +++ b/palace/linalg/complex.hpp @@ -19,7 +19,8 @@ namespace palace // Vector and operator classes for complex-valued linear algebra. // -// A complex-valued vector represented as two real vectors, one for each component. +// A complex-valued vector represented as two real vectors, one for each component. 
The +// value returned by the vector size is twice the actual complex-valued size. class ComplexVector : public Vector { private: @@ -35,9 +36,6 @@ class ComplexVector : public Vector // Copy constructor from separately provided real and imaginary parts. ComplexVector(const Vector &xr, const Vector &xi); - // Returns the vector size. - int Size() const; - // Set the size of the vector. See the notes for Vector::SetSize for behavior in the // cases where n is less than or greater than Size() or Capacity(). void SetSize(int n); @@ -104,12 +102,13 @@ class ComplexVector : public Vector } }; -// Abstract base class for complex-valued operators. +// Abstract base class for complex-valued operators. The values returned by the operator +// height and width are twice the actual complex-valued size. class ComplexOperator : public Operator { public: - ComplexOperator(int s) : Operator(s) {} - ComplexOperator(int h, int w) : Operator(h, w) {} + ComplexOperator(int s) : Operator(2 * s) {} + ComplexOperator(int h, int w) : Operator(2 * h, 2 * w) {} // Test whether or not the operator is purely real or imaginary. 
virtual bool IsReal() const = 0; diff --git a/palace/linalg/curlcurl.cpp b/palace/linalg/curlcurl.cpp index 1bfaa6788..5ac31b8cf 100644 --- a/palace/linalg/curlcurl.cpp +++ b/palace/linalg/curlcurl.cpp @@ -11,78 +11,64 @@ namespace palace { -CurlCurlMassSolver::CurlCurlMassSolver(const MaterialOperator &mat_op, - const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpaceHierarchy &nd_fespaces, - mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, - double tol, int max_it, int print) +CurlCurlMassSolver::CurlCurlMassSolver( + const MaterialOperator &mat_op, mfem::ParFiniteElementSpaceHierarchy &nd_fespaces, + mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, + const std::vector> &nd_dbc_tdof_lists, + const std::vector> &h1_dbc_tdof_lists, double tol, int max_it, + int print) : mfem::Solver(nd_fespaces.GetFinestFESpace().GetTrueVSize()) { - - // XX TODO NEW ParOperator FRAMEWORK - constexpr MaterialPropertyType MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY; constexpr MaterialPropertyType MatTypeEps = MaterialPropertyType::PERMITTIVITY_REAL; MaterialPropertyCoefficient muinv_func(mat_op); MaterialPropertyCoefficient epsilon_func(mat_op); - MFEM_VERIFY(dbc_marker.Size() == - nd_fespaces.GetFinestFESpace().GetParMesh()->bdr_attributes.Max(), - "Invalid boundary marker for curl-curl solver!"); for (int s = 0; s < 2; s++) { auto &A_ = (s == 0) ? A : AuxA; - A_.reserve(nd_fespaces.GetNumLevels()); - for (int l = 0; l < nd_fespaces.GetNumLevels(); l++) + auto &fespaces = (s == 0) ? nd_fespaces : h1_fespaces; + auto &dbc_tdof_lists = (s == 0) ? nd_dbc_tdof_lists : h1_dbc_tdof_lists; + A_.clear(); + A_.reserve(fespaces.GetNumLevels()); + for (int l = 0; l < fespaces.GetNumLevels(); l++) { - auto &fespace_l = - (s == 0) ? 
nd_fespaces.GetFESpaceAtLevel(l) : h1_fespaces.GetFESpaceAtLevel(l); - mfem::Array dbc_tdof_list_l; - fespace_l.GetEssentialTrueDofs(dbc_marker, dbc_tdof_list_l); - - mfem::ParBilinearForm a(&fespace_l); - if (s == 1) + auto &fespace_l = fespaces.GetFESpaceAtLevel(l); + auto a = std::make_unique(&fespace_l); + if (s == 0) { - a.AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); + a->AddDomainIntegrator(new mfem::CurlCurlIntegrator(muinv_func)); + a->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func)); } else { - a.AddDomainIntegrator(new mfem::CurlCurlIntegrator(muinv_func)); - a.AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func)); + a->AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); } - // a.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - a.Assemble(); - a.Finalize(); - mfem::HypreParMatrix *hA = a.ParallelAssemble(); - hA->EliminateBC(dbc_tdof_list_l, mfem::Operator::DiagonalPolicy::DIAG_ONE); - A_.emplace_back(hA); + // XX TODO: Partial assembly option? + a->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + a->Assemble(0); + a->Finalize(0); + A_.push_back(std::make_unique(std::move(a), fespace_l, fespace_l)); + A_.back()->SetEssentialTrueDofs(dbc_tdof_lists[l], + Operator::DiagonalPolicy::DIAG_ONE); } } - // XX TODO VISIT - - // // The system matrix for the projection is real and SPD. For the coarse-level AMG - // solve, - // // we don't use an exact solve on the coarsest level. - // auto ams = std::make_unique(nd_fespaces.GetFESpaceAtLevel(0), - // &h1_fespaces.GetFESpaceAtLevel(0), 1, 1, 1, - // false, false, 0); - // auto gmg = std::make_unique(std::move(ams), dbc_marker, - // nd_fespaces, &h1_fespaces, 1, 1, - // 2); - // gmg->SetOperator(A, &AuxA); - // pc = std::move(gmg); + // The system matrix K + M is real and SPD. We use Hypre's AMS solver as the coarse-level + // multigrid solve. 
+ auto ams = std::make_unique(nd_fespaces.GetFESpaceAtLevel(0), + h1_fespaces.GetFESpaceAtLevel(0), 1, 1, 1, + false, false, 0); + auto gmg = std::make_unique(std::move(ams), nd_fespaces, + &h1_fespaces, 1, 1, 2); - ksp = std::make_unique(nd_fespaces.GetFinestFESpace().GetComm()); - ksp->SetRelTol(tol); - ksp->SetMaxIter(max_it); - ksp->SetPrintLevel(print); - ksp->SetOperator(*A.back()); - ksp->SetPreconditioner(*pc); + auto pcg = std::make_unique(nd_fespaces.GetFinestFESpace().GetComm()); + pcg->iterative_mode = false; + pcg->SetRelTol(tol); + pcg->SetMaxIter(max_it); + pcg->SetPrintLevel(print); - xr.SetSize(height); - xi.SetSize(height); - yr.SetSize(height); - yi.SetSize(height); + ksp = std::make_unique(std::move(pcg), std::move(ams)); + ksp->SetOperator(*A.back(), A, &AuxA); } } // namespace palace diff --git a/palace/linalg/curlcurl.hpp b/palace/linalg/curlcurl.hpp index 5a1d1b60a..684546581 100644 --- a/palace/linalg/curlcurl.hpp +++ b/palace/linalg/curlcurl.hpp @@ -5,47 +5,47 @@ #define PALACE_LINALG_CURL_CURL_HPP #include +#include #include -#include "linalg/petsc.hpp" +#include "linalg/complex.hpp" +#include "linalg/ksp.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { class MaterialOperator; +class KspSolver; // -// This solver implements a solver for the operator K + M in the Nedelec space. +// This solver implements a solver for the operator K + M in a Nedelec space. // class CurlCurlMassSolver : public mfem::Solver { private: - // H(curl) norm operator A = K + M. - std::vector> A, AuxA; + // H(curl) norm operator A = K + M and its projection Gᵀ A G. + std::vector> A, AuxA; - // Linear solver and preconditioner for the linear system A y = x; - std::unique_ptr ksp; - std::unique_ptr pc; - - // Workspace objects for solver application. 
- mutable mfem::Vector xr, xi, yr, yi; + // Linear solver for the linear system A y = x; + std::unique_ptr ksp; public: - CurlCurlMassSolver(const MaterialOperator &mat_op, const mfem::Array &dbc_marker, + CurlCurlMassSolver(const MaterialOperator &mat_op, mfem::ParFiniteElementSpaceHierarchy &nd_fespaces, - mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, double tol, + mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, + const std::vector> &nd_dbc_tdof_lists, + const std::vector> &h1_dbc_tdof_lists, double tol, int max_it, int print); - void SetOperator(const mfem::Operator &op) override {} + void SetOperator(const Operator &op) override {} - void Mult(const mfem::Vector &x, mfem::Vector &y) const override { ksp->Mult(x, y); } - void Mult(const petsc::PetscParVector &x, petsc::PetscParVector &y) const + void Mult(const Vector &x, Vector &y) const override { ksp->Mult(x, y); } + void Mult(const ComplexVector &x, ComplexVector &y) { - x.GetToVectors(xr, xi); - Mult(xr, yr); - Mult(xi, yi); - y.SetFromVectors(yr, yi); + Mult(x.Real(), y.Real()); + Mult(x.Imag(), y.Imag()); } - using mfem::Operator::Mult; }; } // namespace palace diff --git a/palace/linalg/distrelaxation.cpp b/palace/linalg/distrelaxation.cpp index 5239ab2ef..87126ee52 100644 --- a/palace/linalg/distrelaxation.cpp +++ b/palace/linalg/distrelaxation.cpp @@ -13,7 +13,7 @@ DistRelaxationSmoother::DistRelaxationSmoother(mfem::ParFiniteElementSpace &nd_f mfem::ParFiniteElementSpace &h1_fespace, int smooth_it, int cheby_smooth_it, int cheby_order) - : mfem::Solver(), pc_it(smooth_it), A(nullptr), A_G(nullptr), dbc_tdof_list_G(nullptr) + : mfem::Solver(), pc_it(smooth_it), A(nullptr), A_G(nullptr) { // Construct discrete gradient matrix for the auxiliary space. 
{ @@ -32,7 +32,7 @@ DistRelaxationSmoother::DistRelaxationSmoother(mfem::ParFiniteElementSpace &nd_f B_G->iterative_mode = false; } -void DistRelaxationSmoother::SetOperator(const Operator &op, const Operator &op_G) +void DistRelaxationSmoother::SetOperator(const ParOperator &op, const ParOperator &op_G) { A = &op; A_G = &op_G; @@ -40,10 +40,6 @@ void DistRelaxationSmoother::SetOperator(const Operator &op, const Operator &op_ A_G->Height() == G->Width() && A_G->Width() == G->Width(), "Invalid operator sizes for DistRelaxationSmoother!"); - const auto *PtAP_G = dynamic_cast(&op_G); - MFEM_VERIFY(PtAP_G, "DistRelaxationSmoother requires ParOperator operators!"); - dbc_tdof_list_G = PtAP_G->GetEssentialTrueDofs(); - height = A->Height(); width = A->Width(); r.SetSize(height); @@ -68,9 +64,9 @@ void DistRelaxationSmoother::Mult(const Vector &x, Vector &y) const A->Mult(y, r); subtract(x, r, r); G->MultTranspose(r, x_G); - if (dbc_tdof_list_G) + if (A_G->GetEssentialTrueDofs()) { - x_G.SetSubVector(*dbc_tdof_list_G, 0.0); + x_G.SetSubVector(*A_G->GetEssentialTrueDofs(), 0.0); } B_G->Mult(x_G, y_G); G->AddMult(y_G, y, 1.0); @@ -94,9 +90,9 @@ void DistRelaxationSmoother::MultTranspose(const Vector &x, Vector &y) const { y = 0.0; } - if (dbc_tdof_list_G) + if (A_G->GetEssentialTrueDofs()) { - x_G.SetSubVector(*dbc_tdof_list_G, 0.0); + x_G.SetSubVector(*A_G->GetEssentialTrueDofs(), 0.0); } B_G->MultTranspose(x_G, y_G); G->AddMult(y_G, y, 1.0); diff --git a/palace/linalg/distrelaxation.hpp b/palace/linalg/distrelaxation.hpp index a248eb150..823ea816b 100644 --- a/palace/linalg/distrelaxation.hpp +++ b/palace/linalg/distrelaxation.hpp @@ -25,14 +25,11 @@ class DistRelaxationSmoother : public mfem::Solver // Number of smoother iterations. const int pc_it; - // System matrix and its projection G^T A G (not owned). - const Operator *A, *A_G; - - // Dirichlet boundary conditions in the auxiliary space (not owned). 
- const mfem::Array *dbc_tdof_list_G; + // System matrix and its projection Gᵀ A G (not owned). + const ParOperator *A, *A_G; // Discrete gradient matrix. - std::unique_ptr G; + std::unique_ptr G; // Point smoother objects for each matrix. mutable std::unique_ptr B; @@ -51,8 +48,7 @@ class DistRelaxationSmoother : public mfem::Solver MFEM_ABORT("SetOperator with a single operator is not implemented for " "DistRelaxationSmoother, use the two argument signature instead!"); } - - void SetOperator(const Operator &op, const Operator &op_G); + void SetOperator(const ParOperator &op, const ParOperator &op_G); void Mult(const Vector &x, Vector &y) const override; diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp index c3dbe6384..b2b719a0a 100644 --- a/palace/linalg/divfree.cpp +++ b/palace/linalg/divfree.cpp @@ -12,81 +12,74 @@ namespace palace { -// XX TODO: THIS PROBABLY NEEDS TO CONSIDER IN ALL 3 BILINEAR FORMS THE EFFECTS OF -// THE INPUT BDR_MARKER? - -// XX TODO NEW ParOperator FRAMEWORK - DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, - const mfem::Array &bdr_marker, mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, double tol, - int max_it, int print) + mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, + const std::vector> &h1_bdr_tdof_lists, + double tol, int max_it, int print) : mfem::Solver(nd_fespace.GetTrueVSize()) { constexpr MaterialPropertyType MatType = MaterialPropertyType::PERMITTIVITY_REAL; MaterialPropertyCoefficient epsilon_func(mat_op); - MFEM_VERIFY(bdr_marker.Size() == - h1_fespaces.GetFinestFESpace().GetParMesh()->bdr_attributes.Max(), - "Invalid boundary marker for divergence-free solver!"); - M.reserve(h1_fespaces.GetNumLevels()); - for (int l = 0; l < h1_fespaces.GetNumLevels(); l++) { - auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l); - mfem::Array dbc_tdof_list_l; - h1_fespace_l.GetEssentialTrueDofs(bdr_marker, dbc_tdof_list_l); - - mfem::ParBilinearForm 
m(&h1_fespace_l); - m.AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); - // m.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - m.Assemble(); - m.Finalize(); - mfem::HypreParMatrix *hM = m.ParallelAssemble(); - hM->EliminateBC(dbc_tdof_list_l, mfem::Operator::DiagonalPolicy::DIAG_ONE); - M.emplace_back(hM); + M.clear(); + M.reserve(h1_fespaces.GetNumLevels()); + for (int l = 0; l < h1_fespaces.GetNumLevels(); l++) + { + auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l); + auto m = std::make_unique(&h1_fespace_l); + m->AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); + // XX TODO: Partial assembly option? + m->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + m->Assemble(0); + m->Finalize(0); + M.push_back(std::make_unique(std::move(m), h1_fespace_l, h1_fespace_l)); + M.back()->SetEssentialTrueDofs(h1_bdr_tdof_lists[l], + Operator::DiagonalPolicy::DIAG_ONE); + } } { - mfem::ParMixedBilinearForm weakDiv(&nd_fespace, &h1_fespaces.GetFinestFESpace()); - weakDiv.AddDomainIntegrator( + // XX TODO: Partial assembly option? + auto weakDiv = std::make_unique( + &nd_fespace, &h1_fespaces.GetFinestFESpace()); + weakDiv->AddDomainIntegrator( new mfem::MixedVectorWeakDivergenceIntegrator(epsilon_func)); - // weakDiv.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - weakDiv.Assemble(); - weakDiv.Finalize(); - WeakDiv.reset(weakDiv.ParallelAssemble()); + weakDiv->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + weakDiv->Assemble(); + weakDiv->Finalize(); + WeakDiv = std::make_unique(std::move(weakDiv), nd_fespace, + h1_fespaces.GetFinestFESpace()); } { - mfem::ParDiscreteLinearOperator grad(&h1_fespaces.GetFinestFESpace(), &nd_fespace); - grad.AddDomainInterpolator(new mfem::GradientInterpolator); - // grad.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - grad.Assemble(); - grad.Finalize(); - Grad.reset(grad.ParallelAssemble()); + // XX TODO: Partial assembly option? 
+ auto grad = std::make_unique( + &h1_fespaces.GetFinestFESpace(), &nd_fespace); + grad->AddDomainInterpolator(new mfem::GradientInterpolator); + grad->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + grad->Assemble(); + grad->Finalize(); + Grad = std::make_unique(std::move(grad), h1_fespaces.GetFinestFESpace(), + nd_fespace, true); } - h1_fespaces.GetFinestFESpace().GetEssentialTrueDofs(bdr_marker, h1_bdr_tdof_list); - // XX TODO VISIT + // The system matrix for the projection is real and SPD. For the coarse-level AMG solve, + // we don't use an exact solve on the coarsest level. + auto amg = std::make_unique(1, 1, 0); + auto gmg = std::make_unique(std::move(amg), h1_fespaces, + nullptr, 1, 1, 2); - // // The system matrix for the projection is real and SPD. For the coarse-level AMG - // solve, - // // we don't use an exact solve on the coarsest level. - // auto amg = std::make_unique(); - // auto gmg = std::make_unique(std::move(amg), bdr_marker, - // h1_fespaces, nullptr, 1, 1, 2); - // gmg->SetOperator(M); - // pc = std::move(gmg); + auto pcg = std::make_unique(h1_fespaces.GetFinestFESpace().GetComm()); + pcg->iterative_mode = false; + pcg->SetRelTol(tol); + pcg->SetAbsTol(std::numeric_limits::epsilon()); + pcg->SetMaxIter(max_it); + pcg->SetPrintLevel(print); - ksp = std::make_unique(h1_fespaces.GetFinestFESpace().GetComm()); - ksp->SetRelTol(tol); - ksp->SetAbsTol(std::numeric_limits::epsilon()); - ksp->SetMaxIter(max_it); - ksp->SetPrintLevel(print); - ksp->SetOperator(*M.back()); - ksp->SetPreconditioner(*pc); + ksp = std::make_unique(std::move(pcg), std::move(amg)); + ksp->SetOperator(*M.back(), M); psi.SetSize(h1_fespaces.GetFinestFESpace().GetTrueVSize()); rhs.SetSize(h1_fespaces.GetFinestFESpace().GetTrueVSize()); - xr.SetSize(height); - xi.SetSize(height); } } // namespace palace diff --git a/palace/linalg/divfree.hpp b/palace/linalg/divfree.hpp index a1635a978..bf400df6c 100644 --- a/palace/linalg/divfree.hpp +++ b/palace/linalg/divfree.hpp @@ -5,13 
+5,18 @@ #define PALACE_LINALG_DIV_FREE_HPP #include +#include #include -#include "linalg/petsc.hpp" +#include "linalg/complex.hpp" +#include "linalg/ksp.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { class MaterialOperator; +class KspSolver; // // This solver implements a projection onto a divergence-free space satisfying Gᵀ M x = 0, @@ -22,61 +27,56 @@ class DivFreeSolver : public mfem::Solver { private: // Operators for the divergence-free projection. - std::unique_ptr WeakDiv, Grad; - std::vector> M; + std::unique_ptr WeakDiv, Grad; + std::vector> M; - // Linear solver and preconditioner for the projected linear system (Gᵀ M G) y = x. - std::unique_ptr ksp; - std::unique_ptr pc; + // Linear solver for the projected linear system (Gᵀ M G) y = x. + std::unique_ptr ksp; // Workspace objects for solver application. - mutable mfem::Vector psi, rhs, xr, xi; - - // Boundary condition dofs for essential BCs. - mfem::Array h1_bdr_tdof_list; + mutable Vector psi, rhs; public: - DivFreeSolver(const MaterialOperator &mat_op, const mfem::Array &bdr_marker, - mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, double tol, int max_it, - int print); + DivFreeSolver(const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, + const std::vector> &h1_bdr_tdof_lists, double tol, + int max_it, int print); - void SetOperator(const mfem::Operator &op) override {} + void SetOperator(const Operator &op) override {} // Given a vector of Nedelec dofs for an arbitrary vector field, compute the Nedelec dofs // of the irrotational portion of this vector field. The resulting vector will satisfy - // ∇ x x = 0. - void Mult(mfem::Vector &x) const + // ∇ x y = 0. + void Mult(Vector &y) const { - // Compute the divergence of x. - WeakDiv->Mult(x, rhs); + // Compute the divergence of y. 
+ WeakDiv->Mult(y, rhs); // Apply essential BC and solve the linear system. - psi = 0.0; - rhs.SetSubVector(h1_bdr_tdof_list, 0.0); + if (M.back()->GetEssentialTrueDofs()) + { + rhs.SetSubVector(*M.back()->GetEssentialTrueDofs(), 0.0); + } ksp->Mult(rhs, psi); - // Compute the irrotational portion of x and subtract. - Grad->AddMult(psi, x, 1.0); + // Compute the irrotational portion of y and subtract. + Grad->AddMult(psi, y, 1.0); } - void Mult(const mfem::Vector &x, mfem::Vector &y) const override + void Mult(const Vector &x, Vector &y) const override { y = x; Mult(y); } - void Mult(petsc::PetscParVector &x) const + void Mult(ComplexVector &y) const { - x.GetToVectors(xr, xi); - Mult(xr); - Mult(xi); - x.SetFromVectors(xr, xi); + Mult(y.Real()); + Mult(y.Imag()); } - void Mult(const petsc::PetscParVector &x, petsc::PetscParVector &y) const + void Mult(const ComplexVector &x, ComplexVector &y) const { - y.Copy(x); + y = x; Mult(y); } - using mfem::Operator::Mult; }; } // namespace palace diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index 9e2a5ef54..d0a6310d1 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -22,7 +22,7 @@ GeometricMultigridSolver::GeometricMultigridSolver( MFEM_VERIFY(n_levels > 0, "Empty finite element space hierarchy during multigrid solver setup!"); A_.resize(n_levels, nullptr); - dbc_tdof_lists_.resize(n_levels, nullptr); + P_.resize(n_levels, nullptr); x_.resize(n_levels, Vector()); y_.resize(n_levels, Vector()); r_.resize(n_levels, Vector()); @@ -34,10 +34,13 @@ GeometricMultigridSolver::GeometricMultigridSolver( R_.resize(n_levels, mfem::Array()); // Configure prolongation operators. 
- P_.reserve(n_levels); for (int l = 0; l < n_levels; l++) { - P_.push_back(fespaces.GetProlongationAtLevel(l)); + const auto *PtAP_l = + dynamic_cast(fespaces.GetProlongationAtLevel(l)); + MFEM_VERIFY(PtAP_l, + "GeometricMultigridSolver requires ParOperator prolongation operators!"); + P_[l] = PtAP_l; } // Use the supplied level 0 (coarse) solver. @@ -65,8 +68,8 @@ GeometricMultigridSolver::GeometricMultigridSolver( } void GeometricMultigridSolver::SetOperator( - const std::vector> &ops, - const std::vector> *aux_ops) + const std::vector> &ops, + const std::vector> *aux_ops) { const int n_levels = static_cast(A_.size()); MFEM_VERIFY(static_cast(ops.size()) == n_levels && @@ -84,13 +87,11 @@ void GeometricMultigridSolver::SetOperator( } else { + + // XX TODO TEST IF THIS ACTUALLY WORKS AT RUNTIME... + B_[l]->SetOperator(*ops[l]); } - - // Configure lists of essential boundary condition true dofs. - const auto *PtAP_l = dynamic_cast(ops[l].get()); - MFEM_VERIFY(PtAP_l, "GeometricMultigridSolver requires ParOperator operators!"); - dbc_tdof_lists_[l] = PtAP_l->GetEssentialTrueDofs(); } // Operator size is given by the fine level dimensions. @@ -167,11 +168,12 @@ void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const // Coarse grid correction. P_[l - 1]->ArrayMultTranspose(R_[l], X_[l - 1]); - if (dbc_tdof_lists_[l - 1]) + if (A_[l - 1]->GetEssentialTrueDofs()) { + const mfem::Array &dbc_tdof_list = *A_[l - 1]->GetEssentialTrueDofs(); for (int j = 0; j < n_rhs; j++) { - X_[l - 1][j]->SetSubVector(*dbc_tdof_lists_[l - 1], 0.0); + X_[l - 1][j]->SetSubVector(dbc_tdof_list, 0.0); } } VCycle(l - 1, false); diff --git a/palace/linalg/gmg.hpp b/palace/linalg/gmg.hpp index 4d8998fb3..a5d9c0667 100644 --- a/palace/linalg/gmg.hpp +++ b/palace/linalg/gmg.hpp @@ -26,10 +26,7 @@ class GeometricMultigridSolver : public mfem::Solver const int pc_it; // System matrices at each multigrid level and prolongation operators (not owned). 
- std::vector A_, P_; - - // Essential Dirichlet boundary conditions at each level (not owned). - std::vector *> dbc_tdof_lists_; + std::vector A_, P_; // Smoothers for each level. Coarse level solver is B_[0]. std::vector> B_; @@ -64,8 +61,8 @@ class GeometricMultigridSolver : public mfem::Solver MFEM_ABORT("SetOperator with a single operator is not implemented for " "GeometricMultigridSolver, use the other signature instead!"); } - void SetOperator(const std::vector> &ops, - const std::vector> *aux_ops = nullptr); + void SetOperator(const std::vector> &ops, + const std::vector> *aux_ops = nullptr); void Mult(const Vector &x, Vector &y) const override { diff --git a/palace/linalg/jacobi.cpp b/palace/linalg/jacobi.cpp index b4ccb86b1..0a494b84d 100644 --- a/palace/linalg/jacobi.cpp +++ b/palace/linalg/jacobi.cpp @@ -8,7 +8,7 @@ namespace palace { -void JacobiSmoother::SetOperator(const Operator &op) +void JacobiSmoother::SetOperator(const ParOperator &op) { height = op.Height(); width = op.Width(); diff --git a/palace/linalg/jacobi.hpp b/palace/linalg/jacobi.hpp index 2b240d3af..cc1fbff7f 100644 --- a/palace/linalg/jacobi.hpp +++ b/palace/linalg/jacobi.hpp @@ -25,7 +25,11 @@ class JacobiSmoother : public mfem::Solver public: JacobiSmoother() : mfem::Solver() {} - void SetOperator(const Operator &op) override; + void SetOperator(const Operator &op) override + { + MFEM_ABORT("JacobiSmoother requires a ParOperator operator!"); + } + void SetOperator(const ParOperator &op); void Mult(const Vector &x, Vector &y) const override; diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp index 6a0bb9b15..1a2dd7914 100644 --- a/palace/linalg/ksp.cpp +++ b/palace/linalg/ksp.cpp @@ -242,36 +242,30 @@ KspSolver::KspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy ksp_->SetPreconditioner(*pc_); } -void KspSolver::SetOperator(const Operator &op, const Operator &pc_op) +void KspSolver::SetOperator(const Operator &op, + const std::vector> &pc_ops, + const 
std::vector> *aux_pc_ops) { // Unset the preconditioner before so that IterativeSolver::SetOperator does not set the // preconditioner operator again. - pc_->SetOperator(pc_op); - // ksp_->SetPreconditioner(nullptr); //XX TODO WAITING MFEM PATCH - ksp_->SetOperator(op); - ksp_->SetPreconditioner(*pc_); - height = op.Height(); - width = op.Width(); -} - -void KspSolver::SetOperator(const Operator &op, - const std::vector> &pc_ops, - const std::vector> *aux_pc_ops) -{ auto *gmg = dynamic_cast(pc_.get()); if (gmg) { - // Unset the preconditioner before so that IterativeSolver::SetOperator does not set the - // preconditioner operator again. gmg->SetOperator(pc_ops, aux_pc_ops); - // ksp_->SetPreconditioner(nullptr); //XX TODO WAITING MFEM PATCH - ksp_->SetOperator(op); - ksp_->SetPreconditioner(*pc_); } else { - SetOperator(op, *pc_ops.back()); + MFEM_VERIFY( + !aux_pc_ops, + "Auxiliary space operators should not be specified for KspSolver::SetOperator " + "unless the preconditioner is a GeometricMultigridSolver!"); + pc_->SetOperator(*pc_ops.back()); } + // ksp_->SetPreconditioner(nullptr); //XX TODO WAITING MFEM PATCH + ksp_->SetOperator(op); + ksp_->SetPreconditioner(*pc_); + height = op.Height(); + width = op.Width(); } void KspSolver::Mult(const Vector &x, Vector &y) const @@ -302,9 +296,17 @@ ComplexKspSolver::ComplexKspSolver(const IoData &iodata, ksp_->SetPreconditioner(*pc_); } +void ComplexKspSolver::SetOperator( + const ComplexOperator &op, const std::vector> &pc_ops, + const std::vector> *aux_pc_ops) +{ + KspSolver::SetOperator(op, pc_ops, aux_pc_ops); // XX TODO TEST THIS AT RUNTIME... +} + void ComplexKspSolver::Mult(const ComplexVector &x, ComplexVector &y) const { - KspSolver::Mult(x, y); // XX TODO TEST THIS... + KspSolver::Mult(x, y); // XX TODO TEST THIS AT RUNTIME... 
+ y.Sync(); } // XX TODO REMOVE diff --git a/palace/linalg/ksp.hpp b/palace/linalg/ksp.hpp index 321a080d1..52c290b7e 100644 --- a/palace/linalg/ksp.hpp +++ b/palace/linalg/ksp.hpp @@ -13,6 +13,7 @@ namespace palace { +class ComplexParOperator; class ComplexVector; class IoData; @@ -43,10 +44,14 @@ class KspSolver : public mfem::Solver int NumTotalMult() const { return ksp_mult; } int NumTotalMultIter() const { return ksp_mult_it; } - void SetOperator(const Operator &op) override { SetOperator(op, op); } - void SetOperator(const Operator &op, const Operator &pc_op); - void SetOperator(const Operator &op, const std::vector> &pc_ops, - const std::vector> *pc_aux_ops = nullptr); + void SetOperator(const Operator &op) override + { + MFEM_ABORT("SetOperator with a single operator is not implemented for KspSolver, you " + "must specify the preconditioner operator as well!"); + } + virtual void + SetOperator(const Operator &op, const std::vector> &pc_ops, + const std::vector> *pc_aux_ops = nullptr); void Mult(const Vector &x, Vector &y) const override; }; @@ -57,6 +62,18 @@ class ComplexKspSolver : public KspSolver ComplexKspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr); + using KspSolver::SetOperator; + void SetOperator( + const Operator &op, const std::vector> &pc_ops, + const std::vector> *pc_aux_ops = nullptr) override + { + MFEM_ABORT("SetOperator with a real-valued operator is not implemented for " + "ComplexKspSolver, use the complex-valued signature instead!"); + } + void SetOperator(const ComplexOperator &op, + const std::vector> &pc_ops, + const std::vector> *pc_aux_ops = nullptr); + void Mult(const Vector &x, Vector &y) const override { MFEM_ABORT("Mult with a real-valued vector is not implemented for " diff --git a/palace/linalg/mumps.cpp b/palace/linalg/mumps.cpp index 479ed5755..a54ba6e87 100644 --- a/palace/linalg/mumps.cpp +++ b/palace/linalg/mumps.cpp @@ -43,11 
+43,9 @@ MumpsSolver::MumpsSolver(MPI_Comm comm, mfem::MUMPSSolver::MatType sym, } } -void MumpsSolver::SetOperator(const Operator &op) +void MumpsSolver::SetOperator(const ParOperator &op) { - auto *PtAP = const_cast(dynamic_cast(&op)); - MFEM_VERIFY(PtAP, "MumpsSolver requires a ParOperator operator!"); - mfem::MUMPSSolver::SetOperator(PtAP->ParallelAssemble()); + mfem::MUMPSSolver::SetOperator(const_cast(&op)->ParallelAssemble()); } } // namespace palace diff --git a/palace/linalg/mumps.hpp b/palace/linalg/mumps.hpp index 00469a400..7cb4c77ad 100644 --- a/palace/linalg/mumps.hpp +++ b/palace/linalg/mumps.hpp @@ -39,7 +39,11 @@ class MumpsSolver : public mfem::MUMPSSolver { } - void SetOperator(const Operator &op) override; + void SetOperator(const Operator &op) override + { + MFEM_ABORT("MumpsSolver requires a ParOperator operator!"); + } + void SetOperator(const ParOperator &op); }; } // namespace palace diff --git a/palace/linalg/strumpack.cpp b/palace/linalg/strumpack.cpp index 5689ebf9a..bf919f3a3 100644 --- a/palace/linalg/strumpack.cpp +++ b/palace/linalg/strumpack.cpp @@ -104,13 +104,11 @@ StrumpackSolverBase::StrumpackSolverBase( } template -void StrumpackSolverBase::SetOperator(const Operator &op) +void StrumpackSolverBase::SetOperator(const ParOperator &op) { // Convert the input operator to a distributed STRUMPACK matrix (always assume a symmetric // sparsity pattern). Safe to delete the matrix since STRUMPACK copies it on input. - auto *PtAP = const_cast(dynamic_cast(&op)); - MFEM_VERIFY(PtAP, "StrumpackSolver requires a ParOperator operator!"); - mfem::STRUMPACKRowLocMatrix A(PtAP->ParallelAssemble(), true); + mfem::STRUMPACKRowLocMatrix A(const_cast(&op)->ParallelAssemble(), true); // Set up base class. 
StrumpackSolverType::SetOperator(A); diff --git a/palace/linalg/strumpack.hpp b/palace/linalg/strumpack.hpp index 081fa794e..fd05de362 100644 --- a/palace/linalg/strumpack.hpp +++ b/palace/linalg/strumpack.hpp @@ -37,7 +37,11 @@ class StrumpackSolverBase : public StrumpackSolverType { } - void SetOperator(const Operator &op) override; + void SetOperator(const Operator &op) override + { + MFEM_ABORT("StrumpackSolver requires a ParOperator operator!"); + } + void SetOperator(const ParOperator &op); }; using StrumpackSolver = StrumpackSolverBase; diff --git a/palace/linalg/superlu.cpp b/palace/linalg/superlu.cpp index f4201e34e..e018a113f 100644 --- a/palace/linalg/superlu.cpp +++ b/palace/linalg/superlu.cpp @@ -72,7 +72,7 @@ SuperLUSolver::SuperLUSolver(MPI_Comm comm, config::LinearSolverData::SymFactTyp solver.SetSymmetricPattern(true); // Always symmetric sparsity pattern } -void SuperLUSolver::SetOperator(const mfem::Operator &op) +void SuperLUSolver::SetOperator(const ParOperator &op) { // We need to save A because SuperLU does not copy the input matrix. For repeated // factorizations, always reuse the sparsity pattern. @@ -80,9 +80,8 @@ void SuperLUSolver::SetOperator(const mfem::Operator &op) { solver.SetFact(mfem::superlu::SamePattern_SameRowPerm); } - auto *PtAP = const_cast(dynamic_cast(&op)); - MFEM_VERIFY(PtAP, "SuperLUSolver requires a ParOperator operator!"); - A = std::make_unique(PtAP->ParallelAssemble()); + A = std::make_unique( + const_cast(&op)->ParallelAssemble()); // Set up base class. 
solver.SetOperator(*A); diff --git a/palace/linalg/superlu.hpp b/palace/linalg/superlu.hpp index 74e857423..e8355f9bb 100644 --- a/palace/linalg/superlu.hpp +++ b/palace/linalg/superlu.hpp @@ -35,7 +35,11 @@ class SuperLUSolver : public mfem::Solver { } - void SetOperator(const Operator &op) override; + void SetOperator(const Operator &op) override + { + MFEM_ABORT("SuperLUSolver requires a ParOperator operator!"); + } + void SetOperator(const ParOperator &op); void Mult(const Vector &x, Vector &y) const override { solver.Mult(x, y); } void ArrayMult(const mfem::Array &X, diff --git a/palace/models/domainpostoperator.cpp b/palace/models/domainpostoperator.cpp index 468c9c755..a50c3d967 100644 --- a/palace/models/domainpostoperator.cpp +++ b/palace/models/domainpostoperator.cpp @@ -12,146 +12,13 @@ namespace palace { -DomainPostOperatorMF::DomainPostOperatorMF(const IoData &iodata, - const MaterialOperator &mat, - mfem::ParFiniteElementSpace &h1_fespace) - : mat_op(mat), ones(&h1_fespace) -{ - // Define a constant 1 function on the scalar finite element space for computing volume - // integrals. - ones.mfem::Vector::operator=(1.0); - - // Use the provided domain postprocessing indices to group for postprocessing bulk - // dielectric loss. - for (const auto &[idx, data] : iodata.domains.postpro.dielectric) - { - mfem::Array &attr_marker = - attr_markers.emplace(idx, h1_fespace.GetParMesh()->attributes.Max()).first->second; - attr_marker = 0; - for (auto attr : data.attributes) - { - attr_marker[attr - 1] = 1; - } - } -} - -double -DomainPostOperatorMF::GetElectricFieldEnergy(const mfem::ParComplexGridFunction &E) const -{ - // Compute the electric field energy integral as: E_elec = 1/2 Re{∫_Ω Dᴴ E dV}. 
- std::map dummy_l2s; - EnergyDensityCoefficient - ue_func(E, mat_op, dummy_l2s); - return GetVolumeIntegral(ue_func); -} - -double DomainPostOperatorMF::GetElectricFieldEnergy(const mfem::ParGridFunction &E) const -{ - std::map dummy_l2s; - EnergyDensityCoefficient - ue_func(E, mat_op, dummy_l2s); - return GetVolumeIntegral(ue_func); -} - -double -DomainPostOperatorMF::GetMagneticFieldEnergy(const mfem::ParComplexGridFunction &B) const -{ - // Compute the magnetic field energy integral as: E_mag = 1/2 Re{∫_Ω Bᴴ H dV}. - std::map dummy_l2s; - EnergyDensityCoefficient - um_func(B, mat_op, dummy_l2s); - return GetVolumeIntegral(um_func); -} - -double DomainPostOperatorMF::GetMagneticFieldEnergy(const mfem::ParGridFunction &B) const -{ - std::map dummy_l2s; - EnergyDensityCoefficient - um_func(B, mat_op, dummy_l2s); - return GetVolumeIntegral(um_func); -} - -double DomainPostOperatorMF::GetDomainElectricFieldEnergy( - int idx, const mfem::ParComplexGridFunction &E) const -{ - // Compute the electric field energy integral for only a portion of the domain. - auto it = attr_markers.find(idx); - MFEM_VERIFY(it != attr_markers.end(), - "Invalid domain index when postprocessing bulk dielectric loss!"); - std::map dummy_l2s; - EnergyDensityCoefficient - ue_func(E, mat_op, dummy_l2s); - return GetVolumeIntegral(ue_func, it->second); -} - -double -DomainPostOperatorMF::GetDomainElectricFieldEnergy(int idx, - const mfem::ParGridFunction &E) const -{ - auto it = attr_markers.find(idx); - MFEM_VERIFY(it != attr_markers.end(), - "Invalid domain index when postprocessing bulk dielectric loss!"); - std::map dummy_l2s; - EnergyDensityCoefficient - ue_func(E, mat_op, dummy_l2s); - return GetVolumeIntegral(ue_func, it->second); -} - -double DomainPostOperatorMF::GetDomainElectricFieldEnergyLoss( - int idx, const mfem::ParComplexGridFunction &E) const -{ - // Compute the electric field energy integral for only a portion of the domain. 
- auto it = attr_markers.find(idx); - MFEM_VERIFY(it != attr_markers.end(), - "Invalid domain index when postprocessing bulk dielectric loss!"); - std::map dummy_l2s; - EnergyDensityCoefficient - uei_func(E, mat_op, dummy_l2s); - return GetVolumeIntegral(uei_func, it->second); -} - -double -DomainPostOperatorMF::GetDomainElectricFieldEnergyLoss(int idx, - const mfem::ParGridFunction &E) const -{ - auto it = attr_markers.find(idx); - MFEM_VERIFY(it != attr_markers.end(), - "Invalid domain index when postprocessing bulk dielectric loss!"); - std::map dummy_l2s; - EnergyDensityCoefficient - uei_func(E, mat_op, dummy_l2s); - return GetVolumeIntegral(uei_func, it->second); -} - -double DomainPostOperatorMF::GetVolumeIntegral(mfem::Coefficient &f) const -{ - // Integrate the coefficient over the entire domain. - mfem::ParLinearForm s(ones.ParFESpace()); - s.AddDomainIntegrator(new DomainLFIntegrator(f)); - s.UseFastAssembly(false); - s.Assemble(); - return s(ones); -} - -double DomainPostOperatorMF::GetVolumeIntegral(mfem::Coefficient &f, - mfem::Array &attr_marker) const -{ - // Integrate the coefficient over the domain attributes making up this domain index. - mfem::ParLinearForm s(ones.ParFESpace()); - s.AddDomainIntegrator(new DomainLFIntegrator(f), attr_marker); - s.UseFastAssembly(false); - s.Assemble(); - return s(ones); -} - DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOperator &mat_op, mfem::ParFiniteElementSpace *nd_fespace, mfem::ParFiniteElementSpace *rt_fespace) - : m0ND(nd_fespace ? std::optional(nd_fespace) : std::nullopt), - m0RT(rt_fespace ? std::optional(rt_fespace) : std::nullopt) + : M_ND(nd_fespace ? std::optional(nd_fespace) : std::nullopt), + M_RT(rt_fespace ? std::optional(rt_fespace) : std::nullopt) { - if (m0ND.has_value()) + if (M_ND.has_value()) { // Construct ND mass matrix to compute the electric field energy integral as: // E_elec = 1/2 Re{∫_Ω Dᴴ E dV} as (M_eps * e)ᴴ e. 
@@ -160,10 +27,12 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera constexpr MaterialPropertyType MatTypeEpsReal = MaterialPropertyType::PERMITTIVITY_REAL; constexpr MaterialPropertyType MatTypeEpsImag = MaterialPropertyType::PERMITTIVITY_IMAG; MaterialPropertyCoefficient epsilon_func(mat_op); - m0ND->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func)); - // m0ND->SetAssemblyLevel(mfem::AssemblyLevel::FULL); - m0ND->Assemble(); - m0ND->Finalize(); + M_ND->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func)); + // XX TODO: Partial assembly option? + M_ND->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + M_ND->Assemble(0); + M_ND->Finalize(0); + D.SetSize(M_ND->Height()); // Use the provided domain postprocessing indices to group for postprocessing bulk // dielectric loss. @@ -184,40 +53,45 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera epsilon_func_i.AddCoefficient( std::make_unique>(mat_op, -1.0), attr_marker); - auto &m0 = m0NDi.emplace(idx, std::make_pair(nd_fespace, nd_fespace)).first->second; - mfem::ParBilinearForm &m0r = m0.first; - mfem::ParBilinearForm &m0i = m0.second; - m0r.AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func_r)); - m0i.AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func_i)); - // m0r.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - // m0i.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - m0r.Assemble(); - m0i.Assemble(); - m0r.Finalize(); - m0i.Finalize(); + auto &M = M_NDi.emplace(idx, std::make_pair(nd_fespace, nd_fespace)).first->second; + mfem::BilinearForm &Mr = M.first; + mfem::BilinearForm &Mi = M.second; + Mr.AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func_r)); + Mi.AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func_i)); + // XX TODO: Partial assembly option? 
+ Mr.SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + Mi.SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + Mr.Assemble(); + Mi.Assemble(); + Mr.Finalize(); + Mi.Finalize(); } } - if (m0RT.has_value()) + if (M_RT.has_value()) { // Construct RT mass matrix to compute the magnetic field energy integral as: // E_mag = 1/2 Re{∫_Ω Bᴴ H dV} as (M_muinv * b)ᴴ b. constexpr MaterialPropertyType MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY; MaterialPropertyCoefficient muinv_func(mat_op); - m0RT->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(muinv_func)); - // m0RT->SetAssemblyLevel(mfem::AssemblyLevel::FULL); - m0RT->Assemble(); - m0RT->Finalize(); + M_RT->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(muinv_func)); + // XX TODO: Partial assembly option? + M_RT->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + M_RT->Assemble(0); + M_RT->Finalize(0); + H.SetSize(M_RT->Height()); } } double DomainPostOperator::GetElectricFieldEnergy(const mfem::ParComplexGridFunction &E) const { - if (m0ND.has_value()) + if (M_ND.has_value()) { - double res = m0ND->InnerProduct(E.real(), E.real()); - res += m0ND->InnerProduct(E.imag(), E.imag()); + M_ND->Mult(E.real(), D); + double res = mfem::InnerProduct(E.real(), D); + M_ND->Mult(E.imag(), D); + res += mfem::InnerProduct(E.imag(), D); Mpi::GlobalSum(1, &res, E.ParFESpace()->GetComm()); return 0.5 * res; } @@ -228,9 +102,10 @@ DomainPostOperator::GetElectricFieldEnergy(const mfem::ParComplexGridFunction &E double DomainPostOperator::GetElectricFieldEnergy(const mfem::ParGridFunction &E) const { - if (m0ND.has_value()) + if (M_ND.has_value()) { - double res = m0ND->InnerProduct(E, E); + M_ND->Mult(E, D); + double res = mfem::InnerProduct(E, D); Mpi::GlobalSum(1, &res, E.ParFESpace()->GetComm()); return 0.5 * res; } @@ -242,10 +117,12 @@ double DomainPostOperator::GetElectricFieldEnergy(const mfem::ParGridFunction &E double DomainPostOperator::GetMagneticFieldEnergy(const mfem::ParComplexGridFunction &B) const { - if 
(m0RT.has_value()) + if (M_RT.has_value()) { - double res = m0RT->InnerProduct(B.real(), B.real()); - res += m0RT->InnerProduct(B.imag(), B.imag()); + M_RT->Mult(B.real(), H); + double res = mfem::InnerProduct(B.real(), H); + M_RT->Mult(B.imag(), H); + res += mfem::InnerProduct(B.imag(), H); Mpi::GlobalSum(1, &res, B.ParFESpace()->GetComm()); return 0.5 * res; } @@ -256,9 +133,10 @@ DomainPostOperator::GetMagneticFieldEnergy(const mfem::ParComplexGridFunction &B double DomainPostOperator::GetMagneticFieldEnergy(const mfem::ParGridFunction &B) const { - if (m0RT.has_value()) + if (M_RT.has_value()) { - double res = m0RT->InnerProduct(B, B); + M_RT->Mult(B, H); + double res = mfem::InnerProduct(B, H); Mpi::GlobalSum(1, &res, B.ParFESpace()->GetComm()); return 0.5 * res; } @@ -271,11 +149,13 @@ double DomainPostOperator::GetDomainElectricFieldEnergy( int idx, const mfem::ParComplexGridFunction &E) const { // Compute the electric field energy integral for only a portion of the domain. - auto it = m0NDi.find(idx); - MFEM_VERIFY(it != m0NDi.end(), + auto it = M_NDi.find(idx); + MFEM_VERIFY(it != M_NDi.end(), "Invalid domain index when postprocessing bulk dielectric loss!"); - double res = it->second.first.InnerProduct(E.real(), E.real()); - res += it->second.first.InnerProduct(E.imag(), E.imag()); + it->second.first.Mult(E.real(), D); + double res = mfem::InnerProduct(E.real(), D); + it->second.first.Mult(E.imag(), D); + res += mfem::InnerProduct(E.imag(), D); Mpi::GlobalSum(1, &res, E.ParFESpace()->GetComm()); return 0.5 * res; } @@ -284,10 +164,11 @@ double DomainPostOperator::GetDomainElectricFieldEnergy(int idx, const mfem::ParGridFunction &E) const { - auto it = m0NDi.find(idx); - MFEM_VERIFY(it != m0NDi.end(), + auto it = M_NDi.find(idx); + MFEM_VERIFY(it != M_NDi.end(), "Invalid domain index when postprocessing bulk dielectric loss!"); - double res = it->second.first.InnerProduct(E, E); + it->second.first.Mult(E, D); + double res = mfem::InnerProduct(E, D); 
Mpi::GlobalSum(1, &res, E.ParFESpace()->GetComm()); return 0.5 * res; } @@ -296,11 +177,13 @@ double DomainPostOperator::GetDomainElectricFieldEnergyLoss( int idx, const mfem::ParComplexGridFunction &E) const { // Compute the electric field energy integral for only a portion of the domain. - auto it = m0NDi.find(idx); - MFEM_VERIFY(it != m0NDi.end(), + auto it = M_NDi.find(idx); + MFEM_VERIFY(it != M_NDi.end(), "Invalid domain index when postprocessing bulk dielectric loss!"); - double res = it->second.second.InnerProduct(E.real(), E.real()); - res += it->second.second.InnerProduct(E.imag(), E.imag()); + it->second.second.Mult(E.real(), D); + double res = mfem::InnerProduct(E.real(), D); + it->second.second.Mult(E.imag(), D); + res += mfem::InnerProduct(E.imag(), D); Mpi::GlobalSum(1, &res, E.ParFESpace()->GetComm()); return 0.5 * res; } @@ -309,10 +192,11 @@ double DomainPostOperator::GetDomainElectricFieldEnergyLoss(int idx, const mfem::ParGridFunction &E) const { - auto it = m0NDi.find(idx); - MFEM_VERIFY(it != m0NDi.end(), + auto it = M_NDi.find(idx); + MFEM_VERIFY(it != M_NDi.end(), "Invalid domain index when postprocessing bulk dielectric loss!"); - double res = it->second.second.InnerProduct(E, E); + it->second.second.Mult(E, D); + double res = mfem::InnerProduct(E, D); Mpi::GlobalSum(1, &res, E.ParFESpace()->GetComm()); return 0.5 * res; } diff --git a/palace/models/domainpostoperator.hpp b/palace/models/domainpostoperator.hpp index 2af2f9f15..e017b6f9f 100644 --- a/palace/models/domainpostoperator.hpp +++ b/palace/models/domainpostoperator.hpp @@ -8,6 +8,7 @@ #include #include #include +#include "linalg/vector.hpp" namespace palace { @@ -15,45 +16,6 @@ namespace palace class IoData; class MaterialOperator; -// -// A class handling domain postprocessing (matrix-free). -// -class DomainPostOperatorMF -{ -private: - // Reference to material property operator (not owned). 
- const MaterialOperator &mat_op; - - // Unit function used for computing volume integrals. - mfem::ParGridFunction ones; - - // Mapping from domain index to marker and loss tangent for postprocessing bulk dielectic - // loss. - mutable std::map> attr_markers; - - double GetVolumeIntegral(mfem::Coefficient &f) const; - double GetVolumeIntegral(mfem::Coefficient &f, mfem::Array &attr_marker) const; - -public: - DomainPostOperatorMF(const IoData &iodata, const MaterialOperator &mat, - mfem::ParFiniteElementSpace &h1_fespace); - - // Access underlying bulk loss postprocessing data structures (for keys). - const auto &GetEps() const { return attr_markers; } - auto SizeEps() const { return attr_markers.size(); } - - // Get volume integrals computing bulk electric or magnetic field energy. - double GetElectricFieldEnergy(const mfem::ParComplexGridFunction &E) const; - double GetElectricFieldEnergy(const mfem::ParGridFunction &E) const; - double GetMagneticFieldEnergy(const mfem::ParComplexGridFunction &B) const; - double GetMagneticFieldEnergy(const mfem::ParGridFunction &B) const; - double GetDomainElectricFieldEnergy(int idx, const mfem::ParComplexGridFunction &E) const; - double GetDomainElectricFieldEnergy(int idx, const mfem::ParGridFunction &E) const; - double GetDomainElectricFieldEnergyLoss(int idx, - const mfem::ParComplexGridFunction &E) const; - double GetDomainElectricFieldEnergyLoss(int idx, const mfem::ParGridFunction &E) const; -}; - // // A class handling domain postprocessing. // @@ -61,8 +23,11 @@ class DomainPostOperator { private: // Bilinear forms for computing field energy integrals over domains. - std::optional m0ND, m0RT; - std::map> m0NDi; + std::optional M_ND, M_RT; + std::map> M_NDi; + + // Temporary vectors for inner product calculations. 
+ mutable Vector D, H; public: DomainPostOperator(const IoData &iodata, const MaterialOperator &mat_op, @@ -70,8 +35,8 @@ class DomainPostOperator mfem::ParFiniteElementSpace *rt_fespace); // Access underlying bulk loss postprocessing data structures (for keys). - const auto &GetEps() const { return m0NDi; } - auto SizeEps() const { return m0NDi.size(); } + const auto &GetEps() const { return M_NDi; } + auto SizeEps() const { return M_NDi.size(); } // Get volume integrals computing bulk electric or magnetic field energy. double GetElectricFieldEnergy(const mfem::ParComplexGridFunction &E) const; diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp index 79473acb0..d6747737e 100644 --- a/palace/models/spaceoperator.hpp +++ b/palace/models/spaceoperator.hpp @@ -88,6 +88,16 @@ class SpaceOperator SpaceOperator(const IoData &iodata, const std::vector> &mesh); + // Return list of all PEC boundary true dofs for all finite element space levels. + const std::vector> &GetNDDbcTDofLists() const + { + return nd_dbc_tdof_lists; + } + const std::vector> &GetH1DbcTDofLists() const + { + return h1_dbc_tdof_lists; + } + // Returns lists of all boundary condition true dofs, PEC included, for the auxiliary // H1 space hierarchy. These are all boundaries which affect the stiffness and damping // (K and C) matrices, used for nullspace corrections. 
@@ -127,7 +137,7 @@ class SpaceOperator }; std::unique_ptr GetSystemMatrix(OperatorType type, - mfem::Operator::DiagonalPolicy diag_policy); + Operator::DiagonalPolicy diag_policy); std::unique_ptr GetComplexSystemMatrix(OperatorType type, Operator::DiagonalPolicy diag_policy) { @@ -135,7 +145,7 @@ class SpaceOperator } std::unique_ptr GetComplexSystemMatrix(OperatorType type, double omega, - mfem::Operator::DiagonalPolicy diag_policy); + Operator::DiagonalPolicy diag_policy); // Construct the complete frequency or time domain system matrix using the provided // stiffness, damping, mass, and extra matrices: diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp index 7814092cf..d0cbe5330 100644 --- a/palace/models/timeoperator.cpp +++ b/palace/models/timeoperator.cpp @@ -173,6 +173,8 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera print](double a0, double a1) -> std::unique_ptr { + // XX TODO WORKING ON MONDAY!! + // Configure the system matrix and also the matrix (matrices) from which the // preconditioner will be constructed. this->A = spaceop.GetSystemMatrix(a0, a1, 1.0, this->K.get(), this->C.get(), diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp index e90cd33bb..20228d7f7 100644 --- a/palace/models/waveportoperator.cpp +++ b/palace/models/waveportoperator.cpp @@ -323,9 +323,9 @@ class BdrHVectorCoefficient : public mfem::VectorCoefficient }; WavePortData::WavePortData(const config::WavePortData &data, const MaterialOperator &mat_op, - const mfem::Array &dbc_marker, mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace) + mfem::ParFiniteElementSpace &h1_fespace, + const mfem::Array &dbc_marker) { excitation = data.excitation; mode_idx = data.mode_idx; @@ -341,7 +341,7 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera // field by Eₜ = eₜ/kₙ and Eₙ = ieₙ. 
This is solved on the global mesh so the result is a // grid function over the entire space, not just the port boundary (so that it can be // queried from functions which use the global mesh). - GetTrueDofs(dbc_marker, nd_fespace, h1_fespace, nd_attr_tdof_list, h1_attr_tdof_list); + GetTrueDofs(nd_fespace, h1_fespace, dbc_marker, nd_attr_tdof_list, h1_attr_tdof_list); // Construct the system matrices. We will actually solve the shifted problem: // [Bₜₜ Bₜₙ] [eₜ] = λ [Bₜₜ + 1/Θ² Aₜₜ Bₜₙ] [eₜ] @@ -485,9 +485,9 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera } } -void WavePortData::GetTrueDofs(const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpace &nd_fespace, +void WavePortData::GetTrueDofs(mfem::ParFiniteElementSpace &nd_fespace, mfem::ParFiniteElementSpace &h1_fespace, + const mfem::Array &dbc_marker, mfem::Array &nd_tdof_list, mfem::Array &h1_tdof_list) { @@ -850,7 +850,7 @@ void WavePortOperator::SetUpBoundaryProperties(const IoData &iodata, // Set up wave port data structures. for (const auto &[idx, data] : iodata.boundaries.waveport) { - ports.try_emplace(idx, data, mat_op, dbc_marker, nd_fespace, h1_fespace); + ports.try_emplace(idx, data, mat_op, nd_fespace, h1_fespace, dbc_marker); } MFEM_VERIFY( ports.empty() || iodata.problem.type == config::ProblemData::Type::DRIVEN, diff --git a/palace/models/waveportoperator.hpp b/palace/models/waveportoperator.hpp index ac6e46d9f..1ea469074 100644 --- a/palace/models/waveportoperator.hpp +++ b/palace/models/waveportoperator.hpp @@ -66,9 +66,9 @@ class WavePortData // std::unique_ptr ksp; // Helper function to get true degrees of freedom on the port. 
- void GetTrueDofs(const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace, mfem::Array &nd_tdof_list, + void GetTrueDofs(mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &h1_fespace, + const mfem::Array &dbc_marker, mfem::Array &nd_tdof_list, mfem::Array &h1_tdof_list); // Configure and solve the linear eigenvalue problem for the boundary mode. @@ -78,8 +78,8 @@ class WavePortData public: WavePortData(const config::WavePortData &data, const MaterialOperator &mat_op, - const mfem::Array &dbc_marker, mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace); + mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &h1_fespace, const mfem::Array &dbc_marker); const mfem::Array &GetMarker() const { return attr_marker; } mfem::Array &GetMarker() { return attr_marker; } From cca2d8757945064e22ad5db5fc3a3bfe66cc1408 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Sun, 14 May 2023 18:24:08 -0700 Subject: [PATCH 04/41] Integrate new linear algebra to real-valued simulation types: electrostatic, magnetostatic, transient --- palace/drivers/basesolver.cpp | 7 +- palace/drivers/basesolver.hpp | 3 +- palace/drivers/drivensolver.cpp | 1 - palace/drivers/eigensolver.cpp | 1 - palace/drivers/electrostaticsolver.cpp | 81 +-- palace/drivers/magnetostaticsolver.cpp | 82 +-- palace/drivers/transientsolver.cpp | 6 +- palace/fem/multigrid.hpp | 2 + palace/linalg/distrelaxation.hpp | 124 ---- palace/linalg/hypre.cpp | 856 +------------------------ palace/linalg/hypre.hpp | 56 +- palace/linalg/ksp.cpp | 458 ++----------- palace/linalg/ksp.hpp | 151 +---- palace/linalg/pc.cpp | 217 ------- palace/linalg/pc.hpp | 69 -- palace/models/romoperator.hpp | 1 - palace/models/timeoperator.cpp | 213 +----- palace/models/timeoperator.hpp | 11 +- 18 files changed, 149 insertions(+), 2190 deletions(-) delete mode 100644 palace/linalg/pc.cpp delete mode 100644 
palace/linalg/pc.hpp diff --git a/palace/drivers/basesolver.cpp b/palace/drivers/basesolver.cpp index 07b98f7f3..e07917e84 100644 --- a/palace/drivers/basesolver.cpp +++ b/palace/drivers/basesolver.cpp @@ -6,6 +6,7 @@ #include #include #include +#include "linalg/ksp.hpp" #include "models/domainpostoperator.hpp" #include "models/postoperator.hpp" #include "models/surfacepostoperator.hpp" @@ -100,7 +101,7 @@ void BaseSolver::SaveMetadata(const mfem::ParFiniteElementSpace &fespace) const } } -void BaseSolver::SaveMetadata(int ksp_mult, int ksp_it) const +void BaseSolver::SaveMetadata(const KspSolver &ksp) const { if (post_dir.length() == 0) { @@ -109,8 +110,8 @@ void BaseSolver::SaveMetadata(int ksp_mult, int ksp_it) const if (root) { json meta = LoadMetadata(post_dir); - meta["LinearSolver"]["TotalSolves"] = ksp_mult; - meta["LinearSolver"]["TotalIts"] = ksp_it; + meta["LinearSolver"]["TotalSolves"] = ksp.NumTotalMult(); + meta["LinearSolver"]["TotalIts"] = ksp.NumTotalMultIter(); WriteMetadata(post_dir, meta); } } diff --git a/palace/drivers/basesolver.hpp b/palace/drivers/basesolver.hpp index 11efabd93..444ca5778 100644 --- a/palace/drivers/basesolver.hpp +++ b/palace/drivers/basesolver.hpp @@ -21,6 +21,7 @@ namespace palace { class IoData; +class KspSolver; class PostOperator; class Timer; @@ -80,7 +81,7 @@ class BaseSolver // These methods write different simulation metadata to a JSON file in post_dir. 
void SaveMetadata(const mfem::ParFiniteElementSpace &fespace) const; - void SaveMetadata(int ksp_mult, int ksp_it) const; + void SaveMetadata(const KspSolver &ksp) const; void SaveMetadata(const Timer &timer) const; }; diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp index baff3b1aa..4a1a51227 100644 --- a/palace/drivers/drivensolver.cpp +++ b/palace/drivers/drivensolver.cpp @@ -8,7 +8,6 @@ #include "linalg/complex.hpp" #include "linalg/ksp.hpp" #include "linalg/operator.hpp" -#include "linalg/pc.hpp" #include "linalg/petsc.hpp" #include "models/lumpedportoperator.hpp" #include "models/postoperator.hpp" diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp index 8f73a1b8f..16a776b5a 100644 --- a/palace/drivers/eigensolver.cpp +++ b/palace/drivers/eigensolver.cpp @@ -10,7 +10,6 @@ #include "linalg/feast.hpp" #include "linalg/ksp.hpp" #include "linalg/operator.hpp" -#include "linalg/pc.hpp" #include "linalg/petsc.hpp" #include "linalg/slepc.hpp" #include "models/lumpedportoperator.hpp" diff --git a/palace/drivers/electrostaticsolver.cpp b/palace/drivers/electrostaticsolver.cpp index 4f6cbca15..111b90798 100644 --- a/palace/drivers/electrostaticsolver.cpp +++ b/palace/drivers/electrostaticsolver.cpp @@ -4,17 +4,14 @@ #include "electrostaticsolver.hpp" #include -#include "linalg/gmg.hpp" +#include "linalg/ksp.hpp" #include "linalg/operator.hpp" -#include "linalg/pc.hpp" #include "models/laplaceoperator.hpp" #include "models/postoperator.hpp" #include "utils/communication.hpp" #include "utils/iodata.hpp" #include "utils/timer.hpp" -// XX TODO WORKING FOR MONDAY! - namespace palace { @@ -31,46 +28,9 @@ void ElectrostaticSolver::Solve(std::vector> &mes laplaceop.GetStiffnessMatrix(K); SaveMetadata(laplaceop.GetH1Space()); - // XX TODO REVISIT BELOW... - - // // Set up the linear solver. 
- // std::unique_ptr pc = - // ConfigurePreconditioner(iodata, laplaceop.GetDbcMarker(), laplaceop.GetH1Spaces()); - // auto *gmg = dynamic_cast(pc.get()); - // if (gmg) - // { - // gmg->SetOperator(K); - // } - // else - // { - // pc->SetOperator(*K.back()); - // } - - // mfem::IterativeSolver::PrintLevel print = - // mfem::IterativeSolver::PrintLevel().Warnings().Errors(); - // if (iodata.problem.verbose > 0) - // { - // print.Summary(); - // if (iodata.problem.verbose > 1) - // { - // print.Iterations(); - // if (iodata.problem.verbose > 2) - // { - // print.All(); - // } - // } - // } - // mfem::CGSolver pcg(mesh.back()->GetComm()); - // pcg.SetRelTol(iodata.solver.linear.tol); - // pcg.SetMaxIter(iodata.solver.linear.max_it); - // pcg.SetPrintLevel(print); - // pcg.SetOperator(*K.back()); // Call before SetPreconditioner, PC operator set - // separately pcg.SetPreconditioner(*pc); if (iodata.solver.linear.ksp_type != - // config::LinearSolverData::KspType::DEFAULT && - // iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::CG) - // { - // Mpi::Warning("Electrostatic problem type always uses CG as the Krylov solver!\n"); - // } + // Set up the linear solver. + KspSolver ksp(iodata, laplaceop.GetH1Spaces()); + ksp.SetOperator(*K.back(), K); // Terminal indices are the set of boundaries over which to compute the capacitance // matrix. Terminal boundaries are aliases for ports. @@ -93,28 +53,19 @@ void ElectrostaticSolver::Solve(std::vector> &mes Mpi::Print("\nIt {:d}/{:d}: Index = {:d} (elapsed time = {:.2e} s)\n", step + 1, nstep, idx, Timer::Duration(timer.Now() - t0).count()); - // // Form and solve the linear system for a prescribed nonzero voltage on the - // specified - // // terminal. 
- // Mpi::Print("\n"); - // V[step].SetSize(RHS.Size()); - // laplaceop.GetExcitationVector(idx, *K.back(), *Ke.back(), V[step], RHS); - // timer.construct_time += timer.Lap(); + // Form and solve the linear system for a prescribed nonzero voltage on the specified + // terminal. + Mpi::Print("\n"); + laplaceop.GetExcitationVector(idx, *K.back(), V[step], RHS); + timer.construct_time += timer.Lap(); - // pcg.Mult(RHS, V[step]); - // if (!pcg.GetConverged()) - // { - // Mpi::Warning("Linear solver did not converge in {:d} iterations!\n", - // pcg.GetNumIterations()); - // } - // ksp_it += pcg.GetNumIterations(); - // timer.solve_time += timer.Lap(); + ksp.Mult(RHS, V[step]); + timer.solve_time += timer.Lap(); - // // V[step]->Print(); - // Mpi::Print(" Sol. ||V|| = {:.6e} (||RHS|| = {:.6e})\n", - // std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), V[step], V[step])), - // std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), RHS, RHS))); - // timer.postpro_time += timer.Lap(); + Mpi::Print(" Sol. ||V|| = {:.6e} (||RHS|| = {:.6e})\n", + linalg::Norml2(K.back()->GetComm(), V[step]), + linalg::Norml2(K.back()->GetComm(), RHS)); + timer.postpro_time += timer.Lap(); // Next terminal. step++; @@ -122,7 +73,7 @@ void ElectrostaticSolver::Solve(std::vector> &mes // Postprocess the capacitance matrix from the computed field solutions. 
const auto io_time_prev = timer.io_time; - SaveMetadata(nstep, ksp_it); + SaveMetadata(ksp); Postprocess(laplaceop, postop, V, timer); timer.postpro_time += timer.Lap() - (timer.io_time - io_time_prev); } diff --git a/palace/drivers/magnetostaticsolver.cpp b/palace/drivers/magnetostaticsolver.cpp index fc8118751..b16609bb0 100644 --- a/palace/drivers/magnetostaticsolver.cpp +++ b/palace/drivers/magnetostaticsolver.cpp @@ -4,9 +4,8 @@ #include "magnetostaticsolver.hpp" #include -#include "linalg/gmg.hpp" +#include "linalg/ksp.hpp" #include "linalg/operator.hpp" -#include "linalg/pc.hpp" #include "models/curlcurloperator.hpp" #include "models/postoperator.hpp" #include "models/surfacecurrentoperator.hpp" @@ -14,8 +13,6 @@ #include "utils/iodata.hpp" #include "utils/timer.hpp" -// XX TODO WORKING FOR MONDAY! - namespace palace { @@ -31,47 +28,9 @@ void MagnetostaticSolver::Solve(std::vector> &mes curlcurlop.GetStiffnessMatrix(K); SaveMetadata(curlcurlop.GetNDSpace()); - // XX TODO REVISIT BELOW... - - // // Set up the linear solver. 
- // std::unique_ptr pc = - // ConfigurePreconditioner(iodata, curlcurlop.GetDbcMarker(), - // curlcurlop.GetNDSpaces(), &curlcurlop.GetH1Spaces()); - // auto *gmg = dynamic_cast(pc.get()); - // if (gmg) - // { - // gmg->SetOperator(K); - // } - // else - // { - // pc->SetOperator(*K.back()); - // } - - // mfem::IterativeSolver::PrintLevel print = - // mfem::IterativeSolver::PrintLevel().Warnings().Errors(); - // if (iodata.problem.verbose > 0) - // { - // print.Summary(); - // if (iodata.problem.verbose > 1) - // { - // print.Iterations(); - // if (iodata.problem.verbose > 2) - // { - // print.All(); - // } - // } - // } - // mfem::CGSolver pcg(mesh.back()->GetComm()); - // pcg.SetRelTol(iodata.solver.linear.tol); - // pcg.SetMaxIter(iodata.solver.linear.max_it); - // pcg.SetPrintLevel(print); - // pcg.SetOperator(*K.back()); // Call before SetPreconditioner, PC operator set - // separately pcg.SetPreconditioner(*pc); if (iodata.solver.linear.ksp_type != - // config::LinearSolverData::KspType::DEFAULT && - // iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::CG) - // { - // Mpi::Warning("Magnetostatic problem type always uses CG as the Krylov solver!\n"); - // } + // Set up the linear solver. + KspSolver ksp(iodata, curlcurlop.GetNDSpaces(), &curlcurlop.GetH1Spaces()); + ksp.SetOperator(*K.back(), K); // Terminal indices are the set of boundaries over which to compute the inductance matrix. PostOperator postop(iodata, curlcurlop, "magnetostatic"); @@ -87,32 +46,27 @@ void MagnetostaticSolver::Solve(std::vector> &mes // Main loop over current source boundaries. Mpi::Print("\nComputing magnetostatic fields for {:d} source boundar{}\n", nstep, (nstep > 1) ? 
"ies" : "y"); - int step = 0, ksp_it = 0; + int step = 0; auto t0 = timer.Now(); for (const auto &[idx, data] : curlcurlop.GetSurfaceCurrentOp()) { Mpi::Print("\nIt {:d}/{:d}: Index = {:d} (elapsed time = {:.2e} s)\n", step + 1, nstep, idx, Timer::Duration(timer.Now() - t0).count()); - // // Form and solve the linear system for a prescribed current on the specified - // source. Mpi::Print("\n"); A[step].SetSize(RHS.Size()); A[step] = 0.0; - // curlcurlop.GetExcitationVector(idx, RHS); - // timer.construct_time += timer.Lap(); + // Form and solve the linear system for a prescribed current on the specified source. + Mpi::Print("\n"); + A[step].SetSize(RHS.Size()); + A[step] = 0.0; + curlcurlop.GetExcitationVector(idx, RHS); + timer.construct_time += timer.Lap(); - // pcg.Mult(RHS, A[step]); - // if (!pcg.GetConverged()) - // { - // Mpi::Warning("Linear solver did not converge in {:d} iterations!\n", - // pcg.GetNumIterations()); - // } - // ksp_it += pcg.GetNumIterations(); - // timer.solve_time += timer.Lap(); + ksp.Mult(RHS, A[step]); + timer.solve_time += timer.Lap(); - // // A[step]->Print(); - // Mpi::Print(" Sol. ||A|| = {:.6e} (||RHS|| = {:.6e})\n", - // std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), A[step], A[step])), - // std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), RHS, RHS))); - // timer.postpro_time += timer.Lap(); + Mpi::Print(" Sol. ||A|| = {:.6e} (||RHS|| = {:.6e})\n", + linalg::Norml2(K.back()->GetComm(), A[step]), + linalg::Norml2(K.back()->GetComm(), RHS)); + timer.postpro_time += timer.Lap(); // Next source. step++; @@ -120,7 +74,7 @@ void MagnetostaticSolver::Solve(std::vector> &mes // Postprocess the capacitance matrix from the computed field solutions. 
const auto io_time_prev = timer.io_time; - SaveMetadata(nstep, ksp_it); + SaveMetadata(ksp); Postprocess(curlcurlop, postop, A, timer); timer.postpro_time += timer.Lap() - (timer.io_time - io_time_prev); } diff --git a/palace/drivers/transientsolver.cpp b/palace/drivers/transientsolver.cpp index 29e51d9a3..6a33e93dc 100644 --- a/palace/drivers/transientsolver.cpp +++ b/palace/drivers/transientsolver.cpp @@ -105,8 +105,8 @@ void TransientSolver::Solve(std::vector> &mesh, postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp()); Mpi::Print(" Sol. ||E|| = {:.6e}, ||B|| = {:.6e}\n", - std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), E, E)), - std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), B, B))); + linalg::Norml2(mesh.back()->GetComm(), E), + linalg::Norml2(mesh.back()->GetComm(), B)); if (!iodata.solver.transient.only_port_post) { E_elec = postop.GetEFieldEnergy(); @@ -124,7 +124,7 @@ void TransientSolver::Solve(std::vector> &mesh, // Increment time step. step++; } - SaveMetadata(timeop.GetTotalKspMult(), timeop.GetTotalKspIter()); + SaveMetadata(timeop.GetLinearSolver()); } std::function TransientSolver::GetTimeExcitation(bool dot) const diff --git a/palace/fem/multigrid.hpp b/palace/fem/multigrid.hpp index b35b50f7f..6387b9aae 100644 --- a/palace/fem/multigrid.hpp +++ b/palace/fem/multigrid.hpp @@ -86,6 +86,8 @@ mfem::ParFiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy( } mfem::ParFiniteElementSpaceHierarchy fespaces(mesh[0].get(), fespace, false, true); + // XX TODO: LibCEED transfer operators! 
+ // h-refinement for (std::size_t l = 1; l < mesh.size(); l++) { diff --git a/palace/linalg/distrelaxation.hpp b/palace/linalg/distrelaxation.hpp index 823ea816b..a4e8a338c 100644 --- a/palace/linalg/distrelaxation.hpp +++ b/palace/linalg/distrelaxation.hpp @@ -53,130 +53,6 @@ class DistRelaxationSmoother : public mfem::Solver void Mult(const Vector &x, Vector &y) const override; void MultTranspose(const Vector &x, Vector &y) const override; - - // //XX TODO REMOVE... - // void Mult(const mfem::Vector &x, mfem::Vector &y) const override - // { - // mfem::Array X(1); - // mfem::Array Y(1); - // X[0] = &x; - // Y[0] = &y; - // ArrayMult(X, Y); - // } - - // void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override - // { - // mfem::Array X(1); - // mfem::Array Y(1); - // X[0] = &x; - // Y[0] = &y; - // ArrayMultTranspose(X, Y); - // } - - // void ArrayMult(const mfem::Array &X, - // mfem::Array &Y) const override - // { - // // Initialize. - // const int nrhs = X.Size(); - // mfem::Array R(nrhs), X_G(nrhs), Y_G(nrhs); - // std::vector rrefs(nrhs), xgrefs(nrhs), ygrefs(nrhs); - // if (nrhs * height != r.Size()) - // { - // r.SetSize(nrhs * height); - // x_G.SetSize(nrhs * A_G->Height()); - // y_G.SetSize(nrhs * A_G->Height()); - // } - // for (int j = 0; j < nrhs; j++) - // { - // rrefs[j].MakeRef(r, j * height, height); - // xgrefs[j].MakeRef(x_G, j * A_G->Height(), A_G->Height()); - // ygrefs[j].MakeRef(y_G, j * A_G->Height(), A_G->Height()); - // R[j] = &rrefs[j]; - // X_G[j] = &xgrefs[j]; - // Y_G[j] = &ygrefs[j]; - // } - - // // Apply smoother. 
- // for (int it = 0; it < pc_it; it++) - // { - // // y = y + B (x - A y) - // B->iterative_mode = (iterative_mode || it > 0); - // B->ArrayMult(X, Y); - - // // y = y + G B_G Gᵀ (x - A y) - // A->ArrayMult(Y, R); - // for (int j = 0; j < nrhs; j++) - // { - // subtract(*X[j], *R[j], *R[j]); - // } - // G->ArrayMultTranspose(R, X_G); - // for (int j = 0; j < nrhs; j++) - // { - // X_G[j]->SetSubVector(h1_dbc_tdof_list, 0.0); - // } - // B_G->ArrayMult(X_G, Y_G); - // G->ArrayAddMult(Y_G, Y, 1.0); - // } - // } - - // void ArrayMultTranspose(const mfem::Array &X, - // mfem::Array &Y) const override - // { - // // Initialize. - // const int nrhs = X.Size(); - // mfem::Array R(nrhs), X_G(nrhs), Y_G(nrhs); - // std::vector rrefs(nrhs), xgrefs(nrhs), ygrefs(nrhs); - // if (nrhs * height != r.Size()) - // { - // r.SetSize(nrhs * height); - // x_G.SetSize(nrhs * A_G->Height()); - // y_G.SetSize(nrhs * A_G->Height()); - // } - // for (int j = 0; j < nrhs; j++) - // { - // rrefs[j].MakeRef(r, j * height, height); - // xgrefs[j].MakeRef(x_G, j * A_G->Height(), A_G->Height()); - // ygrefs[j].MakeRef(y_G, j * A_G->Height(), A_G->Height()); - // R[j] = &rrefs[j]; - // X_G[j] = &xgrefs[j]; - // Y_G[j] = &ygrefs[j]; - // } - - // // Apply transpose. 
- // B->iterative_mode = true; - // for (int it = 0; it < pc_it; it++) - // { - // // y = y + G B_Gᵀ Gᵀ (x - A y) - // if (iterative_mode || it > 0) - // { - // A->ArrayMult(Y, R); - // for (int j = 0; j < nrhs; j++) - // { - // subtract(*X[j], *R[j], *R[j]); - // } - // G->ArrayMultTranspose(R, X_G); - // for (int j = 0; j < nrhs; j++) - // { - // X_G[j]->SetSubVector(h1_dbc_tdof_list, 0.0); - // } - // B_G->ArrayMultTranspose(X_G, Y_G); - // G->ArrayAddMult(Y_G, Y, 1.0); - // } - // else - // { - // G->ArrayMultTranspose(X, X_G); - // for (int j = 0; j < nrhs; j++) - // { - // X_G[j]->SetSubVector(h1_dbc_tdof_list, 0.0); - // } - // B_G->ArrayMultTranspose(X_G, Y_G); - // G->ArrayMult(Y_G, Y); - // } - - // // y = y + Bᵀ (x - A y) - // B->ArrayMultTranspose(X, Y); - // } - // } }; } // namespace palace diff --git a/palace/linalg/hypre.cpp b/palace/linalg/hypre.cpp index 4a2167051..ec918718d 100644 --- a/palace/linalg/hypre.cpp +++ b/palace/linalg/hypre.cpp @@ -3,14 +3,12 @@ #include "hypre.hpp" -#include - namespace mfem { mfem::HypreParMatrix * HypreParMatrixFromBlocks(mfem::Array2D &blocks, - mfem::Array2D *blockCoeff) + mfem::Array2D *coeff) { mfem::Array2D blocks_without_const(blocks.NumRows(), blocks.NumCols()); @@ -21,857 +19,7 @@ HypreParMatrixFromBlocks(mfem::Array2D &blocks, blocks_without_const(i, j) = const_cast(blocks(i, j)); } } - return HypreParMatrixFromBlocks(blocks_without_const, blockCoeff); + return HypreParMatrixFromBlocks(blocks_without_const, coeff); } } // namespace mfem - -namespace palace::hypre -{ - -void hypreParCSREliminateRowsCols(hypre_ParCSRMatrix *A, const mfem::Array &rows_cols, - hypre::DiagonalPolicy diag_policy, HYPRE_Complex diag, - bool ignore_rows) -{ - hypre_error_flag = 0; - - hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); - HYPRE_Real *A_diag_a = hypre_CSRMatrixData(A_diag); - HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); - HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); - HYPRE_Int ncols_A_diag = 
hypre_CSRMatrixNumCols(A_diag); - - hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); - HYPRE_Real *A_offd_a = hypre_CSRMatrixData(A_offd); - HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); - HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); - HYPRE_Int ncols_A_offd = hypre_CSRMatrixNumCols(A_offd); - - HYPRE_BigInt *col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A); - HYPRE_Int *marker_offd = nullptr; - - HYPRE_BigInt first_row = hypre_ParCSRMatrixFirstRowIndex(A); - HYPRE_Int nrows_local = hypre_CSRMatrixNumRows(A_diag); - - HYPRE_Int i, j, k, nnz_diag, nnz_offd, A_diag_i_i, A_offd_i_i; - - // Get markers for columns of the diagonal and off-diagonal matrix to eliminate - // (from mfem::internal::hypre_ParCSRMatrixEliminateAAe). - HYPRE_Int *eliminate_diag_col, *eliminate_offd_col; - { - hypre_ParCSRCommHandle *comm_handle; - hypre_ParCSRCommPkg *comm_pkg; - HYPRE_Int num_sends, *int_buf_data; - HYPRE_Int index, start; - - eliminate_diag_col = mfem_hypre_CTAlloc_host(HYPRE_Int, ncols_A_diag); - eliminate_offd_col = mfem_hypre_CTAlloc_host(HYPRE_Int, ncols_A_offd); - - // Make sure A has a communication package. - comm_pkg = hypre_ParCSRMatrixCommPkg(A); - if (!comm_pkg) - { - hypre_MatvecCommPkgCreate(A); - comm_pkg = hypre_ParCSRMatrixCommPkg(A); - } - - // Which of the local rows are to be eliminated. - for (i = 0; i < ncols_A_diag; i++) - { - eliminate_diag_col[i] = 0; - } - for (i = 0; i < rows_cols.Size(); i++) - { - eliminate_diag_col[rows_cols[i]] = 1; - } - - // Use a Matvec communication pattern to find (in eliminate_col) which of the local offd - // columns are to be eliminated. 
- num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - int_buf_data = mfem_hypre_CTAlloc_host( - HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); - index = 0; - for (i = 0; i < num_sends; i++) - { - start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1); j++) - { - k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); - int_buf_data[index++] = eliminate_diag_col[k]; - } - } - comm_handle = - hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, eliminate_offd_col); - - // Finish the communication. - hypre_ParCSRCommHandleDestroy(comm_handle); - - mfem_hypre_TFree_host(int_buf_data); - } - - marker_offd = hypre_CTAlloc(HYPRE_Int, ncols_A_offd, HYPRE_MEMORY_HOST); - - nnz_diag = nnz_offd = A_diag_i_i = A_offd_i_i = 0; - for (i = 0; i < nrows_local; i++) - { - // Drop eliminated entries in the diagonal block. - for (j = A_diag_i_i; j < A_diag_i[i + 1]; j++) - { - HYPRE_Int col = A_diag_j[j]; - HYPRE_Complex val = A_diag_a[j]; - if ((!ignore_rows && eliminate_diag_col[i]) || eliminate_diag_col[col]) - { - // Always keep the diagonal entry (even if it is 0). - if (!ignore_rows && i == col) - { - if (diag_policy == DiagonalPolicy::USER) - { - val = diag; - } - else if (diag_policy == DiagonalPolicy::ONE) - { - val = 1.0; - } - else if (diag_policy == DiagonalPolicy::ZERO) - { - val = 0.0; - } - // else (diag_policy == DiagonalPolicy::KEEP) - } - else - { - continue; - } - } - A_diag_j[nnz_diag] = col; - A_diag_a[nnz_diag] = val; - nnz_diag++; - } - - // Drop eliminated entries in the off-diagonal block. - for (j = A_offd_i_i; j < A_offd_i[i + 1]; j++) - { - HYPRE_Int col = A_offd_j[j]; - HYPRE_Complex val = A_offd_a[j]; - if ((!ignore_rows && eliminate_diag_col[i]) || eliminate_offd_col[col]) - { - // In normal cases: diagonal entry should not appear in A_offd (but this can still - // be possible). 
- if (!ignore_rows && i + first_row == col_map_offd_A[col]) - { - if (diag_policy == DiagonalPolicy::USER) - { - val = diag; - } - else if (diag_policy == DiagonalPolicy::ONE) - { - val = 1.0; - } - else if (diag_policy == DiagonalPolicy::ZERO) - { - val = 0.0; - } - // else (diag_policy == DiagonalPolicy::KEEP) - } - else - { - continue; - } - } - if (marker_offd[col] == 0) - { - marker_offd[col] = 1; - } - A_offd_j[nnz_offd] = col; - A_offd_a[nnz_offd] = val; - nnz_offd++; - } - A_diag_i_i = A_diag_i[i + 1]; - A_offd_i_i = A_offd_i[i + 1]; - A_diag_i[i + 1] = nnz_diag; - A_offd_i[i + 1] = nnz_offd; - } - - mfem_hypre_TFree_host(eliminate_offd_col); - mfem_hypre_TFree_host(eliminate_diag_col); - - hypre_CSRMatrixNumNonzeros(A_diag) = nnz_diag; - hypre_CSRMatrixNumNonzeros(A_offd) = nnz_offd; - hypre_ParCSRMatrixSetNumNonzeros(A); - hypre_ParCSRMatrixDNumNonzeros(A) = (HYPRE_Real)hypre_ParCSRMatrixNumNonzeros(A); - - for (i = 0, k = 0; i < ncols_A_offd; i++) - { - if (marker_offd[i]) - { - col_map_offd_A[k] = col_map_offd_A[i]; - marker_offd[i] = k++; - } - } - hypre_CSRMatrixNumCols(A_offd) = k; // ncols_A_offd = k - for (i = 0; i < nnz_offd; i++) - { - A_offd_j[i] = marker_offd[A_offd_j[i]]; - } - - hypre_TFree(marker_offd, HYPRE_MEMORY_HOST); - - if (hypre_ParCSRMatrixCommPkg(A)) - { - hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(A)); - } - hypre_MatvecCommPkgCreate(A); - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); -} - -void hypreParCSREliminateRowsColsv2(hypre_ParCSRMatrix *A, - const mfem::Array &rows_cols, - hypre::DiagonalPolicy diag_policy, HYPRE_Complex diag, - bool ignore_rows) -{ - hypre_error_flag = 0; - - hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); - HYPRE_Int ncols_A_diag = hypre_CSRMatrixNumRows(A_diag); - - hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); - HYPRE_Int ncols_A_offd = hypre_CSRMatrixNumCols(A_offd); - - const auto n_ess_dofs = rows_cols.Size(); - const 
auto ess_dofs_d = - rows_cols.GetMemory().Read(mfem::GetHypreMemoryClass(), n_ess_dofs); - - // Start communication to figure out which columns need to be eliminated in the - // off-diagonal block. - hypre_ParCSRCommHandle *comm_handle; - HYPRE_Int *int_buf_data, *eliminate_row, *eliminate_col; - { - eliminate_row = mfem_hypre_CTAlloc(HYPRE_Int, ncols_A_diag); - eliminate_col = mfem_hypre_CTAlloc(HYPRE_Int, ncols_A_offd); - - // Make sure A has a communication package. - hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); - if (!comm_pkg) - { - hypre_MatvecCommPkgCreate(A); - comm_pkg = hypre_ParCSRMatrixCommPkg(A); - } - - // Which of the local rows are to be eliminated? - MFEM_HYPRE_FORALL(i, ncols_A_diag, { eliminate_row[i] = 0; }); - MFEM_HYPRE_FORALL(i, n_ess_dofs, { eliminate_row[ess_dofs_d[i]] = 1; }); - - // Use a matvec communication pattern to find (in eliminate_col) which of the local offd - // columns are to be eliminated. - HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - HYPRE_Int int_buf_sz = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - int_buf_data = mfem_hypre_CTAlloc(HYPRE_Int, int_buf_sz); - - HYPRE_Int *send_map_elmts; -#if defined(HYPRE_USING_GPU) - hypre_ParCSRCommPkgCopySendMapElmtsToDevice(comm_pkg); - send_map_elmts = hypre_ParCSRCommPkgDeviceSendMapElmts(comm_pkg); -#else - send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); -#endif - MFEM_HYPRE_FORALL(i, int_buf_sz, { - auto k = send_map_elmts[i]; - int_buf_data[i] = eliminate_row[k]; - }); - -#if defined(HYPRE_USING_GPU) - // Try to use device-aware MPI for the communication if available. - comm_handle = - hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data, - HYPRE_MEMORY_DEVICE, eliminate_col); -#else - comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, eliminate_col); -#endif - } - - // Eliminate rows and columns in the diagonal block. 
- if (ignore_rows) - { - const auto nrows_A_diag = hypre_CSRMatrixNumRows(A_diag); - const auto I = A_diag->i; - const auto J = A_diag->j; - auto data = A_diag->data; - MFEM_HYPRE_FORALL(i, nrows_A_diag, { - for (auto j = I[i]; j < I[i + 1]; j++) - { - data[j] *= 1 - eliminate_row[J[j]]; - } - }); - } - else - { - const auto I = A_diag->i; - const auto J = A_diag->j; - auto data = A_diag->data; - MFEM_HYPRE_FORALL(i, n_ess_dofs, { - const auto idof = ess_dofs_d[i]; - for (auto j = I[idof]; j < I[idof + 1]; j++) - { - const auto jdof = J[j]; - if (jdof == idof) - { - if (diag_policy == DiagonalPolicy::USER) - { - data[j] = diag; - } - else if (diag_policy == DiagonalPolicy::ONE) - { - data[j] = 1.0; - } - else if (diag_policy == DiagonalPolicy::ZERO) - { - data[j] = 0.0; - } - // else (diag_policy == DiagonalPolicy::KEEP) - } - else - { - data[j] = 0.0; - for (auto k = I[jdof]; k < I[jdof + 1]; k++) - { - if (J[k] == idof) - { - data[k] = 0.0; - break; - } - } - } - } - }); - } - - // Eliminate rows in the off-diagonal block. - if (!ignore_rows) - { - const auto I = A_offd->i; - auto data = A_offd->data; - MFEM_HYPRE_FORALL(i, n_ess_dofs, { - const auto idof = ess_dofs_d[i]; - for (auto j = I[idof]; j < I[idof + 1]; j++) - { - data[j] = 0.0; - } - }); - } - - // Wait for MPI communication to finish. - hypre_ParCSRCommHandleDestroy(comm_handle); - mfem_hypre_TFree(int_buf_data); - mfem_hypre_TFree(eliminate_row); - - // Eliminate columns in the off-diagonal block. 
- { - const auto nrows_A_offd = hypre_CSRMatrixNumRows(A_offd); - const auto I = A_offd->i; - const auto J = A_offd->j; - auto data = A_offd->data; - MFEM_HYPRE_FORALL(i, nrows_A_offd, { - for (auto j = I[i]; j < I[i + 1]; j++) - { - data[j] *= 1 - eliminate_col[J[j]]; - } - }); - } - - mfem_hypre_TFree(eliminate_col); - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); -} - -hypre_ParCSRMatrix *hypreParCSREliminateRowsWithCols(hypre_ParCSRMatrix *A, - const mfem::Array &rows) -{ - hypre_error_flag = 0; - - HYPRE_Int nrows_local = hypre_ParCSRMatrixNumRows(A); - HYPRE_Int ncols_local = hypre_ParCSRMatrixNumCols(A); - - HYPRE_Int *diag_rows_bc, *offd_rows_bc; - - hypre_ParCSRMatrix *At, *B; - - HYPRE_Int i, j, k; - - diag_rows_bc = mfem_hypre_CTAlloc_host(HYPRE_Int, nrows_local); - - // Which of the local rows are to be eliminated. - for (i = 0; i < rows.Size(); i++) - { - diag_rows_bc[rows[i]] = 1; - } - - hypre_ParCSRMatrixTranspose(A, &At, 1); - hypre_MatvecCommPkgCreate(At); - - // Use a Matvec communication pattern to find which of the rows connected to local columns - // are to be eliminated. 
- { - hypre_ParCSRCommHandle *comm_handle; - hypre_ParCSRCommPkg *comm_pkg; - HYPRE_Int num_sends, *int_buf_data; - HYPRE_Int index, start; - - offd_rows_bc = mfem_hypre_TAlloc_host( - HYPRE_Int, hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(At))); - - comm_pkg = hypre_ParCSRMatrixCommPkg(At); - num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - int_buf_data = mfem_hypre_TAlloc_host( - HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); - index = 0; - for (i = 0; i < num_sends; i++) - { - start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1); j++) - { - k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); - int_buf_data[index++] = diag_rows_bc[k]; - } - } - comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, offd_rows_bc); - - // Finish the communication. - hypre_ParCSRCommHandleDestroy(comm_handle); - - hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST); - } - - // Eliminate the columns of the original matrix (rows in the transposed matrix). - { - hypre_CSRMatrix *Atd = hypre_ParCSRMatrixDiag(At); - HYPRE_Real *AtdA = hypre_CSRMatrixData(Atd); - HYPRE_Int *AtdI = hypre_CSRMatrixI(Atd); - HYPRE_Int *AtdJ = hypre_CSRMatrixJ(Atd); - - hypre_CSRMatrix *Ato = hypre_ParCSRMatrixOffd(At); - HYPRE_Real *AtoA = hypre_CSRMatrixData(Ato); - HYPRE_Int *AtoI = hypre_CSRMatrixI(Ato); - HYPRE_Int *AtoJ = hypre_CSRMatrixJ(Ato); - - HYPRE_Int elim; - - for (i = 0; i < ncols_local; i++) - { - // A column is eliminated if it has a nonzero in an eliminated row. 
- elim = 0; - for (j = AtdI[i]; j < AtdI[i + 1]; j++) - { - if (diag_rows_bc[AtdJ[j]]) - { - elim = 1; - break; - } - } - if (!elim && AtoI) - { - for (j = AtoI[i]; j < AtoI[i + 1]; j++) - { - if (offd_rows_bc[AtoJ[j]]) - { - elim = 1; - break; - } - } - } - if (elim) - { - for (j = AtdI[i]; j < AtdI[i + 1]; j++) - { - // if (!diag_rows_bc[AtdJ[j]]) - AtdA[j] = 0.0; - } - if (AtoI) - { - for (j = AtoI[i]; j < AtoI[i + 1]; j++) - { - // if (!offd_rows_bc[AtoJ[j]]) - AtoA[j] = 0.0; - } - } - } - } - } - - hypre_TFree(diag_rows_bc, HYPRE_MEMORY_HOST); - hypre_TFree(offd_rows_bc, HYPRE_MEMORY_HOST); - - // Create as a new matrix. - hypre_ParCSRMatrixTranspose(At, &B, 1); - hypre_MatvecCommPkgCreate(B); - hypre_ParCSRMatrixDestroy(At); - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); - return B; -} - -hypre_ParCSRMatrix *hypreParCSREliminateColsWithRows(hypre_ParCSRMatrix *A, - const mfem::Array &cols) -{ - hypre_error_flag = 0; - - HYPRE_Int nrows_local = hypre_ParCSRMatrixNumRows(A); - - HYPRE_Int *diag_cols_bc, *offd_cols_bc; - - hypre_ParCSRMatrix *B; - - HYPRE_Int i, j, k; - - diag_cols_bc = mfem_hypre_CTAlloc_host(HYPRE_Int, nrows_local); - - // Which of the local columns are to be eliminated. - for (i = 0; i < cols.Size(); i++) - { - diag_cols_bc[cols[i]] = 1; - } - - // Clone the original matrix. - B = hypre_ParCSRMatrixClone(A, 1); - hypre_MatvecCommPkgCreate(B); - - // Use a Matvec communication pattern to find which of the off-diagonal columns are to be - // eliminated. 
- { - hypre_ParCSRCommHandle *comm_handle; - hypre_ParCSRCommPkg *comm_pkg; - HYPRE_Int num_sends, *int_buf_data; - HYPRE_Int index, start; - - offd_cols_bc = mfem_hypre_TAlloc_host( - HYPRE_Int, hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(B))); - - comm_pkg = hypre_ParCSRMatrixCommPkg(B); - num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - int_buf_data = mfem_hypre_TAlloc_host( - HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); - index = 0; - for (i = 0; i < num_sends; i++) - { - start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1); j++) - { - k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); - int_buf_data[index++] = diag_cols_bc[k]; - } - } - comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, offd_cols_bc); - - // Finish the communication. - hypre_ParCSRCommHandleDestroy(comm_handle); - - hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST); - } - - // Eliminate the rows of the original matrix. - { - hypre_CSRMatrix *Bd = hypre_ParCSRMatrixDiag(B); - HYPRE_Real *BdA = hypre_CSRMatrixData(Bd); - HYPRE_Int *BdI = hypre_CSRMatrixI(Bd); - HYPRE_Int *BdJ = hypre_CSRMatrixJ(Bd); - - hypre_CSRMatrix *Bo = hypre_ParCSRMatrixOffd(B); - HYPRE_Real *BoA = hypre_CSRMatrixData(Bo); - HYPRE_Int *BoI = hypre_CSRMatrixI(Bo); - HYPRE_Int *BoJ = hypre_CSRMatrixJ(Bo); - - HYPRE_Int elim; - - for (i = 0; i < nrows_local; i++) - { - // A column is eliminated if it has a nonzero in an eliminated row. 
- elim = 0; - for (j = BdI[i]; j < BdI[i + 1]; j++) - { - if (diag_cols_bc[BdJ[j]]) - { - elim = 1; - break; - } - } - if (!elim && BoI) - { - for (j = BoI[i]; j < BoI[i + 1]; j++) - { - if (offd_cols_bc[BoJ[j]]) - { - elim = 1; - break; - } - } - } - if (elim) - { - for (j = BdI[i]; j < BdI[i + 1]; j++) - { - // if (!diag_cols_bc[BdJ[j]]) - BdA[j] = 0.0; - } - if (BoI) - { - for (j = BoI[i]; j < BoI[i + 1]; j++) - { - // if (!offd_cols_bc[BoJ[j]]) - BoA[j] = 0.0; - } - } - } - } - } - - hypre_TFree(diag_cols_bc, HYPRE_MEMORY_HOST); - hypre_TFree(offd_cols_bc, HYPRE_MEMORY_HOST); - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); - return B; -} - -void hypreParCSRCopy(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B) -{ - hypre_error_flag = 0; - - hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); - HYPRE_Real *A_diag_a = hypre_CSRMatrixData(A_diag); - HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); - HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); - HYPRE_Int ncols_A_diag = hypre_CSRMatrixNumCols(A_diag); - - hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); - HYPRE_Real *A_offd_a = hypre_CSRMatrixData(A_offd); - HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); - HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); - - HYPRE_BigInt *col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A); - - hypre_CSRMatrix *B_diag = hypre_ParCSRMatrixDiag(B); - HYPRE_Real *B_diag_a = hypre_CSRMatrixData(B_diag); - HYPRE_Int *B_diag_i = hypre_CSRMatrixI(B_diag); - HYPRE_Int *B_diag_j = hypre_CSRMatrixJ(B_diag); - HYPRE_Int ncols_B_diag = hypre_CSRMatrixNumCols(B_diag); - - hypre_CSRMatrix *B_offd = hypre_ParCSRMatrixOffd(B); - HYPRE_Real *B_offd_a = hypre_CSRMatrixData(B_offd); - HYPRE_Int *B_offd_i = hypre_CSRMatrixI(B_offd); - HYPRE_Int *B_offd_j = hypre_CSRMatrixJ(B_offd); - - HYPRE_BigInt *col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B); - - HYPRE_Int i, j, pos; - - HYPRE_BigInt first_row = hypre_ParCSRMatrixFirstRowIndex(A); - 
HYPRE_Int nrows_local = hypre_CSRMatrixNumRows(A_diag); - MFEM_VERIFY(first_row == hypre_ParCSRMatrixFirstRowIndex(B) && - nrows_local == hypre_CSRMatrixNumRows(B_diag) && - ncols_A_diag == ncols_B_diag, - "Invalid mismatch in matrix sizes/distribution!"); - - // Copy the diagonal block A => B. - { - HYPRE_Int *marker = mfem_hypre_CTAlloc_host(HYPRE_Int, ncols_A_diag); - for (j = 0; j < ncols_A_diag; j++) - { - marker[j] = -1; - } - - for (i = 0; i < nrows_local; i++) - { - for (j = A_diag_i[i]; j < A_diag_i[i + 1]; j++) - { - marker[A_diag_j[j]] = j; - } - - for (j = B_diag_i[i]; j < B_diag_i[i + 1]; j++) - { - // Skip entries not in sparsity pattern of B to copy. All columns of B are marked in - // the array because sparsity(B) ⊆ sparsity(A). - pos = marker[B_diag_j[j]]; - MFEM_VERIFY(pos >= A_diag_i[i], - "Found nonzero entry of B in copy which is not in A!"); - B_diag_a[j] = A_diag_a[pos]; - } - } - mfem_hypre_TFree_host(marker); - } - - // Copy the off-diagonal block A => B. - { - for (i = 0; i < nrows_local; i++) - { - std::map marker; - // std::unordered_map marker; - for (j = A_offd_i[i]; j < A_offd_i[i + 1]; j++) - { - marker.insert(std::make_pair(col_map_offd_A[A_offd_j[j]], j)); - } - - for (j = B_offd_i[i]; j < B_offd_i[i + 1]; j++) - { - auto it = marker.find(col_map_offd_B[B_offd_j[j]]); - MFEM_VERIFY(it != marker.end(), - "Found nonzero entry of B in copy which is not in A!"); - pos = it->second; - B_offd_a[j] = A_offd_a[pos]; - } - } - } - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); -} - -void hypreParCSRRowSums(hypre_ParCSRMatrix *A, mfem::Vector &rowsums) -{ - hypre_error_flag = 0; - - hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); - HYPRE_Real *A_diag_a = hypre_CSRMatrixData(A_diag); - HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); - - hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); - HYPRE_Real *A_offd_a = hypre_CSRMatrixData(A_offd); - HYPRE_Int *A_offd_i = 
hypre_CSRMatrixI(A_offd); - - HYPRE_Int nrows_local = hypre_CSRMatrixNumRows(A_diag); - - HYPRE_Int i, j; - HYPRE_Real rowsum; - - for (i = 0; i < nrows_local; i++) - { - rowsum = 0.0; - for (j = A_diag_i[i]; j < A_diag_i[i + 1]; j++) - { - rowsum += std::abs(A_diag_a[j]); - } - for (j = A_offd_i[i]; j < A_offd_i[i + 1]; j++) - { - rowsum += std::abs(A_offd_a[j]); - } - rowsums(i) = rowsum; - } - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); -} - -void hypreParCSRInfNorm(hypre_ParCSRMatrix *Ar, hypre_ParCSRMatrix *Ai, HYPRE_Real *norm) -{ - hypre_error_flag = 0; - - MPI_Comm comm = hypre_ParCSRMatrixComm(Ar); - - hypre_CSRMatrix *Ar_diag = hypre_ParCSRMatrixDiag(Ar); - HYPRE_Real *Ar_diag_a = hypre_CSRMatrixData(Ar_diag); - HYPRE_Int *Ar_diag_i = hypre_CSRMatrixI(Ar_diag); - HYPRE_Int *Ar_diag_j = hypre_CSRMatrixJ(Ar_diag); - HYPRE_Int ncols_Ar_diag = hypre_CSRMatrixNumCols(Ar_diag); - - hypre_CSRMatrix *Ar_offd = hypre_ParCSRMatrixOffd(Ar); - HYPRE_Real *Ar_offd_a = hypre_CSRMatrixData(Ar_offd); - HYPRE_Int *Ar_offd_i = hypre_CSRMatrixI(Ar_offd); - HYPRE_Int *Ar_offd_j = hypre_CSRMatrixJ(Ar_offd); - - HYPRE_BigInt *col_map_offd_Ar = hypre_ParCSRMatrixColMapOffd(Ar); - - hypre_CSRMatrix *Ai_diag = hypre_ParCSRMatrixDiag(Ai); - HYPRE_Real *Ai_diag_a = hypre_CSRMatrixData(Ai_diag); - HYPRE_Int *Ai_diag_i = hypre_CSRMatrixI(Ai_diag); - HYPRE_Int *Ai_diag_j = hypre_CSRMatrixJ(Ai_diag); - HYPRE_Int ncols_Ai_diag = hypre_CSRMatrixNumCols(Ai_diag); - - hypre_CSRMatrix *Ai_offd = hypre_ParCSRMatrixOffd(Ai); - HYPRE_Real *Ai_offd_a = hypre_CSRMatrixData(Ai_offd); - HYPRE_Int *Ai_offd_i = hypre_CSRMatrixI(Ai_offd); - HYPRE_Int *Ai_offd_j = hypre_CSRMatrixJ(Ai_offd); - - HYPRE_BigInt *col_map_offd_Ai = hypre_ParCSRMatrixColMapOffd(Ai); - - HYPRE_Int *marker_diag; - - HYPRE_BigInt first_row = hypre_ParCSRMatrixFirstRowIndex(Ar); - HYPRE_Int nrows_local = hypre_CSRMatrixNumRows(Ar_diag); - MFEM_VERIFY(first_row == 
hypre_ParCSRMatrixFirstRowIndex(Ai) && - nrows_local == hypre_CSRMatrixNumRows(Ai_diag) && - ncols_Ar_diag == ncols_Ai_diag, - "Invalid mismatch in matrix sizes/distribution!"); - - HYPRE_Int i, j, pos; - HYPRE_Real rowsum, maxsum = 0.0; - - // We assume the sparsity of the imaginary part is a subset of the real part. Entries - // outside the sparsity of the real part will be ignored for the calculation of matrix - // norm. - marker_diag = mfem_hypre_CTAlloc_host(HYPRE_Int, ncols_Ai_diag); - for (j = 0; j < ncols_Ai_diag; j++) - { - marker_diag[j] = -1; - } - - for (i = 0; i < nrows_local; i++) - { - rowsum = 0.0; - - // Diagonal part - for (j = Ai_diag_i[i]; j < Ai_diag_i[i + 1]; j++) - { - marker_diag[Ai_diag_j[j]] = j; - } - - for (j = Ar_diag_i[i]; j < Ar_diag_i[i + 1]; j++) - { - pos = marker_diag[Ar_diag_j[j]]; - if (pos >= Ai_diag_i[i]) - { - // Column entry is nonzero in both Ar and Ai. - rowsum += std::hypot(Ar_diag_a[j], Ai_diag_a[pos]); - } - else - { - rowsum += std::abs(Ar_diag_a[j]); - } - } - - // Off-diagonal part - std::map marker_offd; - // std::unordered_map marker_offd; - for (j = Ai_offd_i[i]; j < Ai_offd_i[i + 1]; j++) - { - marker_offd.insert(std::make_pair(col_map_offd_Ai[Ai_offd_j[j]], j)); - } - - for (j = Ar_offd_i[i]; j < Ar_offd_i[i + 1]; j++) - { - auto it = marker_offd.find(col_map_offd_Ar[Ar_offd_j[j]]); - if (it != marker_offd.end()) - { - // Column entry is nonzero in both Ar and Ai. 
- pos = it->second; - rowsum += std::hypot(Ar_offd_a[j], Ai_offd_a[pos]); - } - else - { - rowsum += std::abs(Ar_offd_a[j]); - } - } - - maxsum = std::max(maxsum, rowsum); - } - - mfem_hypre_TFree_host(marker_diag); - - MPI_Allreduce(&maxsum, norm, 1, HYPRE_MPI_REAL, MPI_MAX, comm); - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); -} - -} // namespace palace::hypre diff --git a/palace/linalg/hypre.hpp b/palace/linalg/hypre.hpp index 606e5c2b9..00131a05d 100644 --- a/palace/linalg/hypre.hpp +++ b/palace/linalg/hypre.hpp @@ -6,67 +6,17 @@ #include +// XX TODO REVISIT AFTER WAVE PORT OPERATOR IF NEEDED.... + namespace mfem { -// XX TODO REMOVE WHAT CAN BE REMOVED... - // Convenience wrapper for casting away the const on the pointers and dispatching onto the // original function that has the argument type: mfem::Array2D &. mfem::HypreParMatrix * HypreParMatrixFromBlocks(mfem::Array2D &blocks, - mfem::Array2D *blockCoeff = nullptr); + mfem::Array2D *coeff = nullptr); } // namespace mfem -namespace palace::hypre -{ - -// -// Extensions to Hypre linear algebra routines. -// - -// Version 1: Eliminates (including from the sparsity pattern) the given list of -// rows/columns from the square matrix and sets the diagonal value according to -// diag_policy. A combination of mfem::HypreParMatrix:: EliminateRowsCols and -// hypre_ParCSRMatrixDropSmallEntriesHost. Specialized for host operation currently. -// Version 2: A mfem::HypreParMatrix::EliminateBC with option to specify a general scalar -// for eliminated rows. -// The specified rows/columns should be in local numbering. 
-enum class DiagonalPolicy -{ - USER, - ZERO, - ONE, - KEEP -}; -void hypreParCSREliminateRowsCols(hypre_ParCSRMatrix *A, const mfem::Array &rows_cols, - hypre::DiagonalPolicy diag_policy, - HYPRE_Complex diag = 0.0, bool ignore_rows = false); -void hypreParCSREliminateRowsColsv2(hypre_ParCSRMatrix *A, - const mfem::Array &rows_cols, - hypre::DiagonalPolicy diag_policy, - HYPRE_Complex diag = 0.0, bool ignore_rows = false); - -// Eliminates (zeros) the given list of rows (columns), and also eliminates all columns -// (rows) which contain a nonzero in the specified rows (columns) to be eliminated. From -// Hypre's hypre_AMESetup. Returns as a new matrix (leaves the old matrix intact). The -// specified rows (columns) should be in local numbering. -hypre_ParCSRMatrix *hypreParCSREliminateRowsWithCols(hypre_ParCSRMatrix *A, - const mfem::Array &rows); -hypre_ParCSRMatrix *hypreParCSREliminateColsWithRows(hypre_ParCSRMatrix *A, - const mfem::Array &cols); - -// Copy the entries from A into B, for sparsity(B) ⊆ sparsity(A). -void hypreParCSRCopy(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B); - -// Get the row sums (with absolute value) of the local rows of the matrix. -void hypreParCSRRowSums(hypre_ParCSRMatrix *A, mfem::Vector &rowsums); - -// Compute the matrix infinity norm for a complex matrix stored with separate real and -// imaginary parts, for sparsity(Ai) ⊆ sparsity(Ar). -void hypreParCSRInfNorm(hypre_ParCSRMatrix *Ar, hypre_ParCSRMatrix *Ai, HYPRE_Real *norm); - -} // namespace palace::hypre - #endif // PALACE_LINALG_HYPRE_HPP diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp index 1a2dd7914..77152c205 100644 --- a/palace/linalg/ksp.cpp +++ b/palace/linalg/ksp.cpp @@ -189,8 +189,10 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, // (ownership of pc is transfered to the GeometricMultigridSolver). When a special // auxiliary space smoother for pre-/post-smoothing is not desired, the auxiliary // space is a nullptr here. 
- return std::make_unique(iodata, std::move(pc), fespaces, - aux_fespaces); + return std::make_unique( + iodata, std::move(pc), fespaces, + (iodata.problem.type != config::ProblemData::Type::MAGNETOSTATIC) ? aux_fespaces + : nullptr); } else { @@ -232,14 +234,38 @@ class ComplexBlockDiagonalSolver : public mfem::Solver KspSolver::KspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) - : mfem::Solver(), ksp_mult(0), ksp_mult_it(0) + : KspSolver(ConfigureKrylovSolver(fespaces.GetFinestFESpace().GetComm(), iodata), + ConfigurePreconditionerSolver(fespaces.GetFinestFESpace().GetComm(), iodata, + fespaces, aux_fespaces)) { - MFEM_VERIFY(fespaces.GetNumLevels() > 0, - "Empty finite element space hierarchy linear solver setup!"); - MPI_Comm comm = fespaces.GetFESpaceAtLevel(0).GetComm(); - pc_ = ConfigurePreconditionerSolver(comm, iodata, fespaces, aux_fespaces); - ksp_ = ConfigureKrylovSolver(comm, iodata); +} + +KspSolver::KspSolver(std::unique_ptr &&ksp, + std::unique_ptr &&pc) + : mfem::Solver(), ksp_(std::move(ksp)), pc_(std::move(pc)), ksp_mult(0), ksp_mult_it(0) +{ + ksp_->SetPreconditioner(*pc_); +} + +void KspSolver::SetOperator(const Operator &op, const Operator &pc_op) +{ + // Unset the preconditioner before so that IterativeSolver::SetOperator does not set the + // preconditioner operator again. 
+ auto *gmg = dynamic_cast(pc_.get()); + if (gmg) + { + MFEM_ABORT("KspSolver with a GeometricMultigridSolver preconditioner must " + "use the other signature for SetOperator!"); + } + else + { + pc_->SetOperator(pc_op); + } + // ksp_->SetPreconditioner(nullptr); //XX TODO WAITING MFEM PATCH + ksp_->SetOperator(op); ksp_->SetPreconditioner(*pc_); + height = op.Height(); + width = op.Width(); } void KspSolver::SetOperator(const Operator &op, @@ -285,15 +311,21 @@ void KspSolver::Mult(const Vector &x, Vector &y) const ComplexKspSolver::ComplexKspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) - : KspSolver() + : KspSolver(ConfigureKrylovSolver(fespaces.GetFinestFESpace().GetComm(), iodata), + std::make_unique(ConfigurePreconditionerSolver( + fespaces.GetFinestFESpace().GetComm(), iodata, fespaces, aux_fespaces))) { - MFEM_VERIFY(fespaces.GetNumLevels() > 0, - "Empty finite element space hierarchy linear solver setup!"); - MPI_Comm comm = fespaces.GetFESpaceAtLevel(0).GetComm(); - auto pcr = ConfigurePreconditionerSolver(comm, iodata, fespaces, aux_fespaces); - pc_ = std::make_unique(std::move(pcr)); - ksp_ = ConfigureKrylovSolver(comm, iodata); - ksp_->SetPreconditioner(*pc_); +} + +ComplexKspSolver::ComplexKspSolver(std::unique_ptr &&ksp, + std::unique_ptr &&pc) + : KspSolver(std::move(ksp), std::make_unique(std::move(pc))) +{ +} + +void ComplexKspSolver::SetOperator(const ComplexOperator &op, const Operator &pc_op) +{ + KspSolver::SetOperator(op, pc_op); // XX TODO TEST THIS AT RUNTIME... 
} void ComplexKspSolver::SetOperator( @@ -309,398 +341,4 @@ void ComplexKspSolver::Mult(const ComplexVector &x, ComplexVector &y) const y.Sync(); } -// XX TODO REMOVE - -// KspSolver::KspSolver(MPI_Comm comm, const IoData &iodata, const std::string &prefix) -// : clcustom(false), print(iodata.problem.verbose), print_opts(true), check_final(true), -// solve(0) -// { -// PalacePetscCall(KSPCreate(comm, &ksp)); -// PalacePetscCall(KSPSetOptionsPrefix(ksp, prefix.c_str())); -// Configure(iodata); -// ConfigureVerbose(print, prefix); -// } - -// KspSolver::KspSolver(MPI_Comm comm, int print_lvl, const std::string &prefix) -// : clcustom(false), print(print_lvl), print_opts(true), check_final(true), solve(0) -// { -// PalacePetscCall(KSPCreate(comm, &ksp)); -// PalacePetscCall(KSPSetOptionsPrefix(ksp, prefix.c_str())); -// ConfigureVerbose(print, prefix); -// } - -// KspSolver::~KspSolver() -// { -// MPI_Comm comm; -// PalacePetscCall(PetscObjectGetComm(reinterpret_cast(ksp), &comm)); -// PalacePetscCall(KSPDestroy(&ksp)); -// } - -// void KspSolver::Configure(const IoData &iodata) -// { -// // Configure the Krylov solver. GMRES is the default solver for frequency domain -// // problems. 
-// switch (iodata.solver.linear.ksp_type) -// { -// case config::LinearSolverData::KspType::CG: -// SetType(Type::CG); -// break; -// case config::LinearSolverData::KspType::CGSYM: -// SetType(Type::CGSYM); -// break; -// case config::LinearSolverData::KspType::FCG: -// SetType(Type::FCG); -// break; -// case config::LinearSolverData::KspType::MINRES: -// SetType(Type::MINRES); -// break; -// case config::LinearSolverData::KspType::GMRES: -// case config::LinearSolverData::KspType::DEFAULT: -// SetType(Type::GMRES); -// SetGMRESOptions(iodata.solver.linear.max_size, iodata.solver.linear.orthog_mgs, -// iodata.solver.linear.orthog_cgs2); -// break; -// case config::LinearSolverData::KspType::FGMRES: -// SetType(Type::FGMRES); -// SetGMRESOptions(iodata.solver.linear.max_size, iodata.solver.linear.orthog_mgs, -// iodata.solver.linear.orthog_cgs2); -// break; -// case config::LinearSolverData::KspType::BCGS: -// SetType(Type::BCGS); -// break; -// case config::LinearSolverData::KspType::BCGSL: -// SetType(Type::BCGSL); -// break; -// case config::LinearSolverData::KspType::FBCGS: -// SetType(Type::FBCGS); -// break; -// case config::LinearSolverData::KspType::QMRCGS: -// SetType(Type::QMRCGS); -// break; -// case config::LinearSolverData::KspType::TFQMR: -// SetType(Type::TFQMR); -// break; -// default: -// MFEM_ABORT("Unexpected type for KspSolver configuration!"); -// break; -// } -// SetTol(iodata.solver.linear.tol); -// SetMaxIter(iodata.solver.linear.max_it); - -// // Reuse previous solution as guess for later solves if desired. -// SetNonzeroInitialGuess(iodata.solver.linear.ksp_initial_guess); - -// // Optionally use left or right preconditioning (otherwise use PETSc default for the -// given -// // solver). 
-// if (iodata.solver.linear.pc_side_type == config::LinearSolverData::SideType::LEFT) -// { -// PalacePetscCall(KSPSetPCSide(ksp, PC_LEFT)); -// } -// else if (iodata.solver.linear.pc_side_type == -// config::LinearSolverData::SideType::RIGHT) -// { -// PalacePetscCall(KSPSetPCSide(ksp, PC_RIGHT)); -// } -// } - -// void KspSolver::ConfigureVerbose(int print, const std::string &prefix) -// { -// // Manage debugging output. -// if (print > 0) -// { -// std::string opts = "-ksp_converged_reason"; -// if (print > 1) -// { -// opts.append(" -ksp_monitor"); -// } -// if (print > 3) -// { -// opts.append(" -ksp_view"); -// } -// if (prefix.length() > 0) -// { -// PetscOptionsPrefixPush(nullptr, prefix.c_str()); -// } -// PetscOptionsInsertString(nullptr, opts.c_str()); -// if (prefix.length() > 0) -// { -// PetscOptionsPrefixPop(nullptr); -// } -// } -// } - -// void KspSolver::SetType(KspSolver::Type type, bool piped) -// { -// switch (type) -// { -// case Type::CG: -// PalacePetscCall((piped) ? KSPSetType(ksp, KSPPIPECG) : KSPSetType(ksp, KSPCG)); -// PalacePetscCall(KSPCGSetType(ksp, KSP_CG_HERMITIAN)); -// break; -// case Type::CGSYM: -// PalacePetscCall((piped) ? KSPSetType(ksp, KSPPIPECG) : KSPSetType(ksp, KSPCG)); -// PalacePetscCall(KSPCGSetType(ksp, KSP_CG_SYMMETRIC)); -// break; -// case Type::FCG: -// PalacePetscCall(KSPSetType(ksp, KSPFCG)); -// break; -// case Type::GMRES: -// PalacePetscCall((piped) ? KSPSetType(ksp, KSPPGMRES) : KSPSetType(ksp, KSPGMRES)); -// break; -// case Type::FGMRES: -// PalacePetscCall((piped) ? 
KSPSetType(ksp, KSPPIPEFGMRES) -// : KSPSetType(ksp, KSPFGMRES)); -// break; -// case Type::MINRES: -// PalacePetscCall(KSPSetType(ksp, KSPMINRES)); -// break; -// case Type::BCGS: -// PalacePetscCall(KSPSetType(ksp, KSPBCGS)); -// break; -// case Type::BCGSL: -// PalacePetscCall(KSPSetType(ksp, KSPBCGSL)); -// PalacePetscCall(KSPBCGSLSetEll(ksp, 2)); // PETSc default -// break; -// case Type::FBCGS: -// PalacePetscCall(KSPSetType(ksp, KSPFBCGS)); -// break; -// case Type::QMRCGS: -// PalacePetscCall(KSPSetType(ksp, KSPQMRCGS)); -// break; -// case Type::TFQMR: -// PalacePetscCall(KSPSetType(ksp, KSPTFQMR)); -// break; -// case Type::CHOLESKY: -// { -// PC pc; -// PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); -// PalacePetscCall(KSPGetPC(ksp, &pc)); -// PalacePetscCall(PCSetType(pc, PCCHOLESKY)); -// SetCheckFinal(false); -// } -// break; -// case Type::LU: -// { -// PC pc; -// PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); -// PalacePetscCall(KSPGetPC(ksp, &pc)); -// PalacePetscCall(PCSetType(pc, PCLU)); -// SetCheckFinal(false); -// } -// break; -// default: -// MFEM_ABORT("Unexpected type for KspSolver!"); -// break; -// } -// } - -// void KspSolver::SetTol(PetscReal tol) -// { -// PalacePetscCall(KSPSetTolerances(ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT, -// PETSC_DEFAULT)); -// } - -// void KspSolver::SetAbsTol(PetscReal tol) -// { -// PalacePetscCall(KSPSetTolerances(ksp, PETSC_DEFAULT, tol, PETSC_DEFAULT, -// PETSC_DEFAULT)); -// } - -// void KspSolver::SetMaxIter(PetscInt maxits) -// { -// PalacePetscCall( -// KSPSetTolerances(ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, maxits)); -// } - -// void KspSolver::SetGMRESOptions(PetscInt maxsize, bool mgs, bool cgs2) -// { -// PalacePetscCall(KSPGMRESSetRestart(ksp, maxsize)); -// if (mgs) -// { -// PalacePetscCall( -// KSPGMRESSetOrthogonalization(ksp, KSPGMRESModifiedGramSchmidtOrthogonalization)); -// } -// else if (cgs2) -// { -// PalacePetscCall(KSPGMRESSetCGSRefinementType(ksp, 
KSP_GMRES_CGS_REFINE_ALWAYS)); -// } -// } - -// void KspSolver::SetTabLevel(PetscInt l) -// { -// PalacePetscCall(PetscObjectSetTabLevel(reinterpret_cast(ksp), l)); -// } - -// void KspSolver::SetNonzeroInitialGuess(bool guess) -// { -// PalacePetscCall(KSPSetInitialGuessNonzero(ksp, guess ? PETSC_TRUE : PETSC_FALSE)); -// } - -// void KspSolver::SetOperator(const petsc::PetscParMatrix &A, bool copy_prefix) -// { -// // If A is the same as before, PETSc will reuse things like symbolic factorizations -// // automatically. -// PalacePetscCall(KSPSetOperators(ksp, A, A)); -// if (copy_prefix) -// { -// // Set Mat prefix to be the same as KSP to enable setting command-line options. -// const char *prefix; -// PalacePetscCall(KSPGetOptionsPrefix(ksp, &prefix)); -// PalacePetscCall(MatSetOptionsPrefix(A, prefix)); -// } -// } - -// void KspSolver::SetPreconditioner(const KspPreconditioner &op) -// { -// // The PETSc shell preconditioner does not take ownership of the preconditioner object. -// PC pc; -// PalacePetscCall(KSPGetPC(ksp, &pc)); -// PalacePetscCall(PCSetType(pc, PCSHELL)); -// PalacePetscCall(PCShellSetContext(pc, (void *)&op)); -// PalacePetscCall(PCShellSetSetUp(pc, KspPreconditioner::PCSetUp)); -// PalacePetscCall(PCShellSetApply(pc, KspPreconditioner::PCApply)); -// PalacePetscCall(PCShellSetDestroy(pc, KspPreconditioner::PCDestroy)); -// } - -// void KspSolver::Customize() const -// { -// if (!clcustom) -// { -// PalacePetscCall(KSPSetFromOptions(ksp)); -// if (print > 0 && print_opts) -// { -// PetscOptionsView(nullptr, PETSC_VIEWER_STDOUT_(GetComm())); -// Mpi::Print(GetComm(), "\n"); -// } -// clcustom = true; -// } -// } - -// void KspSolver::Mult(const petsc::PetscParVector &b, petsc::PetscParVector &x) const -// { -// KSPConvergedReason reason; -// PetscReal norm0 = 1.0, norm; -// if (check_final) -// { -// norm0 = b.Norml2(); -// } -// Customize(); -// PalacePetscCall(KSPSolve(ksp, b, x)); -// PalacePetscCall(KSPGetConvergedReason(ksp, &reason)); 
-// if (check_final && reason < 0) -// { -// Mat A; -// Vec r; -// PalacePetscCall(VecDuplicate(b, &r)); -// PalacePetscCall(KSPGetOperators(ksp, &A, nullptr)); -// PalacePetscCall(MatMult(A, x, r)); -// PalacePetscCall(VecAXPY(r, -1.0, b)); -// PalacePetscCall(VecNorm(r, NORM_2, &norm)); -// PalacePetscCall(VecDestroy(&r)); -// Mpi::Warning(GetComm(), -// "Linear solver did not converge, " -// "norm(Ax-b)/norm(b) = {:.3e} (norm(b) = {:.3e})!\n", -// norm / norm0, norm0); -// } -// solve++; -// } - -// void KspSolver::Reset() -// { -// PalacePetscCall(KSPReset(ksp)); -// } - -// PetscInt KspSolver::GetTotalNumMult() const -// { -// return solve; -// } - -// PetscInt KspSolver::GetNumIter() const -// { -// PetscInt num_it; -// PalacePetscCall(KSPGetIterationNumber(ksp, &num_it)); -// return num_it; -// } - -// PetscInt KspSolver::GetTotalNumIter() const -// { -// PetscInt num_it; -// PalacePetscCall(KSPGetTotalIterations(ksp, &num_it)); -// return num_it; -// } - -// MPI_Comm KspSolver::GetComm() const -// { -// return ksp ? PetscObjectComm(reinterpret_cast(ksp)) : MPI_COMM_NULL; -// } - -// void KspSolver::SolveJacobi(const petsc::PetscParMatrix &A, const petsc::PetscParVector -// &b, -// petsc::PetscParVector &x, PetscInt sym, PetscReal tol, -// PetscInt max_it) -// { -// MPI_Comm comm; -// KSP ksp; -// PC pc; -// KSPConvergedReason reason; - -// comm = A.GetComm(); -// PalacePetscCall(KSPCreate(comm, &ksp)); -// PalacePetscCall(KSPSetOperators(ksp, A, A)); -// PalacePetscCall(KSPSetType(ksp, (sym == 1) ? 
KSPCG : KSPGMRES)); -// PalacePetscCall(KSPGetPC(ksp, &pc)); -// PalacePetscCall(PCSetType(pc, PCJACOBI)); -// PalacePetscCall(PCJacobiSetFixDiagonal(pc, PETSC_TRUE)); -// PalacePetscCall(KSPSetTolerances(ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT, max_it)); -// // std::string opts = "-ksp_converged_reason -ksp_monitor"; -// // PetscOptionsInsertString(nullptr, opts.c_str()); -// // PalacePetscCall(KSPSetFromOptions(ksp)); -// x.SetZero(); -// PalacePetscCall(KSPSolve(ksp, b, x)); -// PalacePetscCall(KSPGetConvergedReason(ksp, &reason)); -// MFEM_VERIFY(reason > 0, "PETSc KSP did not converge!"); -// PalacePetscCall(KSPDestroy(&ksp)); -// } - -// void KspSolver::SolveDirect(const petsc::PetscParMatrix &A, const petsc::PetscParVector -// &b, -// petsc::PetscParVector &x, PetscInt sym) -// { -// MPI_Comm comm; -// KSP ksp; -// PC pc; -// KSPConvergedReason reason; - -// comm = A.GetComm(); -// PalacePetscCall(KSPCreate(comm, &ksp)); -// PalacePetscCall(KSPSetOperators(ksp, A, A)); -// PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); -// PalacePetscCall(KSPGetPC(ksp, &pc)); -// #if defined(PETSC_HAVE_MUMPS) || defined(PETSC_HAVE_SUPERLU_DIST) -// PalacePetscCall(PCSetType(pc, (sym > 0) ? PCCHOLESKY : PCLU)); -// #if defined(PETSC_HAVE_MUMPS) -// PalacePetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS)); -// #elif defined(PETSC_HAVE_SUPERLU_DIST) -// PalacePetscCall(PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU_DIST)); -// #endif -// #else -// // Use PETSc default serial direct solver. -// PalacePetscCall(PCSetType(pc, PCREDUNDANT)); -// PalacePetscCall(PCRedundantSetNumber(pc, Mpi::Size(comm))); -// { -// KSP ksp_in; -// PC pc_in; -// PalacePetscCall(PCRedundantGetKSP(pc, &ksp_in)); -// PalacePetscCall(KSPGetPC(ksp_in, &pc_in)); -// PalacePetscCall(PCSetType(pc_in, (sym > 0) ? 
PCCHOLESKY : PCLU)); -// } -// #endif -// x.SetZero(); -// PalacePetscCall(KSPSolve(ksp, b, x)); -// PalacePetscCall(KSPGetConvergedReason(ksp, &reason)); -// MFEM_VERIFY(reason > 0, "PETSc KSP did not converge!"); -// PalacePetscCall(KSPDestroy(&ksp)); -// } - } // namespace palace diff --git a/palace/linalg/ksp.hpp b/palace/linalg/ksp.hpp index 52c290b7e..7b51fcf3d 100644 --- a/palace/linalg/ksp.hpp +++ b/palace/linalg/ksp.hpp @@ -36,10 +36,7 @@ class KspSolver : public mfem::Solver KspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr); KspSolver(std::unique_ptr &&ksp, - std::unique_ptr &&pc) - : ksp_(std::move(ksp)), pc_(std::move(pc)), ksp_mult(0), ksp_mult_it(0) - { - } + std::unique_ptr &&pc); int NumTotalMult() const { return ksp_mult; } int NumTotalMultIter() const { return ksp_mult_it; } @@ -49,6 +46,8 @@ class KspSolver : public mfem::Solver MFEM_ABORT("SetOperator with a single operator is not implemented for KspSolver, you " "must specify the preconditioner operator as well!"); } + + virtual void SetOperator(const Operator &op, const Operator &pc_op); virtual void SetOperator(const Operator &op, const std::vector> &pc_ops, const std::vector> *pc_aux_ops = nullptr); @@ -61,8 +60,15 @@ class ComplexKspSolver : public KspSolver public: ComplexKspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr); + ComplexKspSolver(std::unique_ptr &&ksp, + std::unique_ptr &&pc); using KspSolver::SetOperator; + void SetOperator(const Operator &op, const Operator &pc_op) override + { + MFEM_ABORT("SetOperator with a real-valued operator is not implemented for " + "ComplexKspSolver, use the complex-valued signature instead!"); + } void SetOperator( const Operator &op, const std::vector> &pc_ops, const std::vector> *pc_aux_ops = nullptr) override @@ -70,6 +76,8 @@ class ComplexKspSolver : public 
KspSolver MFEM_ABORT("SetOperator with a real-valued operator is not implemented for " "ComplexKspSolver, use the complex-valued signature instead!"); } + + void SetOperator(const ComplexOperator &op, const Operator &pc_op); void SetOperator(const ComplexOperator &op, const std::vector> &pc_ops, const std::vector> *pc_aux_ops = nullptr); @@ -82,141 +90,6 @@ class ComplexKspSolver : public KspSolver void Mult(const ComplexVector &x, ComplexVector &y) const; }; -// XX TODO REMOVE - -// class IoData; -// class KspPreconditioner; - -// namespace petsc -// { - -// class PetscParMatrix; -// class PetscParVector; - -// } // namespace petsc - -// // -// // A wrapper of PETSc's KSP class for solving linear systems. -// // -// class KspSolver -// { -// public: -// enum class Type -// { -// CG, -// CGSYM, -// FCG, -// MINRES, -// GMRES, -// FGMRES, -// BCGS, -// BCGSL, -// FBCGS, -// QMRCGS, -// TFQMR, -// CHOLESKY, -// LU -// }; - -// private: -// // The actual PETSc object. -// KSP ksp; - -// // Boolean to handle SetFromOptions calls. -// mutable bool clcustom; - -// // Control print level for debugging. -// int print; - -// // Print PETSc options database prior to solve. -// bool print_opts; - -// // Check for final residual if not converged. Defaults to true. -// bool check_final; - -// // Counter for number of calls to Mult method for a linear solve. -// mutable PetscInt solve; - -// // Set up debugging output and configure the solver based on user specified parameters. -// void Configure(const IoData &iodata); -// void ConfigureVerbose(int print, const std::string &prefix); - -// // Customize object with command line options set. -// void Customize() const; - -// public: -// // Calls PETSc's KSPCreate. -// KspSolver(MPI_Comm comm, const IoData &iodata, const std::string &prefix = -// std::string()); KspSolver(MPI_Comm comm, int print_lvl, const std::string &prefix = -// std::string()); - -// // Calls PETSc's KSPDestroy. -// ~KspSolver(); - -// // Sets the solver type. 
-// void SetType(Type type, bool piped = false); - -// // Set solver tolerance. -// void SetTol(PetscReal tol); - -// // Set solver tolerance. -// void SetAbsTol(PetscReal tol); - -// // Set maximum number of iterations. -// void SetMaxIter(PetscInt maxits); - -// // Set options specific to GMRES and FGMRES solvers. -// void SetGMRESOptions(PetscInt maxsize, bool mgs, bool cgs2); - -// // Sets the tab level for KSP output. -// void SetTabLevel(PetscInt l); - -// // Set flag to print PETSc options database at start of solve. -// void SetPrintOptions(bool opts) { print_opts = opts; } - -// // Set flag to check final residual if unconverged. -// void SetCheckFinal(bool check) { check_final = check; } - -// // Set an initial vector for the solution subspace. -// void SetNonzeroInitialGuess(bool guess); - -// // Sets the MVP and preconditioner matrix. -// void SetOperator(const petsc::PetscParMatrix &A, bool copy_prefix = true); - -// // Configures a shell preconditioner based on the given preconditioner object. -// void SetPreconditioner(const KspPreconditioner &op); - -// // Application of the solver. -// void Mult(const petsc::PetscParVector &b, petsc::PetscParVector &x) const; - -// // Call KSPReset, for example if the operator dimension has changed. -// void Reset(); - -// // Get number of solver calls. -// PetscInt GetTotalNumMult() const; - -// // Get number of solver iterations. -// PetscInt GetNumIter() const; -// PetscInt GetTotalNumIter() const; - -// // Get the associated MPI communicator. -// MPI_Comm GetComm() const; - -// // Conversion function to PETSc's KSP type. -// operator KSP() const { return ksp; } - -// // Typecasting to PETSc object. -// operator PetscObject() const { return reinterpret_cast(ksp); } - -// // Simple static linear solve methods. The sym variable defines the matrix type: 0 for -// // general, 1 for SPD, 2 for symmetric indefinite (definitions from MUMPS). 
-// static void SolveJacobi(const petsc::PetscParMatrix &A, const petsc::PetscParVector &b, -// petsc::PetscParVector &x, PetscInt sym, double PetscReal -// = 1.0e-9, PetscInt max_it = 5000); -// static void SolveDirect(const petsc::PetscParMatrix &A, const petsc::PetscParVector &b, -// petsc::PetscParVector &x, PetscInt sym); -// }; - } // namespace palace #endif // PALACE_LINALG_KSP_SOLVER_HPP diff --git a/palace/linalg/pc.cpp b/palace/linalg/pc.cpp deleted file mode 100644 index 0cbe9cb5a..000000000 --- a/palace/linalg/pc.cpp +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#include "pc.hpp" - -#include -#include "linalg/amg.hpp" -#include "linalg/ams.hpp" -#include "linalg/gmg.hpp" -#include "linalg/mumps.hpp" -#include "linalg/strumpack.hpp" -#include "linalg/superlu.hpp" -#include "utils/iodata.hpp" - -namespace palace -{ - -std::unique_ptr -ConfigurePreconditioner(const IoData &iodata, const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpaceHierarchy &fespaces, - mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) -{ - config::LinearSolverData::Type type = iodata.solver.linear.type; - if (type == config::LinearSolverData::Type::DEFAULT) - { - if (iodata.problem.type == config::ProblemData::Type::ELECTROSTATIC || - (iodata.problem.type == config::ProblemData::Type::TRANSIENT && - iodata.solver.transient.type == config::TransientSolverData::Type::CENTRAL_DIFF)) - { - type = config::LinearSolverData::Type::BOOMER_AMG; - } - else if (iodata.problem.type == config::ProblemData::Type::MAGNETOSTATIC || - iodata.problem.type == config::ProblemData::Type::TRANSIENT) - { - type = config::LinearSolverData::Type::AMS; - } - else - { - // Prefer sparse direct solver for frequency domain problems if available. 
-#if defined(MFEM_USE_SUPERLU) - type = config::LinearSolverData::Type::SUPERLU; -#elif defined(MFEM_USE_STRUMPACK) - type = config::LinearSolverData::Type::STRUMPACK; -#elif defined(MFEM_USE_MUMPS) - type = config::LinearSolverData::Type::MUMPS; -#else - type = config::LinearSolverData::Type::AMS; -#endif - } - } - int print = iodata.problem.verbose - 1; - MPI_Comm comm = fespaces.GetFESpaceAtLevel(0).GetComm(); - std::unique_ptr pc; - switch (type) - { - case config::LinearSolverData::Type::AMS: - // Can either be the coarse solve for geometric multigrid or the solver at the finest - // space (in which case fespaces.GetNumLevels() == 1). - pc = std::make_unique( - iodata, fespaces.GetFESpaceAtLevel(0), - aux_fespaces ? &aux_fespaces->GetFESpaceAtLevel(0) : nullptr, print); - break; - case config::LinearSolverData::Type::BOOMER_AMG: - pc = std::make_unique(iodata, print); - break; - case config::LinearSolverData::Type::SUPERLU: -#if defined(MFEM_USE_SUPERLU) - pc = std::make_unique(comm, iodata, print); -#else - MFEM_ABORT("Solver was not built with SuperLU_DIST support, please choose a " - "different solver!"); -#endif - break; - case config::LinearSolverData::Type::STRUMPACK: -#if defined(MFEM_USE_STRUMPACK) - pc = std::make_unique(comm, iodata, print); - break; -#endif - case config::LinearSolverData::Type::STRUMPACK_MP: -#if defined(MFEM_USE_STRUMPACK) && \ - (STRUMPACK_VERSION_MAJOR >= 6 && STRUMPACK_VERSION_MINOR >= 3 && \ - STRUMPACK_VERSION_PATCH > 1) - pc = std::make_unique(comm, iodata, print); -#else - MFEM_ABORT("Solver was not built with STRUMPACK support or uses STRUMPACK older than " - "6.3.1 which does not include mixed-precision support, please choose a " - "different solver!"); -#endif - break; - case config::LinearSolverData::Type::MUMPS: -#if defined(MFEM_USE_MUMPS) - pc = std::make_unique(comm, iodata, print); -#else - MFEM_ABORT( - "Solver was not built with MUMPS support, please choose a different solver!"); -#endif - break; - case 
config::LinearSolverData::Type::DEFAULT: - case config::LinearSolverData::Type::INVALID: - MFEM_ABORT("Unexpected type for KspPreconditioner configuration!"); - break; - } - if (iodata.solver.linear.mat_gmg) - { - // This will construct the multigrid hierarchy using pc as the coarse solver - // (ownership of pc is transfered to the GeometricMultigridSolver). When a special - // auxiliary space smoother for pre-/post-smoothing is not desired, h1_fespace is just - // a nullptr. - return std::make_unique(iodata, std::move(pc), dbc_marker, - fespaces, aux_fespaces); - } - else - { - return pc; - } -} - -void KspPreconditioner::Init(int n) -{ - // Set up temporary vector storage. -#if defined(PETSC_USE_COMPLEX) - if (x_.Size() == 2 * n && y_.Size() == 2 * n) - { - return; - } - x_.SetSize(2 * n); - y_.SetSize(2 * n); -#else - if (x_.Size() == n && y_.Size() == n) - { - return; - } - x_.SetSize(n); - y_.SetSize(n); -#endif -} - -void KspPreconditioner::SetOperator(const mfem::Operator &op) -{ - pc_->SetOperator(op); - Init(op.Height()); -} - -void KspPreconditioner::SetOperator( - const std::vector> &ops, - const std::vector> *aux_ops) -{ - auto *gmg = dynamic_cast(pc_.get()); - if (gmg) - { - gmg->SetOperator(ops, aux_ops); - Init(ops.back()->Height()); - } - else - { - SetOperator(*ops.back()); - } -} - -void KspPreconditioner::Mult(const petsc::PetscParVector &x, petsc::PetscParVector &y) const -{ -#if defined(PETSC_USE_COMPLEX) - mfem::Vector xr_, xi_, yr_, yi_; - mfem::Array X(2); - mfem::Array Y(2); - xr_.MakeRef(x_, 0, x_.Size() / 2); - xi_.MakeRef(x_, x_.Size() / 2, x_.Size() / 2); - yr_.MakeRef(y_, 0, y_.Size() / 2); - yi_.MakeRef(y_, y_.Size() / 2, y_.Size() / 2); - X[0] = &xr_; - X[1] = &xi_; - Y[0] = &yr_; - Y[1] = &yi_; - // yr_ = 0.0; - // yi_ = 0.0; - x.GetToVectors(xr_, xi_); - pc_->ArrayMult(X, Y); - y.SetFromVectors(yr_, yi_); -#else - // y_ = 0.0; - x.GetToVector(x_); - pc_->Mult(x_, y_); - y.SetFromVector(y_); -#endif -} - -PetscErrorCode 
KspPreconditioner::PCSetUp(PC pc) -{ - // The preconditioner operators are set up outside of the linear solve by the user, so - // this method does nothing. - PetscFunctionBeginUser; - PetscFunctionReturn(0); -} - -PetscErrorCode KspPreconditioner::PCApply(PC pc, Vec x, Vec y) -{ - // Apply the preconditioner. If PETSc is compiled with complex number support, the real - // preconditioner applied in block diagonal form. - KspPreconditioner *op; - petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; - - PetscCall(PCShellGetContext(pc, (void **)&op)); - MFEM_VERIFY(op, "Invalid PETSc shell PC context!"); - op->Mult(xx, yy); - PetscFunctionReturn(0); -} - -PetscErrorCode KspPreconditioner::PCDestroy(PC pc) -{ - // Ownership of the preconditioner context is not inherited by the shell preconditioner, - // so this does nothing. - PetscFunctionBeginUser; - PetscFunctionReturn(0); -} - -} // namespace palace diff --git a/palace/linalg/pc.hpp b/palace/linalg/pc.hpp deleted file mode 100644 index 252d3d15f..000000000 --- a/palace/linalg/pc.hpp +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LINALG_KSP_PRECONDITIONER_HPP -#define PALACE_LINALG_KSP_PRECONDITIONER_HPP - -#include -#include -#include -#include "linalg/petsc.hpp" - -namespace palace -{ - -class IoData; - -// XX TODO REFACTOR INTO KSP.HPP/CPP FOR REAL-VALUED LINEAR SOLVER! KspSolver, -// ComplexKspSolver... - -// Global method for preconditioner configuration and construction. -std::unique_ptr -ConfigurePreconditioner(const IoData &iodata, const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpaceHierarchy &fespaces, - mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr); - -// -// Class for preconditioning with interfaces to PETSc linear solvers or those from -// MFEM/Hypre. -// -class KspPreconditioner -{ -private: - // The actual preconditioner solver. 
- std::unique_ptr pc_; - - // Temporary vectors for preconditioner application. - mutable mfem::Vector x_, y_; - - // Helper function for setup. - void Init(int n); - -public: - KspPreconditioner(std::unique_ptr &&pc) : pc_(std::move(pc)) {} - KspPreconditioner(const IoData &iodata, const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpaceHierarchy &fespaces, - mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr) - : pc_(ConfigurePreconditioner(iodata, dbc_marker, fespaces, aux_fespaces)) - { - if (pc_->Height()) - { - Init(pc_->Height()); - } - } - - void SetOperator(const mfem::Operator &op); - void SetOperator(const std::vector> &ops, - const std::vector> *aux_ops = nullptr); - - void Mult(const petsc::PetscParVector &x, petsc::PetscParVector &y) const; - - // Wrapper functions for PETSc PCSHELL. - static PetscErrorCode PCSetUp(PC pc); - static PetscErrorCode PCApply(PC pc, Vec x, Vec y); - static PetscErrorCode PCDestroy(PC pc); -}; - -} // namespace palace - -#endif // PALACE_LINALG_KSP_PRECONDITIONER_HPP diff --git a/palace/models/romoperator.hpp b/palace/models/romoperator.hpp index b5fc05acf..c772e5861 100644 --- a/palace/models/romoperator.hpp +++ b/palace/models/romoperator.hpp @@ -12,7 +12,6 @@ #include #include "linalg/curlcurl.hpp" #include "linalg/ksp.hpp" -#include "linalg/pc.hpp" #include "linalg/petsc.hpp" namespace palace diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp index d0cbe5330..4768ddb28 100644 --- a/palace/models/timeoperator.cpp +++ b/palace/models/timeoperator.cpp @@ -4,9 +4,8 @@ #include "timeoperator.hpp" #include -#include "linalg/gmg.hpp" #include "linalg/jacobi.hpp" -#include "linalg/pc.hpp" +#include "linalg/ksp.hpp" #include "models/spaceoperator.hpp" #include "utils/communication.hpp" #include "utils/iodata.hpp" @@ -19,7 +18,7 @@ namespace class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOperator { -private: +public: // System matrices and excitation RHS. 
std::unique_ptr K, M, C; Vector NegJ; @@ -30,38 +29,14 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera // Internal objects for solution of linear systems during time stepping. double a0_, a1_; + std::unique_ptr kspM, kspA; std::unique_ptr A; std::vector> B, AuxB; mutable Vector RHS; - // XX TODO REMOVE - // std::function(double, double)> GetSystemMatrix; - // std::function> &, - // std::vector> &)> - // GetPreconditionerMatrix; - // std::function(double, double)> GetSystemMatrix; - - // Linear system solvers and settings for implicit time integration. - std::unique_ptr kspM, kspA; - std::unique_ptr pcM, pcA; - mutable int kspM_mult, kspA_mult, kspM_it, kspA_it; - // Bindings to SpaceOperator functions to get the system matrix and preconditioner, and // construct the linear solver. - std::function(double a0, double a1)> - ConfigureLinearSolver; - - void FormRHS(const Vector &u, const Vector &du, Vector &rhs) const - { - // Multiply: rhs = -(K u + C du) - g'(t) J. - rhs = 0.0; - K->AddMult(u, rhs, -1.0); - if (C) - { - C->AddMult(du, rhs, -1.0); - } - rhs.Add(dJcoef(t), NegJ); - } + std::function(double a0, double a1)> ConfigureLinearSolver; public: TimeDependentCurlCurlOperator(const IoData &iodata, SpaceOperator &spaceop, @@ -87,154 +62,60 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera // Set up linear solvers. { // PCG with a simple Jacobi preconditioner for mass matrix systems. 
- auto jac = std::make_unique(); - jac->SetOperator(*M); - pcM = std::move(jac); - auto pcg = std::make_unique(M->GetComm()); pcg->iterative_mode = iodata.solver.linear.ksp_initial_guess; pcg->SetRelTol(iodata.solver.linear.tol); pcg->SetMaxIter(iodata.solver.linear.max_it); pcg->SetPrintLevel(0); - pcg->SetOperator(*M); - pcg->SetPreconditioner(*pcM); - kspM = std::move(pcg); + kspM = + std::make_unique(std::move(pcg), std::make_unique()); + kspM->SetOperator(*M, *M); } { // For explicit schemes, recommended to just use cheaper preconditioners. Otherwise, // use AMS or a direct solver. The system matrix is formed as a sequence of matrix // vector products, and is only assembled for preconditioning. - - // // XX TODO ADDRESS, WITH BCS, ETC..... - // pcA = ConfigurePreconditioner(iodata, spaceop.GetDbcMarker(), - // spaceop.GetNDSpaces(), - // &spaceop.GetH1Spaces()); - - // XX TODO TEST IF THE BELOW WORKS? - - // auto pcg = std::make_unique(comm); - // pcg->iterative_mode = iodata.solver.linear.ksp_initial_guess; - // pcg->SetRelTol(iodata.solver.linear.tol); - // pcg->SetMaxIter(iodata.solver.linear.max_it); - // pcg->SetPrintLevel(print); - // pcg->SetOperator(*this); - // pcg->SetPreconditioner(*pcA); - // kspA = std::move(pcg); - - // XX TODO REMOVE - // GetSystemMatrix = [this, &spaceop](double a0, double a1) -> - // std::unique_ptr - // { - // return spaceop.GetSystemMatrix(a0, a1, 1.0, this->K.get(), this->C.get(), - // this->M.get()); - // } - // GetPreconditionerMatrix = [&spaceop](double a0, double a1, - // std::vector> &B, - // std::vector> &AuxB) - // { spaceop.GetPreconditionerMatrix(a0, a1, 1.0, 0.0, B, AuxB); }; - // ConfigureLinearSolver = [=](std::unique_ptr &A, - // std::unique_ptr &pc) -> std::unique_ptr - // { - // auto pcg = std::make_unique(comm); - // pcg->iterative_mode = iodata.solver.linear.ksp_initial_guess; - // pcg->SetRelTol(iodata.solver.linear.tol); - // pcg->SetMaxIter(iodata.solver.linear.max_it); - // pcg->SetPrintLevel(print); - 
// pcg->SetOperator(*A); - // pcg->SetPreconditioner(*pc); - // } - - // The time domain system matrix is A = a0 K + a1 C + M, which constructed using the - // assembled K, C, and M matrices and the coefficients a0 and a1 defined by the time - // integrator. - if (iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::DEFAULT && - iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::CG) - { - Mpi::Warning("Transient problem type always uses CG as the Krylov solver!\n"); - } - bool iterative_mode = iodata.solver.linear.ksp_initial_guess; - double tol = iodata.solver.linear.tol; - int max_it = iodata.solver.linear.max_it; - mfem::IterativeSolver::PrintLevel print = - mfem::IterativeSolver::PrintLevel().Warnings().Errors(); - if (iodata.problem.verbose > 0) + ConfigureLinearSolver = [this, &iodata, + &spaceop](double a0, double a1) -> std::unique_ptr { - print.Summary(); - if (iodata.problem.verbose > 1) - { - print.Iterations(); - if (iodata.problem.verbose > 2) - { - print.All(); - } - } - } - ConfigureLinearSolver = [this, &spaceop, iterative_mode, tol, max_it, - print](double a0, - double a1) -> std::unique_ptr - { - // XX TODO WORKING ON MONDAY!! - // Configure the system matrix and also the matrix (matrices) from which the // preconditioner will be constructed. this->A = spaceop.GetSystemMatrix(a0, a1, 1.0, this->K.get(), this->C.get(), this->M.get()); spaceop.GetPreconditionerMatrix(a0, a1, 1.0, 0.0, this->B, this->AuxB); - // Configure the preconditioner. - auto *gmg = dynamic_cast(this->pcA.get()); - - // XX TODO WIP - // if (gmg) - // { - // gmg->SetOperator(this->B, &this->AuxB); - // } - // else - // { - // this->pcA->SetOperator(*this->B.back()); - // } - - // Construct and return the linear solver. 
- auto pcg = std::make_unique(this->M->GetComm()); - pcg->iterative_mode = iterative_mode; - pcg->SetRelTol(tol); - pcg->SetMaxIter(max_it); - pcg->SetPrintLevel(print); - pcg->SetOperator(*this->A); - pcg->SetPreconditioner(*this->pcA); - return pcg; + // Configure the solver. + auto ksp = std::make_unique(iodata, spaceop.GetNDSpaces(), + &spaceop.GetH1Spaces()); + ksp->SetOperator(*this->A, this->B, &this->AuxB); + return ksp; }; } - kspM_mult = kspA_mult = kspM_it = kspA_it = 0; } - const ParOperator &GetK() const { return *K; } - const ParOperator &GetM() const { return *M; } - const ParOperator &GetC() const { return *C; } - - int GetNumMult() const { return kspM_mult; } // XX TODO REVISIT WITH KspSolver - int GetNumMultIter() const { return kspM_it; } - int GetNumImplicitSolve() const { return kspA_mult; } - int GetNumImplicitSolveIter() const { return kspA_it; } + void FormRHS(const Vector &u, const Vector &du, Vector &rhs) const + { + // Multiply: rhs = -(K u + C du) - g'(t) J. + rhs = 0.0; + K->AddMult(u, rhs, -1.0); + if (C) + { + C->AddMult(du, rhs, -1.0); + } + rhs.Add(dJcoef(t), NegJ); + } void Mult(const Vector &u, const Vector &du, Vector &ddu) const override { // Solve: M ddu = -(K u + C du) - g'(t) J. Mpi::Print("\n"); - if (kspM_mult == 0) + if (kspM->NumTotalMult() == 0) { // Operators have already been set in constructor. ddu = 0.0; } FormRHS(u, du, RHS); kspM->Mult(RHS, ddu); - if (!kspM->GetConverged()) - { - Mpi::Warning("Linear solver did not converge in {:d} iterations!\n", - kspM->GetNumIterations()); - } - kspM_mult++; - kspM_it += kspM->GetNumIterations(); } void ImplicitSolve(const double a0, const double a1, const Vector &u, const Vector &du, @@ -243,43 +124,21 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera // Solve: (a0 K + a1 C + M) k = -(K u + C du) - g'(t) J, where a0 may be 0 in the // explicit case. At first iteration, construct the solver. 
Also don't print a newline // if already done by the mass matrix solve at the first iteration. - if (kspA_mult > 0) + if (kspA && kspA->NumTotalMult() > 0) { Mpi::Print("\n"); } - if (kspA_mult == 0 || a0 != a0_ || a1 != a1_) + if (!kspA || a0 != a0_ || a1 != a1_) { // Configure the linear solver, including the system matrix and also the matrix // (matrices) from which the preconditioner will be constructed. kspA = ConfigureLinearSolver(a0, a1); - - // XX TODO WORKING: REMOVE THE BELOW IF THIS WORKS... - - // A = GetSystemMatrix(a0, a1); - // GetPreconditionerMatrix(a0, a1, P, AuxP); - // auto *gmg = dynamic_cast(pcA.get()); - // if (gmg) - // { - // gmg->SetOperator(P, &AuxP); - // } - // else - // { - // pcA->SetOperator(*P.back()); - // } - a0_ = a0; a1_ = a1; k = 0.0; } FormRHS(u, du, RHS); kspA->Mult(RHS, k); - if (!kspA->GetConverged()) - { - Mpi::Warning("Linear solver did not converge in {:d} iterations!\n", - kspA->GetNumIterations()); - } - kspA_mult++; - kspA_it += kspA->GetNumIterations(); } }; @@ -332,23 +191,19 @@ TimeOperator::TimeOperator(const IoData &iodata, SpaceOperator &spaceop, op = std::make_unique(iodata, spaceop, djcoef, 0.0, type); } -int TimeOperator::GetTotalKspMult() const -{ - const auto &curlcurl = dynamic_cast(*op); - return curlcurl.GetNumMult() + curlcurl.GetNumImplicitSolve(); -} - -int TimeOperator::GetTotalKspIter() const +const KspSolver &TimeOperator::GetLinearSolver() const { const auto &curlcurl = dynamic_cast(*op); - return curlcurl.GetNumMultIter() + curlcurl.GetNumImplicitSolveIter(); + MFEM_VERIFY(curlcurl.kspA, + "No linear solver for time-depdendent operator has been constructed!\n"); + return *curlcurl.kspA; } double TimeOperator::GetMaxTimeStep() const { const auto &curlcurl = dynamic_cast(*op); - const ParOperator &M = curlcurl.GetM(); - const ParOperator &K = curlcurl.GetK(); + const ParOperator &M = *curlcurl.M; + const ParOperator &K = *curlcurl.K; // Solver for M⁻¹. 
constexpr double lin_tol = 1.0e-9; diff --git a/palace/models/timeoperator.hpp b/palace/models/timeoperator.hpp index 57c56bcf1..ede63b4d1 100644 --- a/palace/models/timeoperator.hpp +++ b/palace/models/timeoperator.hpp @@ -14,6 +14,7 @@ namespace palace { class IoData; +class KspSolver; class SpaceOperator; // @@ -43,13 +44,11 @@ class TimeOperator const Vector &GetEdot() const { return dE; } const Vector &GetB() const { return B; } - // Is time integration scheme explicit or implicit. - bool isExplicit() const { return op->isExplicit(); } + // Return the linear solver associated with the implicit or explicit time integrator. + const KspSolver &GetLinearSolver() const; - // Return number of linear solves and linear solver iterations performed during time - // integration. - int GetTotalKspMult() const; - int GetTotalKspIter() const; + // Return if the time integration scheme explicit or implicit. + bool isExplicit() const { return op->isExplicit(); } // Estimate the maximum stable time step based on the maximum eigenvalue of the // undamped system matrix M⁻¹ K. From bcf744435eb7712585058abac3127938e47f0501 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Tue, 16 May 2023 11:59:02 -0700 Subject: [PATCH 05/41] Integrate new linear algebra with driven (no adaptive for now) and eigenmode simulation types Eigenvalue solver updates applied to SLEPc and ARPACK, not FEAST yet. Need to incorporate into wave ports too. 
--- palace/drivers/drivensolver.cpp | 205 ++- palace/drivers/eigensolver.cpp | 459 +++---- palace/drivers/electrostaticsolver.cpp | 2 +- palace/linalg/CMakeLists.txt | 1 - palace/linalg/ams.cpp | 4 +- palace/linalg/ams.hpp | 6 +- palace/linalg/arpack.cpp | 980 ++++++-------- palace/linalg/arpack.hpp | 161 +-- palace/linalg/chebyshev.cpp | 41 +- palace/linalg/complex.cpp | 89 +- palace/linalg/complex.hpp | 29 +- palace/linalg/{eigen.hpp => eps.hpp} | 62 +- palace/linalg/feast.hpp | 2 +- palace/linalg/ksp.cpp | 7 +- palace/linalg/operator.cpp | 13 + palace/linalg/operator.hpp | 2 +- palace/linalg/petsc.cpp | 2 - palace/linalg/petsc.hpp | 1168 ++++++++--------- palace/linalg/slepc.cpp | 1611 +++++++++++++----------- palace/linalg/slepc.hpp | 334 ++--- palace/linalg/strumpack.cpp | 7 +- palace/linalg/strumpack.hpp | 3 - palace/linalg/vector.cpp | 4 +- palace/linalg/vector.hpp | 6 +- palace/main.cpp | 19 +- palace/models/postoperator.cpp | 25 +- palace/models/postoperator.hpp | 21 +- palace/models/spaceoperator.cpp | 26 +- palace/models/spaceoperator.hpp | 5 + palace/models/timeoperator.cpp | 16 +- palace/models/waveportoperator.cpp | 446 +++---- palace/models/waveportoperator.hpp | 22 +- 32 files changed, 2814 insertions(+), 2964 deletions(-) rename palace/linalg/{eigen.hpp => eps.hpp} (63%) diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp index 4a1a51227..9b209eea7 100644 --- a/palace/drivers/drivensolver.cpp +++ b/palace/drivers/drivensolver.cpp @@ -8,7 +8,6 @@ #include "linalg/complex.hpp" #include "linalg/ksp.hpp" #include "linalg/operator.hpp" -#include "linalg/petsc.hpp" #include "models/lumpedportoperator.hpp" #include "models/postoperator.hpp" #include "models/romoperator.hpp" @@ -116,11 +115,6 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in // Because the Dirichlet BC is always homogenous, no special elimination is required on // the RHS. 
Assemble the linear system for the initial frequency (so we can call // KspSolver::SetOperators). Compute everything at the first frequency step. - - // XX TODO WIP - // std::unique_ptr A = spaceop.GetSystemMatrixPetsc( - // SpaceOperator::OperatorType::COMPLETE, omega0, mfem::Operator::DIAG_ONE); - std::unique_ptr K = spaceop.GetComplexSystemMatrix( SpaceOperator::OperatorType::STIFFNESS, Operator::DIAG_ONE); std::unique_ptr M = spaceop.GetComplexSystemMatrix( @@ -139,19 +133,14 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in std::vector> P, AuxP; spaceop.GetPreconditionerMatrix(1.0, omega0, -omega0 * omega0, omega0, P, AuxP); - // KspPreconditioner pc(iodata, spaceop.GetDbcMarker(), spaceop.GetNDSpaces(), - // &spaceop.GetH1Spaces()); - // pc.SetOperator(P, &AuxP); - - // KspSolver ksp(A->GetComm(), iodata, "ksp_"); - // ksp.SetPreconditioner(pc); //XX TODO! - // ksp.SetOperator(*A); + ComplexKspSolver ksp(iodata, spaceop.GetNDSpaces(), &spaceop.GetH1Spaces()); + ksp.SetOperator(*A, P, &AuxP); // Set up RHS vector for the incident field at port boundaries, and the vector for the // first frequency step. ComplexVector RHS(Curl->Width()), E(Curl->Width()), B(Curl->Height()); - E = 0.0; - B = 0.0; + E = std::complex(0.0, 0.0); + B = std::complex(0.0, 0.0); timer.construct_time += timer.Lap(); // Main frequency sweep loop. 
@@ -160,34 +149,35 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in auto t0 = timer.Now(); while (step < nstep) { - // const double freq = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega); - // Mpi::Print("\nIt {:d}/{:d}: ω/2π = {:.3e} GHz (elapsed time = {:.2e} s)\n", step + 1, - // nstep, freq, Timer::Duration(timer.Now() - t0).count()); + const double freq = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega); + Mpi::Print("\nIt {:d}/{:d}: ω/2π = {:.3e} GHz (elapsed time = {:.2e} s)\n", step + 1, + nstep, freq, Timer::Duration(timer.Now() - t0).count()); - // // Assemble the linear system and solve. - // if (step > step0) - // { - // // Update frequency-dependent excitation and operators. - // A = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::COMPLETE, omega, - // mfem::Operator::DIAG_ONE, false); - // spaceop.GetPreconditionerMatrix(omega, P, AuxP, false); - // pc.SetOperator(P, &AuxP); - // ksp.SetOperator(*A); - // } - // spaceop.GetFreqDomainExcitationVector(omega, RHS); - // timer.construct_time += timer.Lap(); + // Assemble the linear system and solve. + if (step > step0) + { + // Update frequency-dependent excitation and operators. + A2 = spaceop.GetComplexSystemMatrix(SpaceOperator::OperatorType::EXTRA, omega, + Operator::DIAG_ZERO); + A = spaceop.GetComplexSystemMatrix(1.0, 1i * omega, -omega * omega, K.get(), C.get(), + M.get(), A2.get()); + spaceop.GetPreconditionerMatrix(1.0, omega, -omega * omega, omega, P, AuxP); + ksp.SetOperator(*A, P, &AuxP); + } + spaceop.GetExcitationVector(omega, RHS); + timer.construct_time += timer.Lap(); - // Mpi::Print("\n"); - // ksp.Mult(RHS, E); - // timer.solve_time += timer.Lap(); + Mpi::Print("\n"); + ksp.Mult(RHS, E); + timer.solve_time += timer.Lap(); // Compute B = -1/(iω) ∇ x E on the true dofs, and set the internal GridFunctions in // PostOperator for all postprocessing operations. 
double E_elec = 0.0, E_mag = 0.0; Curl->Mult(E, B); B *= -1.0 / (1i * omega); - // postop.SetEGridFunction(E); //XX TODO petsc::PetscParVector - // postop.SetBGridFunction(B); + postop.SetEGridFunction(E); + postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp(), spaceop.GetWavePortOp(), omega); Mpi::Print(" Sol. ||E|| = {:.6e} (||RHS|| = {:.6e})\n", E.Norml2(), RHS.Norml2()); if (!iodata.solver.driven.only_port_post) @@ -209,7 +199,7 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in step++; omega += delta_omega; } - // SaveMetadata(ksp.GetTotalNumMult(), ksp.GetTotalNumIter()); //XX TODO + SaveMetadata(ksp); } void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, int nstep, @@ -248,77 +238,76 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i // high-dimensional field solution. std::unique_ptr Curl = spaceop.GetComplexCurlMatrix(); ComplexVector E(Curl->Width()), B(Curl->Height()); - E = 0.0; - B = 0.0; - - // // Configure the PROM operator which performs the parameter space sampling and basis - // // construction during the offline phase as well as the PROM solution during the online - // // phase. Initialize the basis with samples from the top and bottom of the frequency - // // range of interest. Each call for an HDM solution adds the frequency sample to P_S - // and - // // removes it from P \ P_S. 
- // timer.construct_time += timer.Lap(); - // Timer local_timer; - // Mpi::Print("\nBeginning PROM construction offline phase:\n" - // " {:d} points for frequency sweep over [{:.3e}, {:.3e}] GHz\n", - // nstep - step0, - // iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega0), - // iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, - // omega0 + (nstep - step0 - 1) * delta_omega)); - // spaceop.GetWavePortOp().SetSuppressOutput(true); // Suppress wave port stuff for - // offline RomOperator prom(iodata, spaceop, nmax); prom.Initialize(nstep - step0, omega0, - // delta_omega); local_timer.construct_time += local_timer.Lap(); + E = std::complex(0.0, 0.0); + B = std::complex(0.0, 0.0); + + // Configure the PROM operator which performs the parameter space sampling and basis + // construction during the offline phase as well as the PROM solution during the online + // phase. Initialize the basis with samples from the top and bottom of the frequency + // range of interest. Each call for an HDM solution adds the frequency sample to P_S and + // removes it from P \ P_S. 
+ timer.construct_time += timer.Lap(); + Timer local_timer; + Mpi::Print("\nBeginning PROM construction offline phase:\n" + " {:d} points for frequency sweep over [{:.3e}, {:.3e}] GHz\n", + nstep - step0, + iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega0), + iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, + omega0 + (nstep - step0 - 1) * delta_omega)); + spaceop.GetWavePortOp().SetSuppressOutput(true); // Suppress wave port stuff for offline + // RomOperator prom(iodata, spaceop, nmax); + // prom.Initialize(nstep - step0, omega0, delta_omega); //XX TODO PROM + local_timer.construct_time += local_timer.Lap(); // prom.SolveHDM(omega0, E, true); // Print matrix stats at first HDM solve // prom.SolveHDM(omega0 + (nstep - step0 - 1) * delta_omega, E, false); - // local_timer.solve_time += local_timer.Lap(); + // local_timer.solve_time += local_timer.Lap(); //XX TODO PROM - // // Greedy procedure for basis construction (offline phase). Basis is initialized with - // // solutions at frequency sweep endpoints. + // Greedy procedure for basis construction (offline phase). Basis is initialized with + // solutions at frequency sweep endpoints. // int iter = static_cast(prom.GetSampleFrequencies().size()), iter0 = iter; // double max_error = 1.0; - // while (true) - // { - // // Compute maximum error in parameter domain with current PROM. - // double omega_star; - // max_error = prom.ComputeMaxError(ncand, omega_star); - // local_timer.construct_time += local_timer.Lap(); - // if (max_error < offline_tol || iter == nmax) - // { - // break; - // } - - // // Sample HDM and add solution to basis. 
- // Mpi::Print( - // "\nGreedy iteration {:d} (n = {:d}): ω* = {:.3e} GHz ({:.3e}), error = {:.3e}\n", - // iter - iter0 + 1, prom.GetReducedDimension(), - // iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega_star), omega_star, - // max_error); - // prom.SolveHDM(omega_star, E); - // local_timer.solve_time += local_timer.Lap(); - // iter++; - // } - // { - // std::vector samples(prom.GetSampleFrequencies()); - // // samples.Sort(); - // for (auto &sample : samples) - // { - // sample = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, sample); - // } - // Mpi::Print("\nAdaptive sampling{} {:d} frequency samples:\n" - // " n = {:d}, error = {:.3e}, tol = {:.3e}\n", - // (iter == nmax) ? " reached maximum" : " converged with", iter, - // prom.GetReducedDimension(), max_error, offline_tol); - // utils::PrettyPrint(samples, " Sampled frequencies (GHz):"); - // } - // SaveMetadata(prom.GetTotalKspMult(), prom.GetTotalKspIter()); - // const auto local_construction_time = timer.Lap(); - // timer.construct_time += local_construction_time; - // Mpi::Print(" Total offline phase elapsed time: {:.2e} s\n" - // " Parameter space sampling: {:.2e} s, HDM solves: {:.2e} s\n", - // Timer::Duration(local_construction_time).count(), - // Timer::Duration(local_timer.construct_time).count(), - // Timer::Duration(local_timer.solve_time).count()); // Timings on rank 0 + while (true) + { + // Compute maximum error in parameter domain with current PROM. + // double omega_star; + // max_error = prom.ComputeMaxError(ncand, omega_star); //XX TODO PROM + // local_timer.construct_time += local_timer.Lap(); + // if (max_error < offline_tol || iter == nmax) + // { + // break; + // } + + // Sample HDM and add solution to basis. 
+ // Mpi::Print( + // "\nGreedy iteration {:d} (n = {:d}): ω* = {:.3e} GHz ({:.3e}), error = {:.3e}\n", + // iter - iter0 + 1, prom.GetReducedDimension(), + // iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega_star), omega_star, + // max_error); + // prom.SolveHDM(omega_star, E); //XX TODO PROM + // local_timer.solve_time += local_timer.Lap(); + // iter++; + } + { + // std::vector samples(prom.GetSampleFrequencies()); + // for (auto &sample : samples) + // { + // sample = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, sample); + // } + // Mpi::Print("\nAdaptive sampling{} {:d} frequency samples:\n" + // " n = {:d}, error = {:.3e}, tol = {:.3e}\n", + // (iter == nmax) ? " reached maximum" : " converged with", iter, + // prom.GetReducedDimension(), max_error, offline_tol); //XX TODO PROM + // utils::PrettyPrint(samples, " Sampled frequencies (GHz):"); + } + // SaveMetadata(prom.GetLinearSolver()); //XX TODO PROM + const auto local_construction_time = timer.Lap(); + timer.construct_time += local_construction_time; + Mpi::Print(" Total offline phase elapsed time: {:.2e} s\n" + " Parameter space sampling: {:.2e} s, HDM solves: {:.2e} s\n", + Timer::Duration(local_construction_time).count(), + Timer::Duration(local_timer.construct_time).count(), + Timer::Duration(local_timer.solve_time).count()); // Timings on rank 0 // Main fast frequency sweep loop (online phase). Mpi::Print("\nBeginning fast frequency sweep online phase\n"); @@ -332,15 +321,13 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i Mpi::Print("\nIt {:d}/{:d}: ω/2π = {:.3e} GHz (elapsed time = {:.2e} s)\n", step + 1, nstep, freq, Timer::Duration(timer.Now() - t0).count()); - // XX TODO PROM - - // // Assemble the linear system and solve. - // prom.AssemblePROM(omega); - // timer.construct_time += timer.Lap(); + // Assemble the linear system and solve. 
+ // prom.AssemblePROM(omega); //XX TODO PROM + timer.construct_time += timer.Lap(); - // Mpi::Print("\n"); - // prom.SolvePROM(E); - // timer.solve_time += timer.Lap(); + Mpi::Print("\n"); + // prom.SolvePROM(E); //XX TODO PROM + timer.solve_time += timer.Lap(); // Compute B = -1/(iω) ∇ x E on the true dofs, and set the internal GridFunctions in // PostOperator for all postprocessing operations. diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp index 16a776b5a..a4f3b4dde 100644 --- a/palace/drivers/eigensolver.cpp +++ b/palace/drivers/eigensolver.cpp @@ -10,7 +10,6 @@ #include "linalg/feast.hpp" #include "linalg/ksp.hpp" #include "linalg/operator.hpp" -#include "linalg/petsc.hpp" #include "linalg/slepc.hpp" #include "models/lumpedportoperator.hpp" #include "models/postoperator.hpp" @@ -50,10 +49,7 @@ void EigenSolver::Solve(std::vector> &mesh, // with λ = iω. A shift-and-invert strategy is employed to solve for the eigenvalues // closest to the specified target, σ. In general, the system matrices are complex and // symmetric. - - // XX TODO - // std::unique_ptr eigen; - + std::unique_ptr eigen; config::EigenSolverData::Type type = iodata.solver.eigenmode.type; #if defined(PALACE_WITH_ARPACK) && defined(PALACE_WITH_SLEPC) if (type == config::EigenSolverData::Type::DEFAULT) @@ -79,100 +75,94 @@ void EigenSolver::Solve(std::vector> &mesh, #else #error "Eigenmode solver requires building with ARPACK or SLEPc!" #endif - - // XX TODO REVISIT... 
- - // if (type == config::EigenSolverData::Type::FEAST) - // { - // Mpi::Print("\nConfiguring FEAST eigenvalue solver\n"); - // #if defined(PALACE_WITH_SLEPC) - // if (C) - // { - // eigen = std::make_unique( - // K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, - // iodata.problem.verbose); - // } - // else - // { - // eigen = std::make_unique( - // K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, - // iodata.problem.verbose); - // } - // #endif - // } - // else if (type == config::EigenSolverData::Type::ARPACK) - // { - // Mpi::Print("\nConfiguring ARPACK eigenvalue solver\n"); - // #if defined(PALACE_WITH_ARPACK) - // if (C) - // { - // eigen = std::make_unique(iodata.problem.verbose); - // } - // else - // { - // eigen = std::make_unique(iodata.problem.verbose); - // } - // #endif - // } - // else // config::EigenSolverData::Type::SLEPC - // { - // Mpi::Print("\nConfiguring SLEPc eigenvalue solver\n"); - // #if defined(PALACE_WITH_SLEPC) - // std::unique_ptr slepc; - // if (C) - // { - // if (!iodata.solver.eigenmode.pep_linear) - // { - // slepc = - // std::make_unique(K->GetComm(), - // iodata.problem.verbose); - // slepc->SetType(slepc::SlepcEigenSolver::Type::TOAR); - // } - // else - // { - // slepc = std::make_unique(K->GetComm(), - // iodata.problem.verbose); - // slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); - // } - // } - // else - // { - // slepc = std::make_unique(K->GetComm(), - // iodata.problem.verbose); - // slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); - // } - // slepc->SetProblemType(slepc::SlepcEigenSolver::ProblemType::GEN_NON_HERMITIAN); - // slepc->SetOrthogonalization(iodata.solver.linear.orthog_mgs, - // iodata.solver.linear.orthog_cgs2); - // eigen = std::move(slepc); - // #endif - // } - // EigenSolverBase::ScaleType scale = iodata.solver.eigenmode.scale - // ? 
EigenSolverBase::ScaleType::NORM_2 - // : EigenSolverBase::ScaleType::NONE; - - // XX TODO REVISIT BELOW... - - // if (C) - // { - // eigen->SetOperators(*K, *C, *M, scale); - // } - // else - // { - // eigen->SetOperators(*K, *M, scale); - // } - // eigen->SetNumModes(iodata.solver.eigenmode.n, iodata.solver.eigenmode.max_size); - // eigen->SetTol(iodata.solver.eigenmode.tol); - // eigen->SetMaxIter(iodata.solver.eigenmode.max_it); - // Mpi::Print(" Scaling γ = {:.3e}, δ = {:.3e}\n", eigen->GetScalingGamma(), - // eigen->GetScalingDelta()); - - // const double target = iodata.solver.eigenmode.target; - // const double f_target = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, - // target); std::unique_ptr A; - // std::vector> P, AuxP; - // std::unique_ptr ksp; - // std::unique_ptr pc; + if (type == config::EigenSolverData::Type::FEAST) + { + MFEM_ABORT("FEAST eigenvalue solver is currently not supported!"); + // Mpi::Print("\nConfiguring FEAST eigenvalue solver\n"); + // #if defined(PALACE_WITH_SLEPC) + // if (C) + // { + // eigen = std::make_unique( + // K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, + // iodata.problem.verbose); + // } + // else + // { + // eigen = std::make_unique( + // K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, + // iodata.problem.verbose); + // } + // #endif + } + else if (type == config::EigenSolverData::Type::ARPACK) + { + Mpi::Print("\nConfiguring ARPACK eigenvalue solver\n"); +#if defined(PALACE_WITH_ARPACK) + if (C) + { + eigen = + std::make_unique(K->GetComm(), iodata.problem.verbose); + } + else + { + eigen = + std::make_unique(K->GetComm(), iodata.problem.verbose); + } +#endif + } + else // config::EigenSolverData::Type::SLEPC + { + Mpi::Print("\nConfiguring SLEPc eigenvalue solver\n"); +#if defined(PALACE_WITH_SLEPC) + std::unique_ptr slepc; + if (C) + { + if (!iodata.solver.eigenmode.pep_linear) + { + slepc = + std::make_unique(K->GetComm(), iodata.problem.verbose); + 
slepc->SetType(slepc::SlepcEigenSolver::Type::TOAR); + } + else + { + slepc = std::make_unique(K->GetComm(), + iodata.problem.verbose); + slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); + } + } + else + { + slepc = std::make_unique(K->GetComm(), iodata.problem.verbose); + slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); + } + slepc->SetProblemType(slepc::SlepcEigenSolver::ProblemType::GEN_NON_HERMITIAN); + slepc->SetOrthogonalization(iodata.solver.linear.orthog_mgs, + iodata.solver.linear.orthog_cgs2); + eigen = std::move(slepc); +#endif + } + EigenvalueSolver::ScaleType scale = iodata.solver.eigenmode.scale + ? EigenvalueSolver::ScaleType::NORM_2 + : EigenvalueSolver::ScaleType::NONE; + if (C) + { + eigen->SetOperators(*K, *C, *M, scale); + } + else + { + eigen->SetOperators(*K, *M, scale); + } + eigen->SetNumModes(iodata.solver.eigenmode.n, iodata.solver.eigenmode.max_size); + eigen->SetTol(iodata.solver.eigenmode.tol); + eigen->SetMaxIter(iodata.solver.eigenmode.max_it); + Mpi::Print(" Scaling γ = {:.3e}, δ = {:.3e}\n", eigen->GetScalingGamma(), + eigen->GetScalingDelta()); + + const double target = iodata.solver.eigenmode.target; + const double f_target = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, target); + std::unique_ptr A; + std::vector> P, AuxP; + std::unique_ptr ksp; // #if defined(PALACE_WITH_SLEPC) // auto *feast = dynamic_cast(eigen.get()); // if (feast) @@ -226,156 +216,131 @@ void EigenSolver::Solve(std::vector> &mesh, // } // else // #endif - // { - // Mpi::Print(" Shift-and-invert σ = {:.3e} GHz ({:.3e})\n", f_target, target); - // if (C) - // { - // // Search for eigenvalues closest to λ = iσ. - // eigen->SetShiftInvert(0.0, target); - // if (type == config::EigenSolverData::Type::ARPACK) - // { - // // ARPACK searches based on eigenvalues of the transformed problem. 
The - // eigenvalue - // // 1/(λ-σ) will be a large-magnitude negative imaginary number for an - // eigenvalue λ - // // with frequency close to but not below the target σ. - // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::SMALLEST_IMAGINARY); - // } - // else - // { - // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_IMAGINARY); - // } - // // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_MAGNITUDE); - // } - // else - // { - // // Linear EVP has eigenvalues μ = -λ² = ω². Search for eigenvalues closest to μ = - // σ². eigen->SetShiftInvert(target * target, 0.0); if (type == - // config::EigenSolverData::Type::ARPACK) - // { - // // ARPACK searches based on eigenvalues of the transformed problem. 1/(μ-σ²) - // will be - // // a large-magnitude positive real number for an eigenvalue μ with frequency - // close - // // to but below the target σ². - // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::LARGEST_REAL); - // } - // else - // { - // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_REAL); - // } - // // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_MAGNITUDE); - // } - - // // Set up the linear solver required for solving systems involving the shifted - // operator - // // (K - σ² M) or P(iσ) = (K + iσ C - σ² M) during the eigenvalue solve. The - // // preconditioner for complex linear systems is constructed from a real - // approximation - // // to the complex system matrix. + { + Mpi::Print(" Shift-and-invert σ = {:.3e} GHz ({:.3e})\n", f_target, target); + if (C) + { + // Search for eigenvalues closest to λ = iσ. + eigen->SetShiftInvert(0.0, target); + if (type == config::EigenSolverData::Type::ARPACK) + { + // ARPACK searches based on eigenvalues of the transformed problem. The eigenvalue + // 1 / (λ - σ) will be a large-magnitude negative imaginary number for an eigenvalue + // λ with frequency close to but not below the target σ. 
+ eigen->SetWhichEigenpairs(EigenvalueSolver::WhichType::SMALLEST_IMAGINARY); + } + else + { + eigen->SetWhichEigenpairs(EigenvalueSolver::WhichType::TARGET_IMAGINARY); + } + } + else + { + // Linear EVP has eigenvalues μ = -λ² = ω². Search for eigenvalues closest to μ = σ². + eigen->SetShiftInvert(target * target, 0.0); + if (type == config::EigenSolverData::Type::ARPACK) + { + // ARPACK searches based on eigenvalues of the transformed problem. 1 / (μ - σ²) + // will be a large-magnitude positive real number for an eigenvalue μ with frequency + // close to but below the target σ². + eigen->SetWhichEigenpairs(EigenvalueSolver::WhichType::LARGEST_REAL); + } + else + { + eigen->SetWhichEigenpairs(EigenvalueSolver::WhichType::TARGET_REAL); + } + } - // // XX TODO REPLACE... WITH SUM OPERATOR! + // Set up the linear solver required for solving systems involving the shifted operator + // (K - σ² M) or P(iσ) = (K + iσ C - σ² M) during the eigenvalue solve. The + // preconditioner for complex linear systems is constructed from a real approximation + // to the complex system matrix. + A = spaceop.GetComplexSystemMatrix(1.0, 1i * target, -target * target, K.get(), C.get(), + M.get(), nullptr); - // A = utils::GetSystemMatrixShell(target, *K, *M, C.get()); + spaceop.GetPreconditionerMatrix(1.0, target, -target * target, target, P, AuxP); - // // XX TODO MOVE THIS FOR LOG FILE... - // spaceop.GetPreconditionerMatrix(target, P, AuxP); + ksp = std::make_unique(iodata, spaceop.GetNDSpaces(), + &spaceop.GetH1Spaces()); + ksp->SetOperator(*A, P, &AuxP); + eigen->SetLinearSolver(*ksp); - // pc = std::make_unique(iodata, spaceop.GetDbcMarker(), - // spaceop.GetNDSpaces(), - // &spaceop.GetH1Spaces()); - // pc->SetOperator(P, &AuxP); + // XX TODO REVISIT FOR LOG PRINT FORMATTING... 
+ } - // ksp = std::make_unique(A->GetComm(), iodata, "ksp_"); - // ksp->SetPreconditioner(*pc); - // ksp->SetOperator(*A); - // ksp->SetTabLevel(1); - // eigen->SetLinearSolver(*ksp); - // } + // If desired, use an M-inner product for orthogonalizing the eigenvalue subspace. The + // constructed matrix just references the real SPD part of the mass matrix (no copy is + // performed). + std::unique_ptr Mr; + if (iodata.solver.eigenmode.mass_orthog) + { + // Mpi::Print(" Basis uses M-inner product\n"); + // Mr = std::make_unique(M->Real(), 1.0); + // eigen->SetBMat(*Mr); + + Mpi::Print(" Basis uses (K + M)-inner product\n"); + auto KM = std::make_unique(M->Real(), 1.0); + KM->AddOperator(K->Real(), 1.0); + Mr = std::move(KM); + eigen->SetBMat(*Mr); + } - // // If desired, use an M-inner product for orthogonalizing the eigenvalue subspace. The - // // constructed matrix just references the real SPD part of the mass matrix (no copy is - // // performed). - // std::unique_ptr Mr; - // if (iodata.solver.eigenmode.mass_orthog) - // { - // // Mpi::Print(" Basis uses M-inner product\n"); - // // Mr = std::make_unique( - // // mesh.back()->GetComm(), - // // - // std::make_unique(*M->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL))); - - // Mpi::Print(" Basis uses (K + M)-inner product\n"); - // auto KM = std::make_unique(K->GetNumRows(), K->GetNumCols()); - // KM->AddOperator(*K->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL)); - // KM->AddOperator(*M->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL)); - // Mr = std::make_unique(mesh.back()->GetComm(), - // std::move(KM)); - - // Mr->SetRealSymmetric(); - // eigen->SetBMat(*Mr); - // } + // Construct a divergence-free projector so the eigenvalue solve is performed in the space + // orthogonal to the zero eigenvalues of the stiffness matrix. 
+ std::unique_ptr divfree; + if (iodata.solver.linear.divfree_max_it > 0) + { + constexpr int divfree_verbose = 0; + divfree = std::make_unique( + spaceop.GetMaterialOp(), spaceop.GetNDSpace(), spaceop.GetH1Spaces(), + spaceop.GetAuxBdrTDofLists(), iodata.solver.linear.divfree_tol, + iodata.solver.linear.divfree_max_it, divfree_verbose); + eigen->SetDivFreeProjector(*divfree); + } - // // Construct a divergence-free projector so the eigenvalue solve is performed in the - // space - // // orthogonal to the zero eigenvalues of the stiffness matrix. - // std::unique_ptr divfree; - // if (iodata.solver.linear.divfree_max_it > 0) - // { - // constexpr int divfree_verbose = 0; - // divfree = std::make_unique( - // spaceop.GetMaterialOp(), spaceop.GetAuxBdrMarker(), spaceop.GetNDSpace(), - // spaceop.GetH1Spaces(), iodata.solver.linear.divfree_tol, - // iodata.solver.linear.divfree_max_it, divfree_verbose); - // eigen->SetProjector(*divfree); - // } + // Set up the initial space for the eigenvalue solve. Satisfies boundary conditions and is + // projected appropriately. + if (iodata.solver.eigenmode.init_v0) + { + ComplexVector v0; + if (iodata.solver.eigenmode.init_v0_const) + { + Mpi::Print(" Using constant starting vector\n"); + spaceop.GetConstantInitialVector(v0); + } + else + { + Mpi::Print(" Using random starting vector\n"); + spaceop.GetRandomInitialVector(v0); + } + if (divfree) + { + divfree->Mult(v0); + } + eigen->SetInitialSpace(v0); // Copies the vector - // // Set up the initial space for the eigenvalue solve. Satisfies boundary conditions and - // is - // // projected appropriately. 
- // if (iodata.solver.eigenmode.init_v0) - // { - // petsc::PetscParVector v0(*K); - // if (iodata.solver.eigenmode.init_v0_const) - // { - // Mpi::Print(" Using constant starting vector\n"); - // v0 = 1.0; - // } - // else - // { - // Mpi::Print(" Using random starting vector\n"); - // v0.SetRandom(); - // } - // v0.ZeroRows(spaceop.GetDbcTDofList()); - // if (divfree) - // { - // divfree->Mult(v0); - // } - // eigen->SetInitialSpace(v0); // Copies the vector - // // { - // // std::unique_ptr Grad = spaceop.GetGradMatrixPetsc(); - // // petsc::PetscParVector r0(*Grad, false); - // // Grad->MultTranspose(v0, r0); - // // r0.Print(); - // // } - // } - // timer.construct_time += timer.Lap(); + // Debug + // auto Grad = spaceop.GetComplexGradMatrix(); + // ComplexVector r0(Grad->Width()); + // Grad->MultTranspose(v0, r0); + // r0.Print(); + } + timer.construct_time += timer.Lap(); // Eigenvalue problem solve. Mpi::Print("\n"); - int num_conv = 0; - // int num_conv = eigen->Solve(); - // #if defined(PALACE_WITH_SLEPC) - // if (!ksp) - // { - // const auto &feast = dynamic_cast(*eigen); - // SaveMetadata(feast.GetTotalKspMult(), feast.GetTotalKspIter()); - // } - // else - // #endif - // { - // SaveMetadata(ksp->GetTotalNumMult(), ksp->GetTotalNumIter()); - // } + int num_conv = eigen->Solve(); +#if defined(PALACE_WITH_SLEPC) + // auto *feast = dynamic_cast(eigen.get()); + // if (feast) + // { + // SaveMetadata(feast->GetLinearSolver()); + // } + // else +#endif + { + SaveMetadata(*ksp); + } timer.solve_time += timer.Lap(); // Postprocess the results. @@ -383,13 +348,9 @@ void EigenSolver::Solve(std::vector> &mesh, for (int i = 0; i < num_conv; i++) { // Get the eigenvalue and relative error. - double real, imag, error1, error2; - std::complex omega; - // eigen->GetEigenvalue(i, real, imag); //XX TODO EIGENVALUE SOLVES... 
- // eigen->GetError(i, EigenSolverBase::ErrorType::BACKWARD, error1); - // eigen->GetError(i, EigenSolverBase::ErrorType::ABSOLUTE, error2); - omega.real(real); - omega.imag(imag); + std::complex omega = eigen->GetEigenvalue(i); + double error1 = eigen->GetError(i, EigenvalueSolver::ErrorType::BACKWARD); + double error2 = eigen->GetError(i, EigenvalueSolver::ErrorType::ABSOLUTE); if (!C) { // Linear EVP has eigenvalue μ = -λ² = ω². @@ -403,17 +364,17 @@ void EigenSolver::Solve(std::vector> &mesh, if (i == 0) { Mpi::Print(" Found {:d} converged eigenvalue{} (first = {:.3e}{:+.3e}i)\n", num_conv, - (num_conv > 1) ? "s" : "", real, imag); + (num_conv > 1) ? "s" : "", omega.real(), omega.imag()); Mpi::Print("\n"); } // Compute B = -1/(iω) ∇ x E on the true dofs, and set the internal GridFunctions in // PostOperator for all postprocessing operations. - // eigen->GetEigenvector(i, E); //XX TODO petsc::PetscParVector + eigen->GetEigenvector(i, E); Curl->Mult(E, B); B *= -1.0 / (1i * omega); - // postop.SetEGridFunction(E); //XX TODO petsc::PetscParVector - // postop.SetBGridFunction(B); + postop.SetEGridFunction(E); + postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp(), omega.real()); // Postprocess the mode. diff --git a/palace/drivers/electrostaticsolver.cpp b/palace/drivers/electrostaticsolver.cpp index 111b90798..15b8e879a 100644 --- a/palace/drivers/electrostaticsolver.cpp +++ b/palace/drivers/electrostaticsolver.cpp @@ -46,7 +46,7 @@ void ElectrostaticSolver::Solve(std::vector> &mes // Main loop over terminal boundaries. Mpi::Print("\nComputing electrostatic fields for {:d} terminal boundar{}\n", nstep, (nstep > 1) ? 
"ies" : "y"); - int step = 0, ksp_it = 0; + int step = 0; auto t0 = timer.Now(); for (const auto &[idx, data] : laplaceop.GetSources()) { diff --git a/palace/linalg/CMakeLists.txt b/palace/linalg/CMakeLists.txt index 8668c6831..b3b04a0ee 100644 --- a/palace/linalg/CMakeLists.txt +++ b/palace/linalg/CMakeLists.txt @@ -22,7 +22,6 @@ target_sources(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/ksp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mumps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/operator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/petsc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/slepc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/strumpack.cpp ${CMAKE_CURRENT_SOURCE_DIR}/superlu.cpp diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp index 4966c4276..bed73f212 100644 --- a/palace/linalg/ams.cpp +++ b/palace/linalg/ams.cpp @@ -9,7 +9,7 @@ namespace palace HypreAmsSolver::HypreAmsSolver(mfem::ParFiniteElementSpace &nd_fespace, mfem::ParFiniteElementSpace &h1_fespace, int cycle_it, int smooth_it, int agg_coarsen, bool vector_interp, - bool op_singular, int print_lvl) + bool op_singular, int print) : mfem::HypreSolver(), // From the Hypre docs for AMS: cycles 1, 5, 8, 11, 13 are fastest, 7 yields fewest its // (MFEM default is 13). 14 is similar to 11/13 but is cheaper in that is uses additive @@ -24,7 +24,7 @@ HypreAmsSolver::HypreAmsSolver(mfem::ParFiniteElementSpace &nd_fespace, amg_agg_levels(agg_coarsen), // If we know the operator is singular (no mass matrix, for magnetostatic problems), // internally the AMS solver will avoid G-space corrections. - ams_singular(op_singular), print((print_lvl > 1) ? print_lvl - 1 : 0) + ams_singular(op_singular), print((print > 1) ? 
print - 1 : 0) { // From MFEM: The AMS preconditioner may sometimes require inverting singular matrices // with BoomerAMG, which are handled correctly in Hypre's Solve method, but can produce diff --git a/palace/linalg/ams.hpp b/palace/linalg/ams.hpp index a2ebbc328..385261f7e 100644 --- a/palace/linalg/ams.hpp +++ b/palace/linalg/ams.hpp @@ -50,9 +50,9 @@ class HypreAmsSolver : public mfem::HypreSolver // internally as needed. HypreAmsSolver(mfem::ParFiniteElementSpace &nd_fespace, mfem::ParFiniteElementSpace &h1_fespace, int cycle_it, int smooth_it, - int agg_coarsen, bool vector_interp, bool op_singular, int print_lvl); + int agg_coarsen, bool vector_interp, bool op_singular, int print); HypreAmsSolver(const IoData &iodata, mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace, int print_lvl) + mfem::ParFiniteElementSpace &h1_fespace, int print) : HypreAmsSolver(nd_fespace, h1_fespace, iodata.solver.linear.mat_gmg ? 1 : iodata.solver.linear.mg_cycle_it, iodata.solver.linear.mg_smooth_it, @@ -62,7 +62,7 @@ class HypreAmsSolver : public mfem::HypreSolver : 0, iodata.solver.linear.ams_vector, (iodata.problem.type == config::ProblemData::Type::MAGNETOSTATIC), - print_lvl) + print) { } ~HypreAmsSolver() override; diff --git a/palace/linalg/arpack.cpp b/palace/linalg/arpack.cpp index 37ddaf752..681a6eeaf 100644 --- a/palace/linalg/arpack.cpp +++ b/palace/linalg/arpack.cpp @@ -3,8 +3,6 @@ #include "arpack.hpp" -#if 0 // XX TODO DISABLE ARPACK FOR NOW - #if defined(PALACE_WITH_ARPACK) #if defined(__GNUC__) && defined(__clang__) @@ -22,106 +20,222 @@ // clang-format on #include "linalg/divfree.hpp" #include "linalg/ksp.hpp" -#include "linalg/petsc.hpp" +#include "linalg/vector.hpp" #include "utils/communication.hpp" +namespace +{ + +void CheckInfoAUPD(a_int info) +{ + if (info != 0) + { + std::string msg = "ARPACK pznaupd error: "; + switch (info) + { + case 1: + msg += "Maximum number of iterations taken, all possible eigenvalues " + "have 
been found!"; + break; + case 2: + msg += "No longer an informational error (deprecated starting with " + "release 2 of ARPACK)!"; + break; + case 3: + msg += "No shifts could be applied during a cycle of the Implicitly " + "restarted Arnoldi iteration!"; + break; + case -1: + msg += "N must be positive!"; + break; + case -2: + msg += "NEV must be positive!"; + break; + case -3: + msg += "NCV-NEV >= 2 and less than or equal to N!"; + break; + case -4: + msg += "The maximum number of Arnoldi update iterations allowed must " + "be greater than zero!"; + break; + case -5: + msg += "WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI'"; + break; + case -6: + msg += "BMAT must be one of 'I' or 'G'!"; + break; + case -7: + msg += "Length of private work array WORKL is not sufficient!"; + break; + case -8: + msg += "Error return from LAPACK eigenvalue calculation!"; + break; + case -9: + msg += "Starting vector is zero!"; + break; + case -10: + msg += "IPARAM(7) must be 1, 2, or 3!"; + break; + case -11: + msg += "IPARAM(7) = 1 and BMAT = 'G' are incompatible!"; + break; + case -12: + msg += "IPARAM(1) must be equal to 0 or 1!"; + break; + case -9999: + msg += "Could not build an Arnoldi factorization!"; + break; + default: + msg += "Unknown ARPACK error message!"; + break; + } + MFEM_ABORT(msg.c_str()); + } +} + +void CheckInfoEUPD(a_int info) +{ + if (info != 0) + { + std::string msg = "ARPACK pzneupd error: "; + switch (info) + { + case 1: + msg += "The Schur form computed by LAPACK routine csheqr could not " + "be reordered by LAPACK routine ztrsen!"; + break; + case -1: + msg += "N must be positive!"; + break; + case -2: + msg += "NEV must be positive!"; + break; + case -3: + msg += "NCV-NEV >= 2 and less than or equal to N!"; + break; + case -4: + msg += "The maximum number of Arnoldi update iterations allowed must " + "be greater than zero!"; + break; + case -5: + msg += "WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI'"; + break; + case -6: + msg += 
"BMAT must be one of 'I' or 'G'!"; + break; + case -7: + msg += "Length of private work array WORKL is not sufficient!"; + break; + case -8: + msg += "Error return from LAPACK eigenvalue calculation!"; + break; + case -9: + msg += "Error return from calculation of eigenvectors!"; + break; + case -10: + msg += "IPARAM(7) must be 1, 2, or 3!"; + break; + case -11: + msg += "IPARAM(7) = 1 and BMAT = 'G' are incompatible!"; + break; + case -12: + msg += "HOWMNY = 'S' not yet implemented!"; + break; + case -13: + msg += "HOWMNY must be one of 'A' or 'P' if RVEC = true!"; + break; + case -14: + msg += "PZNAUPD did not find any eigenvalues to sufficient accuracy!"; + break; + case -15: + msg += "ZNEUPD got a different count of the number of converged Ritz " + "values than ZNAUPD got!"; + break; + default: + msg += "Unknown ARPACK error message!"; + break; + } + MFEM_ABORT(msg.c_str()); + } +} + +} // namespace + namespace palace::arpack { // Base class methods -ArpackEigenSolver::ArpackEigenSolver(int print_lvl) +ArpackEigenSolver::ArpackEigenSolver(MPI_Comm comm, int print) : comm(comm), print(print) { // Initialization. - print = print_lvl; info = 0; - nev = ncv = 0; + nev = ncv = n = 0; rtol = 0.0; - max_it = 0; - which_option = ::arpack::which::largest_magnitude; + arpack_it = 0; + which_type = WhichType::LARGEST_MAGNITUDE; + gamma = delta = 1.0; sinvert = false; sigma = 0.0; - gamma = delta = 1.0; - eig = nullptr; - perm = nullptr; - V = nullptr; - res = nullptr; - r = nullptr; opInv = nullptr; opProj = nullptr; opB = nullptr; // Configure debugging output. a_int logfill = 6, ndigit = -6, mgetv0 = 0; - a_int _aupd = (print_lvl > 2) ? 1 : 0, - _aup2 = (print_lvl > 2) ? 2 : ((print_lvl > 0) ? 1 : 0), _aitr = 0, _eigh = 0, - _gets = 0, _apps = 0, _eupd = 0; + a_int _aupd = (print > 2) ? 1 : 0, _aup2 = (print > 2) ? 2 : ((print > 0) ? 
1 : 0), + _aitr = 0, _eigh = 0, _gets = 0, _apps = 0, _eupd = 0; debug_c(logfill, ndigit, mgetv0, _aupd, _aup2, _aitr, _eigh, _gets, _apps, _eupd, _aupd, _aup2, _aitr, _eigh, _gets, _apps, _eupd, _aupd, _aup2, _aitr, _eigh, _gets, _apps, _eupd); cstatn_c(); } -ArpackEigenSolver::~ArpackEigenSolver() -{ - delete[] eig; - delete[] perm; - delete[] res; - delete V; - delete r; -} - -void ArpackEigenSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void ArpackEigenSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { MFEM_ABORT("SetOperators not defined for base class ArpackEigenSolver!"); } -void ArpackEigenSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void ArpackEigenSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { MFEM_ABORT("SetOperators not defined for base class ArpackEigenSolver!"); } -void ArpackEigenSolver::SetLinearSolver(const KspSolver &ksp) +void ArpackEigenSolver::SetLinearSolver(const ComplexKspSolver &ksp) { opInv = &ksp; } -void ArpackEigenSolver::SetProjector(const DivFreeSolver &divfree) +void ArpackEigenSolver::SetDivFreeProjector(const DivFreeSolver &divfree) { opProj = &divfree; } -void ArpackEigenSolver::SetBMat(const petsc::PetscParMatrix &B) +void ArpackEigenSolver::SetBMat(const Operator &B) { - MFEM_VERIFY(!opB || opB->GetNumRows() == B.GetNumRows(), + MFEM_VERIFY(!opB || opB->Height() == B.Height(), "Invalid modification of eigenvalue problem size!"); opB = &B; } -void ArpackEigenSolver::SetNumModes(int numeig, int numvec) +void ArpackEigenSolver::SetNumModes(int num_eig, int num_vec) { - if (nev > 0 && numeig != nev) + if (nev > 0 && num_eig != nev) { - delete[] eig; - delete[] perm; - delete[] res; - eig = nullptr; 
- perm = nullptr; - res = nullptr; + eig.reset(); + perm.reset(); + res.reset(); } - if (ncv > 0 && numvec != ncv) + if (ncv > 0 && num_vec != ncv) { - delete V; - V = nullptr; + V.reset(); } - nev = numeig; - ncv = (numvec > 0) ? numvec : std::max(20, 2 * nev + 1); // Default from SLEPc + nev = num_eig; + ncv = (num_vec > 0) ? num_vec : std::max(20, 2 * nev + 1); // Default from SLEPc } void ArpackEigenSolver::SetTol(double tol) @@ -129,14 +243,56 @@ void ArpackEigenSolver::SetTol(double tol) rtol = tol; } -void ArpackEigenSolver::SetMaxIter(int maxits) +void ArpackEigenSolver::SetMaxIter(int max_it) { - max_it = maxits; + arpack_it = max_it; } -void ArpackEigenSolver::SetWhichEigenpairs(EigenSolverBase::WhichType type) +void ArpackEigenSolver::SetWhichEigenpairs(EigenvalueSolver::WhichType type) { - switch (type) + which_type = type; +} + +void ArpackEigenSolver::SetShiftInvert(std::complex s, bool precond) +{ + MFEM_VERIFY(!precond, "ARPACK eigenvalue solver does not support preconditioned " + "spectral transformation option!"); + sigma = s; + sinvert = true; +} + +void ArpackEigenSolver::SetInitialSpace(const ComplexVector &v) +{ + MFEM_VERIFY( + n > 0, + "Must call SetOperators before using SetInitialSpace for ARPACK eigenvalue solver!"); + if (!r) + { + r = std::make_unique>(n); + } + MFEM_VERIFY(v.Size() == 2 * n, + "Invalid size mismatch for provided initial space vector!"); + v.Get(r.get(), n); + info = 1; +} + +int ArpackEigenSolver::SolveInternal(int n, std::complex *r, + std::complex *V, std::complex *eig, + int *perm) +{ + MPI_Fint fcomm = MPI_Comm_c2f(comm); + a_int iparam[11] = {0}; + iparam[0] = 1; // Exact shifts + iparam[2] = (a_int)arpack_it; // Maximum number of Arnoldi iterations + iparam[3] = 1; // Block size + iparam[4] = 0; // Number of converged Ritz values + iparam[6] = sinvert ? 3 : 1; // Problem mode + + ::arpack::bmat bmat_option = + (opB) ? 
::arpack::bmat::generalized : ::arpack::bmat::identity; + + ::arpack::which which_option; + switch (which_type) { case WhichType::LARGEST_MAGNITUDE: case WhichType::TARGET_MAGNITUDE: @@ -161,90 +317,33 @@ void ArpackEigenSolver::SetWhichEigenpairs(EigenSolverBase::WhichType type) case WhichType::TARGET_IMAGINARY: MFEM_ABORT("ARPACK eigenvalue solver does not implement TARGET_REAL or " "TARGET_IMAGINARY for SetWhichEigenpairs!"); + which_option = ::arpack::which::largest_magnitude; // For compiler warning break; } -} - -void ArpackEigenSolver::SetShiftInvert(double tr, double ti, bool precond) -{ - MFEM_VERIFY(!precond, "ARPACK eigenvalue solver does not support preconditioned " - "spectral transformation option!"); - sigma = tr + PETSC_i * ti; - sinvert = true; -} - -void ArpackEigenSolver::SetInitialSpace(const petsc::PetscParVector &v) -{ - if (!r) - { - r = new petsc::PetscParVector(v); - } - else - { - MFEM_VERIFY(v.GetSize() == r->GetSize(), - "Invalid modification of eigenvalue problem size!"); - r->Copy(v); - } - info = 1; -} - -int ArpackEigenSolver::SolveInternal(petsc::PetscParVector &r_, petsc::PetscDenseMatrix &V_, - PetscScalar *eig_, int *perm_) -{ - MPI_Comm comm; - MPI_Fint fcomm; - a_int ido, info_ = (a_int)info; - a_int iparam[11] = {0}, ipntr[14] = {0}; - a_int n, nev_, ncv_; - ::arpack::bmat bmat_option = - (opB) ? ::arpack::bmat::generalized : ::arpack::bmat::identity; - PetscScalar *workd, *workl; - double *rwork; - a_int lworkl; - - comm = r_.GetComm(); - fcomm = MPI_Comm_c2f(comm); - iparam[0] = 1; // Exact shifts - iparam[2] = (a_int)max_it; // Maximum number of Arnoldi iterations - iparam[3] = 1; // Block size - iparam[4] = 0; // Number of converged Ritz values - iparam[6] = sinvert ? 3 : 1; // Problem mode - - // Set problem sizes. The cast to int should always be safe because this is a local size. - n = (a_int)r_.GetSize(); - nev_ = (a_int)nev; - ncv_ = (a_int)ncv; // Allocate work arrays. 
- lworkl = 3 * ncv_ * ncv_ + 5 * ncv_; - workd = new PetscScalar[3 * n]; - workl = new PetscScalar[lworkl]; - rwork = new double[ncv_]; - - PetscScalar *pr_ = r_.GetArray(); - PetscScalar *pV_ = V_.GetArray(); - petsc::PetscParVector x(comm, n, PETSC_DECIDE, nullptr); - petsc::PetscParVector y(comm, n, PETSC_DECIDE, nullptr); + a_int lworkl = 3 * ncv * ncv + 5 * ncv; + auto workd = std::make_unique>(3 * n); + auto workl = std::make_unique>(lworkl); + auto rwork = std::make_unique(ncv); // Begin RCI loop. - ido = 0; + a_int ido = 0, ainfo = (a_int)info, ipntr[14] = {0}; while (true) { // Call complex problem driver. - naupd(fcomm, ido, bmat_option, n, which_option, nev_, rtol, pr_, ncv_, pV_, n, iparam, - ipntr, workd, workl, lworkl, rwork, info_); - CheckInfoAUPD(info_); + naupd(fcomm, ido, bmat_option, (a_int)n, which_option, (a_int)nev, rtol, r, (a_int)ncv, + V, (a_int)n, iparam, ipntr, workd.get(), workl.get(), lworkl, rwork.get(), ainfo); + CheckInfoAUPD(ainfo); - // We never use pre-computed B * x in workd[ipntr[2]-1]. - x.PlaceArray(&workd[ipntr[0] - 1]); - y.PlaceArray(&workd[ipntr[1] - 1]); + // We never use pre-computed B * x in workd[ipntr[2] - 1]. if (ido == 1 || ido == -1) { - ApplyOp(x, y); + ApplyOp(&workd.get()[ipntr[0] - 1], &workd.get()[ipntr[1] - 1]); } else if (ido == 2) { - ApplyOpB(x, y); + ApplyOpB(&workd.get()[ipntr[0] - 1], &workd.get()[ipntr[1] - 1]); } else if (ido == 99) { @@ -254,297 +353,133 @@ int ArpackEigenSolver::SolveInternal(petsc::PetscParVector &r_, petsc::PetscDens { MFEM_ABORT("Internal error in ARPACK RCI interface!"); } - x.ResetArray(); - y.ResetArray(); } // Print some log information. 
- int niter = (int)iparam[2]; - int nconv = (int)iparam[4]; + int num_it = (int)iparam[2]; + int num_conv = (int)iparam[4]; if (print > 0) { Mpi::Print(comm, "\n ARPACK {} eigensolve {} ({:d} eigenpairs); iterations {:d}\n" " Total number of linear systems solved: {:d}\n" " Total number of linear solver iterations: {:d}\n", - GetName(), (nconv >= nev_) ? "converged" : "finished", nconv, niter, - opInv->GetTotalNumMult(), opInv->GetTotalNumIter()); + GetName(), (num_conv >= nev) ? "converged" : "finished", num_conv, num_it, + opInv->NumTotalMult(), opInv->NumTotalMultIter()); } - if (nconv < nev_) + if (num_conv < nev) { Mpi::Warning( comm, "ARPACK eigenvalue solver found only {:d} of requested {:d} eigenvalues!\n", - nconv, nev_); + num_conv, nev); } // Postprocess eigenvalues and eigenvectors. a_int rvec = 1; ::arpack::howmny howmny_option = ::arpack::howmny::ritz_vectors; - a_int *select; - PetscScalar *workev; // Allocate eigenvalue storage and work arrays. - select = new a_int[ncv_]; - workev = new PetscScalar[2 * ncv_]; + auto select = std::make_unique(ncv); + auto workev = std::make_unique>(2 * ncv); // Call complex problem driver. - PetscScalar sigma_ = sigma / gamma; - neupd(fcomm, rvec, howmny_option, select, eig_, pV_, n, sigma_, workev, bmat_option, n, - which_option, nev_, rtol, pr_, ncv_, pV_, n, iparam, ipntr, workd, workl, lworkl, - rwork, info_); - CheckInfoEUPD(info_); + neupd(fcomm, rvec, howmny_option, select.get(), eig, V, (a_int)n, sigma / gamma, + workev.get(), bmat_option, (a_int)n, which_option, (a_int)nev, rtol, r, (a_int)ncv, + V, (a_int)n, iparam, ipntr, workd.get(), workl.get(), lworkl, rwork.get(), ainfo); + CheckInfoEUPD(ainfo); // Unscale and properly sort the eigenvalues. 
- auto CompareReal = [&eig_](const int &l, const int &r) - { return PetscRealPart(eig_[l]) < PetscRealPart(eig_[r]); }; - auto CompareImag = [&eig_](const int &l, const int &r) - { return PetscImaginaryPart(eig_[l]) < PetscImaginaryPart(eig_[r]); }; - auto CompareAbs = [&eig_](const int &l, const int &r) - { return PetscAbsScalar(eig_[l]) < PetscAbsScalar(eig_[r]); }; - for (int i = 0; i < nev_; i++) + auto CompareReal = [&eig](const int &l, const int &r) + { return eig[l].real() < eig[r].real(); }; + auto CompareImag = [&eig](const int &l, const int &r) + { return eig[l].imag() < eig[r].imag(); }; + auto CompareAbs = [&eig](const int &l, const int &r) + { return std::abs(eig[l]) < std::abs(eig[r]); }; + for (int i = 0; i < nev; i++) { - eig_[i] = eig_[i] * gamma; - perm_[i] = i; + eig[i] = eig[i] * gamma; + perm[i] = i; } if (which_option == ::arpack::which::largest_real || which_option == ::arpack::which::smallest_real) { - std::sort(perm_, perm_ + nev_, CompareReal); + std::sort(perm, perm + nev, CompareReal); } else if (which_option == ::arpack::which::largest_imaginary || which_option == ::arpack::which::smallest_imaginary) { - std::sort(perm_, perm_ + nev_, CompareImag); + std::sort(perm, perm + nev, CompareImag); } else { - std::sort(perm_, perm_ + nev_, CompareAbs); + std::sort(perm, perm + nev, CompareAbs); } - // Cleanup. 
- r_.RestoreArray(pr_); - V_.RestoreArray(pV_); - delete[] select; - delete[] workev; - delete[] workd; - delete[] workl; - delete[] rwork; - - return nconv; + return num_conv; } void ArpackEigenSolver::CheckParameters() const { + MFEM_VERIFY(n > 0, "Operators are not set for ARPACK eigenvalue solver!"); MFEM_VERIFY(nev > 0, "Number of requested modes is not positive!"); MFEM_VERIFY(rtol > 0.0, "Eigensolver tolerance is not positive!"); MFEM_VERIFY(opInv, "No linear solver provided for operator!"); } -void ArpackEigenSolver::CheckInfoAUPD(int info) const -{ - if (info != 0) - { - std::string msg = "ARPACK pznaupd error: "; - switch (info) - { - case 1: - msg += "Maximum number of iterations taken, all possible eigenvalues " - "have been found!"; - break; - case 2: - msg += "No longer an informational error (deprecated starting with " - "release 2 of ARPACK)!"; - break; - case 3: - msg += "No shifts could be applied during a cycle of the Implicitly " - "restarted Arnoldi iteration!"; - break; - case -1: - msg += "N must be positive!"; - break; - case -2: - msg += "NEV must be positive!"; - break; - case -3: - msg += "NCV-NEV >= 2 and less than or equal to N!"; - break; - case -4: - msg += "The maximum number of Arnoldi update iterations allowed must " - "be greater than zero!"; - break; - case -5: - msg += "WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI'"; - break; - case -6: - msg += "BMAT must be one of 'I' or 'G'!"; - break; - case -7: - msg += "Length of private work array WORKL is not sufficient!"; - break; - case -8: - msg += "Error return from LAPACK eigenvalue calculation!"; - break; - case -9: - msg += "Starting vector is zero!"; - break; - case -10: - msg += "IPARAM(7) must be 1, 2, or 3!"; - break; - case -11: - msg += "IPARAM(7) = 1 and BMAT = 'G' are incompatible!"; - break; - case -12: - msg += "IPARAM(1) must be equal to 0 or 1!"; - break; - case -9999: - msg += "Could not build an Arnoldi factorization!"; - break; - default: - msg += "Unknown 
ARPACK error message!"; - break; - } - MFEM_ABORT(msg.c_str()); - } -} - -void ArpackEigenSolver::CheckInfoEUPD(int info) const -{ - if (info != 0) - { - std::string msg = "ARPACK pzneupd error: "; - switch (info) - { - case 1: - msg += "The Schur form computed by LAPACK routine csheqr could not " - "be reordered by LAPACK routine ztrsen!"; - break; - case -1: - msg += "N must be positive!"; - break; - case -2: - msg += "NEV must be positive!"; - break; - case -3: - msg += "NCV-NEV >= 2 and less than or equal to N!"; - break; - case -4: - msg += "The maximum number of Arnoldi update iterations allowed must " - "be greater than zero!"; - break; - case -5: - msg += "WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI'"; - break; - case -6: - msg += "BMAT must be one of 'I' or 'G'!"; - break; - case -7: - msg += "Length of private work array WORKL is not sufficient!"; - break; - case -8: - msg += "Error return from LAPACK eigenvalue calculation!"; - break; - case -9: - msg += "Error return from calculation of eigenvectors!"; - break; - case -10: - msg += "IPARAM(7) must be 1, 2, or 3!"; - break; - case -11: - msg += "IPARAM(7) = 1 and BMAT = 'G' are incompatible!"; - break; - case -12: - msg += "HOWMNY = 'S' not yet implemented!"; - break; - case -13: - msg += "HOWMNY must be one of 'A' or 'P' if RVEC = true!"; - break; - case -14: - msg += "PZNAUPD did not find any eigenvalues to sufficient accuracy!"; - break; - case -15: - msg += "ZNEUPD got a different count of the number of converged Ritz " - "values than ZNAUPD got!"; - break; - default: - msg += "Unknown ARPACK error message!"; - break; - } - MFEM_ABORT(msg.c_str()); - } -} - -void ArpackEigenSolver::GetEigenvalue(int i, double &eigr, double &eigi) const +std::complex ArpackEigenSolver::GetEigenvalue(int i) const { MFEM_VERIFY(eig && i >= 0 && i < nev, "Out of range eigenpair requested (i = " << i << ", nev = " << nev << ")!"); - const int &j = perm[i]; - eigr = PetscRealPart(eig[j]); - eigi = 
PetscImaginaryPart(eig[j]); + const int &j = perm.get()[i]; + return eig.get()[j]; } -void ArpackEigenSolver::GetEigenvector(int i, petsc::PetscParVector &x) const +void ArpackEigenSolver::GetEigenvector(int i, ComplexVector &x) const { MFEM_VERIFY(eig && i >= 0 && i < nev, "Out of range eigenpair requested (i = " << i << ", nev = " << nev << ")!"); - const int &j = perm[i]; - const petsc::PetscParVector v = V->GetColumnRead(j); - x.Copy(v); - V->RestoreColumnRead(j, v); + MFEM_VERIFY(x.Size() == 2 * n, "Invalid size mismatch for provided eigenvector!"); + const int &j = perm.get()[i]; + x.Set(V.get() + j * n, n); } -void ArpackEigenSolver::GetError(int i, EigenSolverBase::ErrorType type, double &err) const +double ArpackEigenSolver::GetError(int i, EigenvalueSolver::ErrorType type) const { MFEM_VERIFY(eig && i >= 0 && i < nev, "Out of range eigenpair requested (i = " << i << ", nev = " << nev << ")!"); - const int &j = perm[i]; - if (res[j] <= 0.0) - { - const petsc::PetscParVector v = V->GetColumnRead(j); - GetResidual(eig[j], v, *r); - res[j] = r->Norml2() / v.Norml2(); - V->RestoreColumnRead(j, v); - } + const int &j = perm.get()[i]; switch (type) { case ErrorType::ABSOLUTE: - err = res[j]; - break; + return res.get()[j]; case ErrorType::RELATIVE: - err = res[j] / PetscAbsScalar(eig[j]); - break; + return res.get()[j] / std::abs(eig.get()[j]); case ErrorType::BACKWARD: - err = res[j] / GetBackwardScaling(eig[j]); - break; + return res.get()[j] / GetBackwardScaling(eig.get()[j]); } + return 0.0; } // EPS specific methods -ArpackEPSSolver::ArpackEPSSolver(int print_lvl) : ArpackEigenSolver(print_lvl) +ArpackEPSSolver::ArpackEPSSolver(MPI_Comm comm, int print) : ArpackEigenSolver(comm, print) { opK = opM = nullptr; normK = normM = 0.0; - z = nullptr; -} - -ArpackEPSSolver::~ArpackEPSSolver() -{ - delete z; } -void ArpackEPSSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void 
ArpackEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { - MFEM_VERIFY(!opK || opK->GetNumRows() == K.GetNumRows(), + MFEM_VERIFY(!opK || opK->Height() == K.Height(), "Invalid modification of eigenvalue problem size!"); bool first = (opK == nullptr); opK = &K; opM = &M; if (first && type != ScaleType::NONE) { - normK = opK->Norm2(); - normM = opM->Norm2(); + normK = linalg::SpectralNorm(comm, *opK, opK->IsReal()); + normM = linalg::SpectralNorm(comm, *opM, opM->IsReal()); MFEM_VERIFY(normK >= 0.0 && normM >= 0.0, "Invalid matrix norms for EPS scaling!"); if (normK > 0 && normM > 0.0) { @@ -554,155 +489,137 @@ void ArpackEPSSolver::SetOperators(const petsc::PetscParMatrix &K, } // Set up workspace. - if (!z) - { - z = new petsc::PetscParVector(K); - } + x.SetSize(opK->Height()); + y.SetSize(opK->Height()); + z.SetSize(opK->Height()); + n = opK->Height() / 2; } int ArpackEPSSolver::Solve() { - // Check input parameters. - CheckParameters(); - MFEM_VERIFY(opK && opM, "Operators are not set for ArpackEPSSolver!"); - // Set some defaults (default maximum iterations from SLEPc). - PetscInt n = opK->GetNumRows(), N = opK->GetGlobalNumRows(); + CheckParameters(); + HYPRE_BigInt N = linalg::GlobalSize(comm, z); if (ncv > N) { - ncv = (int)N; + ncv = mfem::internal::to_int(N); } - if (max_it <= 0) + if (arpack_it <= 0) { - max_it = std::max(300, (int)(2 * N / ncv)); + arpack_it = std::max(300, mfem::internal::to_int(2 * N / ncv)); } // Initialize if user did not provide an initial space. if (!r) { + r = std::make_unique>(n); info = 0; - r = new petsc::PetscParVector(*opK); } if (!info) { - r->SetZero(); + std::fill(r.get(), r.get() + n, 0.0); } // Allocate Arnoldi basis for the problem. if (!V) { - V = new petsc::PetscDenseMatrix(opK->GetComm(), n, PETSC_DECIDE, PETSC_DECIDE, ncv, - nullptr); + V = std::make_unique>(n * ncv); } - // Cache residual norms when calculated later on. 
+ // Allocate storage for eigenvalues and residual norms. if (!eig) { - eig = new PetscScalar[nev + 1]; - perm = new int[nev + 1]; - res = new double[nev + 1]; - } - for (int i = 0; i < nev + 1; i++) - { - res[i] = -1.0; + eig = std::make_unique>(nev + 1); + perm = std::make_unique(nev); + res = std::make_unique(nev); } // Solve the generalized eigenvalue problem. - int nconv = SolveInternal(*r, *V, eig, perm); + int num_conv = SolveInternal(n, r.get(), V.get(), eig.get(), perm.get()); + + // Compute the eigenpair residuals: || (K - λ M) x ||₂ for eigenvalue λ. + for (int i = 0; i < nev; i++) + { + const std::complex l = eig.get()[i]; + x.Set(V.get() + i * n, n); + opK->Mult(x, y); + opM->AddMult(x, y, -l); + res.get()[i] = linalg::Norml2(comm, y); + } // Reset for next solve. info = 0; - return nconv; + return num_conv; } -void ArpackEPSSolver::ApplyOp(const petsc::PetscParVector &x, - petsc::PetscParVector &y) const +void ArpackEPSSolver::ApplyOp(const std::complex *px, + std::complex *py) const { // Case 1: No spectral transformation (opInv = M⁻¹) // y = M⁻¹ K x . // Case 2: Shift-and-invert spectral transformation (opInv = (K - σ M)⁻¹) // y = (K - σ M)⁻¹ M x . 
+ x.Set(px, n); if (!sinvert) { - opK->Mult(x, *z); - opInv->Mult(*z, y); - y.Scale(1.0 / gamma); + opK->Mult(x, z); + opInv->Mult(z, y); + y *= 1.0 / gamma; } else { - opM->Mult(x, *z); - opInv->Mult(*z, y); - y.Scale(gamma); + opM->Mult(x, z); + opInv->Mult(z, y); + y *= gamma; } - - // Debug - // Mpi::Print(" Before projection: {:e}\n", y.Norml2()); - if (opProj) { + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y)); opProj->Mult(y); + // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(comm, y)); } - - // Debug - // Mpi::Print(" After projection: {:e}\n", y.Norml2()); + y.Get(py, n); } -void ArpackEPSSolver::ApplyOpB(const petsc::PetscParVector &x, - petsc::PetscParVector &y) const +void ArpackEPSSolver::ApplyOpB(const std::complex *px, + std::complex *py) const { MFEM_VERIFY(opB, "No B operator for weighted inner product in ARPACK solve!"); - opB->Mult(x, y); - y.Scale(delta * gamma); -} - -void ArpackEPSSolver::GetResidual(PetscScalar l, const petsc::PetscParVector &x, - petsc::PetscParVector &r) const -{ - // r = (K - λ M) x for eigenvalue λ. - opM->Mult(x, r); - r.Scale(-l); - opK->MultAdd(x, r); + x.Set(px, n); + opB->Mult(x.Real(), y.Real()); + opB->Mult(x.Imag(), y.Imag()); + y *= delta * gamma; + y.Get(py, n); } -double ArpackEPSSolver::GetBackwardScaling(PetscScalar l) const +double ArpackEPSSolver::GetBackwardScaling(std::complex l) const { // Make sure not to use norms from scaling as this can be confusing if they are different. // Note that SLEPc uses ||.||∞, not the 2-norm. 
if (normK <= 0.0) { - normK = opK->Norm2(); + normK = linalg::SpectralNorm(comm, *opK, opK->IsReal()); } if (normM <= 0.0) { - normM = opM->Norm2(); + normM = linalg::SpectralNorm(comm, *opM, opM->IsReal()); } - return normK + PetscAbsScalar(l) * normM; + return normK + std::abs(l) * normM; } // PEP specific methods -ArpackPEPSolver::ArpackPEPSolver(int print_lvl) : ArpackEigenSolver(print_lvl) +ArpackPEPSolver::ArpackPEPSolver(MPI_Comm comm, int print) : ArpackEigenSolver(comm, print) { opK = opC = opM = nullptr; normK = normC = normM = 0.0; - x1 = x2 = y1 = y2 = z = nullptr; -} - -ArpackPEPSolver::~ArpackPEPSolver() -{ - delete x1; - delete x2; - delete y1; - delete y2; - delete z; } -void ArpackPEPSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void ArpackPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { - MFEM_VERIFY(!opK || opK->GetNumRows() == K.GetNumRows(), + MFEM_VERIFY(!opK || opK->Height() == K.Height(), "Invalid modification of eigenvalue problem size!"); bool first = (opK == nullptr); opK = &K; @@ -710,9 +627,9 @@ void ArpackPEPSolver::SetOperators(const petsc::PetscParMatrix &K, opM = &M; if (first && type != ScaleType::NONE) { - normK = opK->Norm2(); - normC = opC->Norm2(); - normM = opM->Norm2(); + normK = linalg::SpectralNorm(comm, *opK, opK->IsReal()); + normC = linalg::SpectralNorm(comm, *opC, opC->IsReal()); + normM = linalg::SpectralNorm(comm, *opM, opM->IsReal()); MFEM_VERIFY(normK >= 0.0 && normC >= 0.0 && normM >= 0.0, "Invalid matrix norms for PEP scaling!"); if (normK > 0 && normC > 0.0 && normM > 0.0) @@ -723,102 +640,81 @@ void ArpackPEPSolver::SetOperators(const petsc::PetscParMatrix &K, } // Set up workspace. 
- if (!z) - { - MPI_Comm comm = K.GetComm(); - PetscInt n = K.GetNumRows(); - delete x1; - delete x2; - delete y1; - delete y2; - delete z; - x1 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - x2 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - y1 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - y2 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - z = new petsc::PetscParVector(K); - } + x1.SetSize(opK->Height()); + x2.SetSize(opK->Height()); + y1.SetSize(opK->Height()); + y2.SetSize(opK->Height()); + z.SetSize(opK->Height()); + n = opK->Height() / 2; } int ArpackPEPSolver::Solve() { - // Check input parameters. - CheckParameters(); - MFEM_VERIFY(opK && opC && opM, "Operators are not set for ArpackPEPSolver!"); - // Set some defaults (from SLEPc ARPACK interface). The problem size is the size of the // 2x2 block linearized problem. - PetscInt n = opK->GetNumRows(), N = opK->GetGlobalNumRows(); + CheckParameters(); + HYPRE_BigInt N = linalg::GlobalSize(comm, z); if (ncv > 2 * N) { - ncv = 2 * (int)N; + ncv = mfem::internal::to_int(2 * N); } - if (max_it <= 0) + if (arpack_it <= 0) { - max_it = std::max(300, 4 * (int)(N / ncv)); + arpack_it = std::max(300, mfem::internal::to_int(4 * N / ncv)); } // Initialize if user did not provide an initial space. if (!r) { + r = std::make_unique>(n); info = 0; - r = new petsc::PetscParVector(*opK); } if (!info) { - r->SetZero(); + std::fill(r.get(), r.get() + n, 0.0); } - petsc::PetscParVector *s = new petsc::PetscParVector(opK->GetComm(), 2 * n, PETSC_DECIDE); - PetscScalar *ps = GetBlocks(*s, *x1, *x2); - x1->Copy(*r); - x2->SetZero(); // Second block initialized to zero even with initial guess - RestoreBlocks(ps, *s, *x1, *x2); + auto s = std::make_unique>(2 * n); + std::copy(r.get(), r.get() + n, s.get()); + std::fill(s.get() + n, s.get() + 2 * n, 0.0); // Allocate Arnoldi basis for original and linearized problem. 
if (!V) { - V = new petsc::PetscDenseMatrix(opK->GetComm(), n, PETSC_DECIDE, PETSC_DECIDE, ncv, - nullptr); + V = std::make_unique>(n * ncv); } - petsc::PetscDenseMatrix *W = new petsc::PetscDenseMatrix( - opK->GetComm(), 2 * n, PETSC_DECIDE, PETSC_DECIDE, ncv, nullptr); + auto W = std::make_unique>(2 * n * ncv); - // Cache residual norms when calculated later on. + // Allocate storage for eigenvalues and residual norms. if (!eig) { - eig = new PetscScalar[nev + 1]; - perm = new int[nev + 1]; - res = new double[nev + 1]; - } - for (int i = 0; i < nev + 1; i++) - { - res[i] = -1.0; + eig = std::make_unique>(nev + 1); + perm = std::make_unique(nev + 1); + res = std::make_unique(nev + 1); } // Solve the linearized eigenvalue problem. - int nconv = SolveInternal(*s, *W, eig, perm); + int num_conv = SolveInternal(2 * n, s.get(), W.get(), eig.get(), perm.get()); - // Eigenvector extraction from the linearized eigenproblem. + // Extract the eigenvector from the linearized problem and compute the eigenpair + // residuals: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for eigenvalue λ. for (int i = 0; i < nev; i++) { - petsc::PetscParVector w = W->GetColumn(i); - petsc::PetscParVector v = V->GetColumn(i); - ExtractEigenvector(eig[i], w, v); - W->RestoreColumn(i, w); - V->RestoreColumn(i, v); + const std::complex &l = eig.get()[i]; + ExtractEigenvector(l, W.get() + i * 2 * n, V.get() + i * n); + x1.Set(V.get() + i * n, n); + opK->Mult(x1, y1); + opC->AddMult(x1, y1, l); + opM->AddMult(x1, y1, l * l); + res.get()[i] = linalg::Norml2(comm, y1); } - // Cleanup auxiliary basis and residual vector. - delete W; - delete s; - // Reset for next solve. info = 0; - return nconv; + return num_conv; } -void ArpackPEPSolver::ApplyOp(const petsc::PetscParVector &x, - petsc::PetscParVector &y) const +void ArpackPEPSolver::ApplyOp(const std::complex *px, + std::complex *py) const { // Case 1: No spectral transformation (opInv = M⁻¹) // y = L₁⁻¹ L₀ x . 
@@ -827,143 +723,107 @@ void ArpackPEPSolver::ApplyOp(const petsc::PetscParVector &x, // With: // L₀ = [ -K 0 ] L₁ = [ C M ] // [ 0 M ] , [ M 0 ] . - PetscScalar *px = GetBlocks(const_cast(x), *x1, *x2); - PetscScalar *py = GetBlocks(y, *y1, *y2); + x1.Set(px, n); + x2.Set(px + n, n); if (!sinvert) { - opC->Mult(*x2, *z); - z->Scale(gamma); - opK->MultAdd(*x1, *z); - opInv->Mult(*z, *y2); - y2->Scale(-1.0 / (gamma * gamma)); + y1 = x2; if (opProj) { - opProj->Mult(*y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y1)); + opProj->Mult(y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y1)); } - y1->Copy(*x2); + + opK->Mult(x1, z); + opC->AddMult(x2, z, std::complex(gamma, 0.0)); + opInv->Mult(z, y2); + y2 *= -1.0 / (gamma * gamma); if (opProj) { - opProj->Mult(*y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y2)); + opProj->Mult(y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y2)); } } else { - y1->AXPBYPCZ(sigma, *x1, gamma, *x2, 0.0); // Just temporarily - opM->Mult(*y1, *z); - opC->MultAdd(*x1, *z); - z->Scale(-gamma); - opInv->Mult(*z, *y1); - - // Debug - // Mpi::Print(" Before projection: {:e}\n", y1->Norml2()); - + y2.AXPBYPCZ(sigma, x1, gamma, x2, 0.0); // Just temporarily + opM->Mult(y2, z); + opC->AddMult(x1, z, std::complex(1.0, 0.0)); + opInv->Mult(z, y1); + y1 *= -gamma; if (opProj) { - opProj->Mult(*y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y1)); + opProj->Mult(y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y1)); } - // Debug - // Mpi::Print(" After projection: {:e}\n", y1->Norml2()); - - y2->AXPBYPCZ(sigma / gamma, *y1, 1.0, *x1, 0.0); - - // Debug - // Mpi::Print(" Before projection: {:e}\n", y2->Norml2()); - + y2.AXPBYPCZ(sigma / gamma, y1, 1.0, x1, 0.0); if (opProj) { - opProj->Mult(*y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y2)); + opProj->Mult(y2); + // Mpi::Print(" Before 
projection: {:e}\n", linalg::Norml2(comm, y2)); } - - // Debug - // Mpi::Print(" After projection: {:e}\n", y2->Norml2()); } - RestoreBlocks(px, const_cast(x), *x1, *x2); - RestoreBlocks(py, y, *y1, *y2); + y1.Get(py, n); + y2.Get(py + n, n); } -void ArpackPEPSolver::ApplyOpB(const petsc::PetscParVector &x, - petsc::PetscParVector &y) const +void ArpackPEPSolver::ApplyOpB(const std::complex *px, + std::complex *py) const { MFEM_VERIFY(opB, "No B operator for weighted inner product in ARPACK solve!"); - PetscScalar *px = GetBlocks(const_cast(x), *x1, *x2); - PetscScalar *py = GetBlocks(y, *y1, *y2); - opB->Mult(*x1, *y1); - opB->Mult(*x2, *y2); - y1->Scale(delta * gamma * gamma); - y2->Scale(delta * gamma * gamma); - RestoreBlocks(px, const_cast(x), *x1, *x2); - RestoreBlocks(py, y, *y1, *y2); -} - -void ArpackPEPSolver::GetResidual(PetscScalar l, const petsc::PetscParVector &x, - petsc::PetscParVector &r) const -{ - // r = P(λ) x = (K + λ C + λ² M) x for eigenvalue λ. - opM->Mult(x, r); - r.Scale(l); - opC->MultAdd(x, r); - r.Scale(l); - opK->MultAdd(x, r); + x1.Set(px, n); + x2.Set(px + n, n); + opB->Mult(x1.Real(), y1.Real()); + opB->Mult(x1.Imag(), y1.Imag()); + opB->Mult(x2.Real(), y2.Real()); + opB->Mult(x2.Imag(), y2.Imag()); + y1 *= delta * gamma * gamma; + y2 *= delta * gamma * gamma; + y1.Get(py, n); + y2.Get(py + n, n); } -double ArpackPEPSolver::GetBackwardScaling(PetscScalar l) const +double ArpackPEPSolver::GetBackwardScaling(std::complex l) const { // Make sure not to use norms from scaling as this can be confusing if they are different. // Note that SLEPc uses ||.||∞, not the 2-norm. 
if (normK <= 0.0) { - normK = opK->Norm2(); + normK = linalg::SpectralNorm(comm, *opK, opK->IsReal()); } if (normC <= 0.0) { - normC = opC->Norm2(); + normC = linalg::SpectralNorm(comm, *opC, opC->IsReal()); } if (normM <= 0.0) { - normM = opM->Norm2(); + normM = linalg::SpectralNorm(comm, *opM, opM->IsReal()); } - double t = PetscAbsScalar(l); + double t = std::abs(l); return normK + t * normC + t * t * normM; } -void ArpackPEPSolver::ExtractEigenvector(PetscScalar l, petsc::PetscParVector &y, - petsc::PetscParVector &x) +void ArpackPEPSolver::ExtractEigenvector(std::complex l, + const std::complex *py, + std::complex *px) const { - // Select the most accurate x for y = [x₁; x₂] from the linearized eigenvalue problem. - PetscScalar *py = GetBlocks(y, *y1, *y2); + // Select the most accurate x for y = [x₁; x₂] from the linearized eigenvalue problem. Or, + // just take x = x₁. + x1.Set(py, n); + if (opB) { - if (opB) - { - y1->Normalize(*opB, *r); - } - else - { - y1->Normalize(); - } - x.Copy(*y1); + linalg::Normalize(comm, x1, *opB, y1); } - RestoreBlocks(py, y, *y1, *y2); -} - -PetscScalar *ArpackPEPSolver::GetBlocks(petsc::PetscParVector &v, petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const -{ - PetscInt n1 = v1.GetSize(), n2 = v2.GetSize(); - MFEM_VERIFY(n1 + n2 == v.GetSize(), "Unexpected size in PEP linearization!"); - PetscScalar *pv = v.GetArray(); - v1.PlaceArray(pv); - v2.PlaceArray(pv + n1); - return pv; -} - -void ArpackPEPSolver::RestoreBlocks(PetscScalar *pv, petsc::PetscParVector &v, - petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const -{ - v1.ResetArray(); - v2.ResetArray(); - v.RestoreArray(pv); + else + { + linalg::Normalize(comm, x1); + } + x1.Get(px, n); } } // namespace palace::arpack @@ -973,5 +833,3 @@ void ArpackPEPSolver::RestoreBlocks(PetscScalar *pv, petsc::PetscParVector &v, #endif #endif - -#endif diff --git a/palace/linalg/arpack.hpp b/palace/linalg/arpack.hpp index d959f983c..3bd992cf2 100644 --- 
a/palace/linalg/arpack.hpp +++ b/palace/linalg/arpack.hpp @@ -4,26 +4,20 @@ #ifndef PALACE_LINALG_ARPACK_HPP #define PALACE_LINALG_ARPACK_HPP -#if 0 // XX TODO DISABLE FEAST FOR NOW - #if defined(PALACE_WITH_ARPACK) -#include "linalg/petsc.hpp" - -#if !defined(PETSC_USE_COMPLEX) -#error "ARPACK interface requires PETSc built with complex scalars!" -#endif - -#include -#include "linalg/eigen.hpp" - -// XX TODO REMOVE PETSc DEPENDENCE +#include +#include +#include +#include "linalg/complex.hpp" +#include "linalg/eps.hpp" +#include "linalg/operator.hpp" namespace palace { +class ComplexKspSolver; class DivFreeSolver; -class KspSolver; namespace arpack { @@ -34,52 +28,55 @@ namespace arpack // used to compute interior eigenvalues. Currently only implemented for complex scalar // interface. // -class ArpackEigenSolver : public EigenSolverBase +class ArpackEigenSolver : public EigenvalueSolver { protected: + // MPI communicator for PARPACK. + MPI_Comm comm; + // Control print level for debugging. int print; // Status variable for ARPACK. int info; - // Number eigenvalues to be computed, and dimension. - int nev, ncv; + // Number eigenvalues to be computed, subspace dimension, and problem size. + int nev, ncv, n; // Relative eigenvalue error convergence tolerance for the solver. double rtol; // Maximum number of Arnoldi update iterations. - int max_it; + int arpack_it; // Specifies which part of the spectrum to search for. - ::arpack::which which_option; + EigenvalueSolver::WhichType which_type; // Variables for scaling, from Higham et al., IJNME 2008. double gamma, delta; // Parameters defining the spectral transformation. - PetscScalar sigma; + std::complex sigma; bool sinvert; // Storage for computed eigenvalues. - PetscScalar *eig; - int *perm; + std::unique_ptr> eig; + std::unique_ptr perm; // Storage for Arnoldi basis vectors. - petsc::PetscDenseMatrix *V; + std::unique_ptr> V; // Storage for computed residual norms. 
- mutable double *res; + std::unique_ptr res; // On input used to define optional initial guess, on output stores final residual // vector. - mutable petsc::PetscParVector *r; + std::unique_ptr> r; // Reference to linear solver used for operator action for M⁻¹ (with no spectral // transformation) or (K - σ M)⁻¹ (generalized EVP with shift-and- invert) or P(σ)⁻¹ // (polynomial with shift-and-invert) (not owned). - const KspSolver *opInv; + const ComplexKspSolver *opInv; // Reference to solver for projecting an intermediate vector onto a divergence-free space // (not owned). @@ -87,89 +84,82 @@ class ArpackEigenSolver : public EigenSolverBase // Reference to matrix used for weighted inner products (not owned). May be nullptr, in // which case identity is used. - const petsc::PetscParMatrix *opB; + const Operator *opB; // Perform the ARPACK RCI loop. - int SolveInternal(petsc::PetscParVector &r_, petsc::PetscDenseMatrix &V_, - PetscScalar *eig_, int *perm_); + int SolveInternal(int n, std::complex *r, std::complex *V, + std::complex *eig, int *perm); - // Helper routines for parameter checking. + // Helper routine for parameter checking. void CheckParameters() const; - void CheckInfoAUPD(int info) const; - void CheckInfoEUPD(int info) const; // Helper routines for ARPACK RCI. - virtual void ApplyOp(const petsc::PetscParVector &x, petsc::PetscParVector &y) const = 0; - virtual void ApplyOpB(const petsc::PetscParVector &x, petsc::PetscParVector &y) const = 0; - - // Helper routine for computing the eigenpair residual. - virtual void GetResidual(PetscScalar l, const petsc::PetscParVector &x, - petsc::PetscParVector &r) const = 0; + virtual void ApplyOp(const std::complex *px, std::complex *py) const = 0; + virtual void ApplyOpB(const std::complex *px, std::complex *py) const = 0; // Helper routine for computing the backward error. 
- virtual double GetBackwardScaling(PetscScalar l) const = 0; + virtual double GetBackwardScaling(std::complex l) const = 0; // Return problem type name. virtual const char *GetName() const = 0; public: - ArpackEigenSolver(int print_lvl); - ~ArpackEigenSolver() override; + ArpackEigenSolver(MPI_Comm comm, int print); // Set operators for the generalized eigenvalue problem or for the quadratic polynomial // eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, + void SetOperators(const ComplexOperator &K, const ComplexOperator &M, ScaleType type) override; - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; + void SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, ScaleType type) override; // For the linear generalized case, the linear solver should be configured to compute the // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic // case, the linear solver should be configured to compute the action of M⁻¹ (with no // spectral transformation) or P(σ)⁻¹. - void SetLinearSolver(const KspSolver &ksp) override; + void SetLinearSolver(const ComplexKspSolver &ksp) override; - // Set the projection operator for the divergence-free constraint. - void SetProjector(const DivFreeSolver &divfree) override; + // Set the projection operator for enforcing the divergence-free constraint. + void SetDivFreeProjector(const DivFreeSolver &divfree) override; // Set optional B matrix used for weighted inner products. This must be set explicitly // even for generalized problems, otherwise the identity will be used. - void SetBMat(const petsc::PetscParMatrix &B) override; + void SetBMat(const Operator &B) override; // Get scaling factors used by the solver. 
double GetScalingGamma() const override { return gamma; } double GetScalingDelta() const override { return delta; } // Set the number of required eigenmodes. - void SetNumModes(int numeig, int numvec = 0) override; + void SetNumModes(int num_eig, int num_vec = 0) override; // Set solver tolerance. void SetTol(double tol) override; // Set maximum number of Arnoldi update iterations. - void SetMaxIter(int maxits) override; + void SetMaxIter(int max_it) override; // Set target spectrum for the eigensolver. When a spectral transformation is used, this // applies to the spectrum of the shifted operator. void SetWhichEigenpairs(WhichType type) override; // Set shift-and-invert spectral transformation. - void SetShiftInvert(double tr, double ti, bool precond = false) override; + void SetShiftInvert(std::complex s, bool precond = false) override; // Set an initial vector for the solution subspace. - void SetInitialSpace(const petsc::PetscParVector &v) override; + void SetInitialSpace(const ComplexVector &v) override; // Solve the eigenvalue problem. Returns the number of converged eigenvalues. int Solve() override = 0; // Get the corresponding eigenvalue. - void GetEigenvalue(int i, double &eigr, double &eigi) const override; + std::complex GetEigenvalue(int i) const override; // Get the corresponding eigenvector. - void GetEigenvector(int i, petsc::PetscParVector &x) const override; + void GetEigenvector(int i, ComplexVector &x) const override; // Get the corresponding eigenpair error. - void GetError(int i, ErrorType type, double &err) const override; + double GetError(int i, ErrorType type) const override; }; // Generalized eigenvalue problem solver: K x = λ M x . @@ -177,38 +167,28 @@ class ArpackEPSSolver : public ArpackEigenSolver { private: // References to matrices defining the generalized eigenvalue problem (not owned). - const petsc::PetscParMatrix *opK, *opM; + const ComplexOperator *opK, *opM; // Operator norms for scaling. 
mutable double normK, normM; // Workspace vector for operator applications. - mutable petsc::PetscParVector *z; + mutable ComplexVector x, y, z; protected: - // Helper routines for ARPACK RCI interface. - void ApplyOp(const petsc::PetscParVector &x, petsc::PetscParVector &y) const override; - void ApplyOpB(const petsc::PetscParVector &x, petsc::PetscParVector &y) const override; + void ApplyOp(const std::complex *px, std::complex *py) const override; + void ApplyOpB(const std::complex *px, std::complex *py) const override; - // Helper routine for computing the eigenpair residual: r = (K - λ M) x . - void GetResidual(PetscScalar l, const petsc::PetscParVector &x, - petsc::PetscParVector &r) const override; - - // Helper routine for computing the backward error. - double GetBackwardScaling(PetscScalar l) const override; + double GetBackwardScaling(std::complex l) const override; - // Return problem type name. const char *GetName() const override { return "EPS"; } public: - ArpackEPSSolver(int print_lvl); - ~ArpackEPSSolver() override; + ArpackEPSSolver(MPI_Comm comm, int print); - // Set operators for the generalized eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, + void SetOperators(const ComplexOperator &K, const ComplexOperator &M, ScaleType type) override; - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. int Solve() override; }; @@ -218,49 +198,32 @@ class ArpackPEPSolver : public ArpackEigenSolver private: // References to matrices defining the quadratic polynomial eigenvalue problem // (not owned). - const petsc::PetscParMatrix *opK, *opC, *opM; + const ComplexOperator *opK, *opC, *opM; // Operator norms for scaling. mutable double normK, normC, normM; // Workspace vectors for operator applications. 
- mutable petsc::PetscParVector *x1, *x2, *y1, *y2, *z; + mutable ComplexVector x1, x2, y1, y2, z; // Do eigenvector extraction from the linearized problem to the actual eigenvectors. - void ExtractEigenvector(PetscScalar l, petsc::PetscParVector &y, - petsc::PetscParVector &x); - - // Helper methods for splitting a block vector from the linearized problem into its into - // two parts. - PetscScalar *GetBlocks(petsc::PetscParVector &v, petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const; - void RestoreBlocks(PetscScalar *pv, petsc::PetscParVector &v, petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const; + void ExtractEigenvector(std::complex l, const std::complex *py, + std::complex *px) const; protected: - // Helper routines for ARPACK RCI interface. - void ApplyOp(const petsc::PetscParVector &x, petsc::PetscParVector &y) const override; - void ApplyOpB(const petsc::PetscParVector &x, petsc::PetscParVector &y) const override; + void ApplyOp(const std::complex *px, std::complex *py) const override; + void ApplyOpB(const std::complex *px, std::complex *py) const override; - // Helper routine for computing the eigenpair residual: r = P(λ) x . - void GetResidual(PetscScalar l, const petsc::PetscParVector &x, - petsc::PetscParVector &r) const override; - - // Helper routine for computing the backward error. - double GetBackwardScaling(PetscScalar l) const override; + double GetBackwardScaling(std::complex l) const override; - // Return problem type name. const char *GetName() const override { return "PEP"; } public: - ArpackPEPSolver(int print_lvl); - ~ArpackPEPSolver() override; + ArpackPEPSolver(MPI_Comm comm, int print); - // Set operators for the quadratic polynomial eigenvalue problem. 
- void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; + void SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, ScaleType type) override; - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. int Solve() override; }; @@ -270,6 +233,4 @@ class ArpackPEPSolver : public ArpackEigenSolver #endif -#endif - #endif // PALACE_LINALG_ARPACK_HPP diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp index 9fd975997..8be3e8891 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ -83,26 +83,27 @@ void ChebyshevSmoother::Mult(const mfem::Vector &x, mfem::Vector &y) const } } -// void ChebyshevSmoother::ArrayMult(const mfem::Array &X, -// mfem::Array &Y) const -// { -// // Initialize. -// const int nrhs = X.Size(); -// const int N = height; -// mfem::Array R(nrhs), D(nrhs); -// std::vector rrefs(nrhs), drefs(nrhs); -// if (nrhs * N != r.Size()) -// { -// r.SetSize(nrhs * N); -// d.SetSize(nrhs * N); -// } -// for (int j = 0; j < nrhs; j++) -// { -// rrefs[j].MakeRef(r, j * N, N); -// drefs[j].MakeRef(d, j * N, N); -// R[j] = &rrefs[j]; -// D[j] = &drefs[j]; -// } +// XX TODO REMOVE +// void ChebyshevSmoother::ArrayMult(const mfem::Array &X, +// mfem::Array &Y) const +// { +// // Initialize. +// const int nrhs = X.Size(); +// const int N = height; +// mfem::Array R(nrhs), D(nrhs); +// std::vector rrefs(nrhs), drefs(nrhs); +// if (nrhs * N != r.Size()) +// { +// r.SetSize(nrhs * N); +// d.SetSize(nrhs * N); +// } +// for (int j = 0; j < nrhs; j++) +// { +// rrefs[j].MakeRef(r, j * N, N); +// drefs[j].MakeRef(d, j * N, N); +// R[j] = &rrefs[j]; +// D[j] = &drefs[j]; +// } // // Apply smoother: y = y + p(A) (x - A y) . 
// for (int it = 0; it < pc_it; it++) diff --git a/palace/linalg/complex.cpp b/palace/linalg/complex.cpp index bb7bc7b84..669b6a2ec 100644 --- a/palace/linalg/complex.cpp +++ b/palace/linalg/complex.cpp @@ -8,33 +8,40 @@ namespace palace { -ComplexVector::ComplexVector(int n) : Vector(2 * n) +ComplexVector::ComplexVector(int n) : Vector(n) { - xr_.MakeRef(*this, 0, n); - xi_.MakeRef(*this, n, n); + xr_.MakeRef(*this, 0, n / 2); + xi_.MakeRef(*this, n / 2, n / 2); } -ComplexVector::ComplexVector(const ComplexVector &x) : Vector(2 * x.Size()) +ComplexVector::ComplexVector(const ComplexVector &x) : Vector(x.Size()) { - xr_.MakeRef(*this, 0, x.Size()); - xi_.MakeRef(*this, x.Size(), x.Size()); + xr_.MakeRef(*this, 0, x.Size() / 2); + xi_.MakeRef(*this, x.Size() / 2, x.Size() / 2); Set(x.Real(), x.Imag()); } ComplexVector::ComplexVector(const Vector &xr, const Vector &xi) : Vector(2 * xr.Size()) { MFEM_VERIFY(xr.Size() == xi.Size(), - "Mismatch in dimension of real and imaginary matrix parts!"); + "Mismatch in dimension of real and imaginary matrix parts in ComplexVector!"); xr_.MakeRef(*this, 0, xr.Size()); xi_.MakeRef(*this, xr.Size(), xr.Size()); Set(xr, xi); } -void ComplexVector::SetSize(int n) +ComplexVector::ComplexVector(const std::complex *px, int n) : Vector(2 * n) { - Vector::SetSize(2 * n); xr_.MakeRef(*this, 0, n); xi_.MakeRef(*this, n, n); + Set(px, n); +} + +void ComplexVector::SetSize(int n) +{ + Vector::SetSize(n); + xr_.MakeRef(*this, 0, n / 2); + xi_.MakeRef(*this, n / 2, n / 2); } ComplexVector &ComplexVector::operator=(const ComplexVector &y) @@ -45,23 +52,72 @@ ComplexVector &ComplexVector::operator=(const ComplexVector &y) void ComplexVector::Set(const Vector &yr, const Vector &yi) { - MFEM_VERIFY(yr.Size() == yi.Size() && yr.Size() == Size(), - "Mismatch in dimension of real and imaginary matrix parts!"); + MFEM_VERIFY(yr.Size() == yi.Size() && 2 * yr.Size() == Size(), + "Mismatch in dimension of real and imaginary matrix parts in 
ComplexVector!"); Real() = yr; Imag() = yi; RestoreReal(); RestoreImag(); } +void ComplexVector::Set(const std::complex *py, int n) +{ + MFEM_VERIFY(2 * n == Size(), + "Mismatch in dimension for array of std::complex in ComplexVector!"); + Vector y(reinterpret_cast(const_cast *>(py)), 2 * n); + const int N = Size() / 2; + const auto *Y = y.Read(); + auto *XR = Real().Write(); + auto *XI = Imag().Write(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + XR[i] = Y[2 * i]; + XI[i] = Y[2 * i + 1]; + }); + RestoreReal(); + RestoreImag(); +} + +void ComplexVector::Get(std::complex *py, int n) const +{ + MFEM_VERIFY(2 * n == Size(), + "Mismatch in dimension for array of std::complex in ComplexVector!"); + Vector y(reinterpret_cast(py), 2 * n); + const int N = Size() / 2; + const auto *XR = Real().Read(); + const auto *XI = Imag().Read(); + auto *Y = y.Write(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + Y[2 * i] = XR[i]; + Y[2 * i + 1] = XI[i]; + }); + y.HostReadWrite(); +} + void ComplexVector::Conj() { Imag() *= -1.0; RestoreImag(); } +ComplexVector &ComplexVector::operator=(std::complex s) +{ + Real() = s.real(); + Imag() = s.imag(); + RestoreReal(); + RestoreImag(); +} + ComplexVector &ComplexVector::operator*=(std::complex s) { - if (s.imag() != 0.0) + if (s.imag() == 0.0) + { + *this *= s.real(); + } + else { const int N = Size() / 2; const double sr = s.real(); @@ -75,14 +131,9 @@ ComplexVector &ComplexVector::operator*=(std::complex s) XR[i] = sr * XR[i] - si * XI[i]; XI[i] = t; }); + RestoreReal(); + RestoreImag(); } - else if (s.real() != 0.0) - { - Real() *= s.real(); - Imag() *= s.real(); - } - RestoreReal(); - RestoreImag(); return *this; } diff --git a/palace/linalg/complex.hpp b/palace/linalg/complex.hpp index 683c01c5f..3f5e69fc5 100644 --- a/palace/linalg/complex.hpp +++ b/palace/linalg/complex.hpp @@ -27,7 +27,8 @@ class ComplexVector : public Vector Vector xr_, xi_; public: - // Create a vector with the given size. 
+ // Create a vector with the given size. The provided size should be the real-valued size, + // twice the actual complex-valued size, in order to agree with ComplexOperator::Height(). ComplexVector(int n = 0); // Copy constructor. @@ -36,8 +37,14 @@ class ComplexVector : public Vector // Copy constructor from separately provided real and imaginary parts. ComplexVector(const Vector &xr, const Vector &xi); - // Set the size of the vector. See the notes for Vector::SetSize for behavior in the - // cases where n is less than or greater than Size() or Capacity(). + // Copy constructor from an array of complex values. The size provided should be the + // length of the array x, which is half the resulting real-valued vector size. + ComplexVector(const std::complex *px, int n); + + // Set the size of the vector. The provided size should be the real-valued size, twice the + // actual complex-valued size, in order to agree with ComplexOperator::Height(). See the + // notes for Vector::SetSize for behavior in the cases where n is less than or greater + // than Size() or Capacity(). void SetSize(int n); // Get const access to the real and imaginary vector parts. Assumes that these are @@ -67,9 +74,20 @@ class ComplexVector : public Vector // Copy assignment from separately provided real and imaginary parts. void Set(const Vector &yr, const Vector &yi); + // Copy assignment from an array of complex values. The size provided should be the length + // of the array x, which is half the real-valued vector size. + void Set(const std::complex *py, int n); + + // Copy the vector into an array of complex values. The size provided should be the length + // of the array y, which is half the real-valued vector size. + void Get(std::complex *py, int n) const; + // Replace entries with complex conjugate. void Conj(); + // Set all entries equal to s. + ComplexVector &operator=(std::complex s); + // Scale all entries by s. 
ComplexVector &operator*=(std::complex s); @@ -280,6 +298,11 @@ class ComplexParOperator : public ComplexOperator bool IsReal() const override { return A_->IsReal(); } bool IsImag() const override { return A_->IsImag(); } + const Operator &Real() const override { return A_->Real(); } + Operator &Real() override { return A_->Real(); } + const Operator &Imag() const override { return A_->Imag(); } + Operator &Imag() override { return A_->Imag(); } + using ComplexOperator::AddMult; using ComplexOperator::AddMultHermitianTranspose; using ComplexOperator::AddMultTranspose; diff --git a/palace/linalg/eigen.hpp b/palace/linalg/eps.hpp similarity index 63% rename from palace/linalg/eigen.hpp rename to palace/linalg/eps.hpp index 3cddb3343..34b0d4809 100644 --- a/palace/linalg/eigen.hpp +++ b/palace/linalg/eps.hpp @@ -1,28 +1,25 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 -#ifndef PALACE_LINALG_EIGEN_HPP -#define PALACE_LINALG_EIGEN_HPP +#ifndef PALACE_LINALG_EPS_HPP +#define PALACE_LINALG_EPS_HPP + +#include +#include "linalg/operator.hpp" namespace palace { class DivFreeSolver; -class KspSolver; - -namespace petsc -{ - -class PetscParMatrix; -class PetscParVector; - -} // namespace petsc +class ComplexKspSolver; +class ComplexOperator; +class ComplexVector; // // Pure abstract base class for solving generalized linear eigenvalue problems problems or // quadratic polynomial eigenvalue problems. // -class EigenSolverBase +class EigenvalueSolver { public: enum class ScaleType @@ -51,65 +48,66 @@ class EigenSolverBase BACKWARD }; - EigenSolverBase() = default; - virtual ~EigenSolverBase() = default; +public: + EigenvalueSolver() = default; + virtual ~EigenvalueSolver() = default; // Set operators for the generalized eigenvalue problem or for the quadratic polynomial // eigenvalue problem. 
- virtual void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, + virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &M, ScaleType type) = 0; - virtual void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) = 0; + virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, ScaleType type) = 0; // For the linear generalized case, the linear solver should be configured to compute the // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic // case, the linear solver should be configured to compute the action of M⁻¹ (with no // spectral transformation) or P(σ)⁻¹. - virtual void SetLinearSolver(const KspSolver &ksp) = 0; + virtual void SetLinearSolver(const ComplexKspSolver &ksp) = 0; - // Set the projection operator for the divergence-free constraint. - virtual void SetProjector(const DivFreeSolver &divfree) = 0; + // Set the projection operator for enforcing the divergence-free constraint. + virtual void SetDivFreeProjector(const DivFreeSolver &divfree) = 0; + + // Set optional B matrix used for weighted inner products. This must be set explicitly + // even for generalized problems, otherwise the identity will be used. + virtual void SetBMat(const Operator &B) = 0; // Get scaling factors used by the solver. virtual double GetScalingGamma() const = 0; virtual double GetScalingDelta() const = 0; // Set the number of required eigenmodes. - virtual void SetNumModes(int numeig, int numvec = 0) = 0; + virtual void SetNumModes(int num_eig, int num_vec = 0) = 0; // Set solver tolerance. virtual void SetTol(double tol) = 0; // Set maximum number of Arnoldi update iterations. - virtual void SetMaxIter(int maxits) = 0; + virtual void SetMaxIter(int max_it) = 0; // Set target spectrum for the eigensolver. 
When a spectral transformation is used, this // applies to the spectrum of the shifted operator. virtual void SetWhichEigenpairs(WhichType type) = 0; // Set shift-and-invert spectral transformation. - virtual void SetShiftInvert(double tr, double ti, bool precond = false) = 0; - - // Set optional B matrix used for weighted inner products. This must be set explicitly - // even for generalized problems, otherwise the identity will be used. - virtual void SetBMat(const petsc::PetscParMatrix &B) = 0; + virtual void SetShiftInvert(std::complex s, bool precond = false) = 0; // Set an initial vector for the solution subspace. - virtual void SetInitialSpace(const petsc::PetscParVector &v) = 0; + virtual void SetInitialSpace(const ComplexVector &v) = 0; // Solve the eigenvalue problem. Returns the number of converged eigenvalues. virtual int Solve() = 0; // Get the corresponding eigenvalue. - virtual void GetEigenvalue(int i, double &eigr, double &eigi) const = 0; + virtual std::complex GetEigenvalue(int i) const = 0; // Get the corresponding eigenvector. - virtual void GetEigenvector(int i, petsc::PetscParVector &x) const = 0; + virtual void GetEigenvector(int i, ComplexVector &x) const = 0; // Get the corresponding eigenpair error. - virtual void GetError(int i, ErrorType type, double &err) const = 0; + virtual double GetError(int i, ErrorType type) const = 0; }; } // namespace palace -#endif // PALACE_LINALG_EIGEN_HPP +#endif // PALACE_LINALG_EPS_HPP diff --git a/palace/linalg/feast.hpp b/palace/linalg/feast.hpp index 813bbba66..7d63c082d 100644 --- a/palace/linalg/feast.hpp +++ b/palace/linalg/feast.hpp @@ -15,7 +15,7 @@ #endif #include -#include "linalg/eigen.hpp" +#include "linalg/eps.hpp" // Forward declarations of SLEPc objects. 
typedef struct _p_RG *RG; diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp index 77152c205..198959941 100644 --- a/palace/linalg/ksp.cpp +++ b/palace/linalg/ksp.cpp @@ -161,13 +161,10 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, break; #endif case config::LinearSolverData::Type::STRUMPACK_MP: -#if defined(MFEM_USE_STRUMPACK) && \ - (STRUMPACK_VERSION_MAJOR >= 6 && STRUMPACK_VERSION_MINOR >= 3 && \ - STRUMPACK_VERSION_PATCH > 1) +#if defined(MFEM_USE_STRUMPACK) pc = std::make_unique(comm, iodata, print); #else - MFEM_ABORT("Solver was not built with STRUMPACK support or uses STRUMPACK older than " - "6.3.1 which does not include mixed-precision support, please choose a " + MFEM_ABORT("Solver was not built with STRUMPACK support, please choose a " "different solver!"); #endif break; diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp index 280af7af1..ca14f69d5 100644 --- a/palace/linalg/operator.cpp +++ b/palace/linalg/operator.cpp @@ -5,6 +5,8 @@ #include #include "linalg/complex.hpp" +#include "linalg/slepc.hpp" +#include "linalg/vector.hpp" #include "utils/communication.hpp" namespace palace @@ -382,6 +384,11 @@ double SpectralNorm(MPI_Comm comm, const ComplexOperator &A, bool herm, double t int max_it) { // XX TODO: Use ARPACK or SLEPc for this when configured. + // #if defined(PALACE_WITH_SLEPC) + + double slepc_l = slepc::GetMaxSingularValue(comm, A, herm, tol, max_it); + + // #else // Power iteration loop: ||A||₂² = λₙ(Aᴴ A). int it = 0; double res = 0.0; @@ -419,7 +426,13 @@ double SpectralNorm(MPI_Comm comm, const ComplexOperator &A, bool herm, double t "lambda = {:.3e}!\n", it, res, l); } + + // XX TODO DEBUG + Mpi::Print(comm, "\nSPECTRAL NORM...Power iteration: {}, SLEPc: {}\n\n", + herm ? l : std::sqrt(l), slepc_l); + return herm ? 
l : std::sqrt(l); + // #endif } } // namespace linalg diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp index c6f35ece6..227a70cb8 100644 --- a/palace/linalg/operator.hpp +++ b/palace/linalg/operator.hpp @@ -8,7 +8,6 @@ #include #include #include -#include "linalg/vector.hpp" namespace palace { @@ -16,6 +15,7 @@ namespace palace class ComplexOperator; using Operator = mfem::Operator; +using Vector = mfem::Vector; // // Derived operator classes extending mfem::Operator from MFEM. diff --git a/palace/linalg/petsc.cpp b/palace/linalg/petsc.cpp index d4596a9fb..2dc08d0ba 100644 --- a/palace/linalg/petsc.cpp +++ b/palace/linalg/petsc.cpp @@ -589,8 +589,6 @@ PetscParMatrix::PetscParMatrix(Mat B, bool ref) PetscParMatrix::~PetscParMatrix() { - MPI_Comm comm; - PalacePetscCall(PetscObjectGetComm(reinterpret_cast(A), &comm)); PalacePetscCall(MatDestroy(&A)); } diff --git a/palace/linalg/petsc.hpp b/palace/linalg/petsc.hpp index 5aaf29db8..82e2052f3 100644 --- a/palace/linalg/petsc.hpp +++ b/palace/linalg/petsc.hpp @@ -4,16 +4,18 @@ #ifndef PALACE_LINALG_PETSC_HPP #define PALACE_LINALG_PETSC_HPP +#if defined(PALACE_WITH_SLEPC) + #include #include +#if !defined(PETSC_USE_REAL_DOUBLE) +#error "PETSc should be compiled with double precision!" +#endif #if defined(PETSC_HAVE_HYPRE) #error \ "PETSc should be built without Hypre to avoid conflicts with MFEM's Hypre dependency!" #endif -#if !defined(PETSC_USE_REAL_DOUBLE) -#error "PETSc should be compiled with double precision!" -#endif #if defined(PETSC_USE_64BIT_INDICES) && !(defined(HYPRE_BIGINT) || defined(HYPRE_MIXEDINT)) #warning "Mismatch between big HYPRE (32bit) and PETSc (64bit) integer types!" #endif @@ -21,592 +23,598 @@ #warning "Mismatch between big HYPRE (64bit) and PETSc (32bit) integer types!" #endif -#include -#include - // Forward declarations of PETSc objects. 
-typedef struct _p_PetscSF *VecScatter; typedef struct _p_Vec *Vec; typedef struct _p_Mat *Mat; -typedef struct _p_KSP *KSP; -typedef struct _p_PC *PC; // Error handling similar to Petsc's PetscCallAbort but always aborts on the global // PETSC_COMM_WORLD communicator. #define PalacePetscCall(...) PetscCallAbort(PETSC_COMM_WORLD, __VA_ARGS__) -namespace palace::petsc -{ - -// -// A minimal implementation of MFEM's PETSc wrappers to support PETSc built with complex -// numbers. -// - -class PetscParMatrix; -class PetscParVector; - -// Wrappers for PetscInitialize/PetscFinalize. -void Initialize(int &argc, char **&argv, const char rc_file[], const char help[]); -void Finalize(); - -// Wrapper for PETSc's vector scatter class. -class PetscScatter -{ -public: - enum class Type - { - TO_ZERO, - TO_ALL - }; - -private: - // The actual PETSc object. - VecScatter ctx; - -public: - // Creates a scatter context that copies all entries from the parallel vector to either - // all processes or to the root process. Allocates the - PetscScatter(Type type, const PetscParVector &x, std::unique_ptr &y); - - // Calls PETSc's destroy function. - ~PetscScatter(); - - // Routines for forward/reverse scattering. - void Forward(const PetscParVector &x, PetscParVector &y); - void Reverse(const PetscParVector &x, PetscParVector &y); -}; - -// Wrapper for PETSc's vector class. -class PetscParVector -{ -private: - // The actual PETSc object. - Vec x; - -public: - // Creates vector compatible with (i.e. in the domain of) A or Aᵀ. - PetscParVector(const PetscParMatrix &A, bool transpose = false); - - // Parallel and serial copy constructors from MFEM's Vector object. 
- PetscParVector(MPI_Comm comm, const mfem::Vector &y); - PetscParVector(const mfem::Vector &y); -#if defined(PETSC_USE_COMPLEX) - PetscParVector(MPI_Comm comm, const mfem::Vector &yr, const mfem::Vector &yi); - PetscParVector(const mfem::Vector &yr, const mfem::Vector &yi); -#endif - - // Create a parallel or sequential PETSc vector with the provided dimension. - PetscParVector(MPI_Comm comm, PetscInt n, PetscInt N); - // PetscParVector(PetscInt n); - - // Create a parallel or sequential PETSc vector with a data array. - PetscParVector(MPI_Comm comm, PetscInt n, PetscInt N, PetscScalar *data); - PetscParVector(PetscInt n, PetscScalar *data); - - // Copy constructor, calls VecDuplicate. - PetscParVector(const PetscParVector &y); - - // Constructor which wraps an existing PETSc Vec object and takes over ownership unless - // ref is true. - PetscParVector(Vec y, bool ref); - - // Calls PETSc's destroy function. - virtual ~PetscParVector(); - - // Copy to/from MFEM's Vector type. - void GetToVector(mfem::Vector &v, PetscInt start = -1, PetscInt end = -1) const; - void SetFromVector(const mfem::Vector &v); - void AddFromVector(const mfem::Vector &v); -#if defined(PETSC_USE_COMPLEX) - void GetToVectors(mfem::Vector &vr, mfem::Vector &vi, PetscInt start = -1, - PetscInt end = -1) const; - void SetFromVectors(const mfem::Vector &vr, const mfem::Vector &vi); - void AddFromVectors(const mfem::Vector &vr, const mfem::Vector &vi); -#endif - - // Access the data array of the vector. - PetscScalar *GetArray(); - const PetscScalar *GetArrayRead() const; - void RestoreArray(PetscScalar *data); - void RestoreArrayRead(const PetscScalar *data) const; - - // Temporarily replace the data array of the vector. - void PlaceArray(const PetscScalar *data); - void ResetArray(); - - // Copy entries of y to x. - void Copy(const PetscParVector &y); - - // Returns the local vector size. - PetscInt GetSize() const; - - // Returns the global vector size. 
- PetscInt GetGlobalSize() const; - - // Set the (local) vector dimension to n, copying previous contents to the upper block. - void Resize(PetscInt n, bool copy = false); - - // Zero all entries of the vector. - void SetZero(); - - // Sets all entries of the vector to random numbers sampled from the range [-1-i, 1+i], or - // [-1, 1]. - void SetRandom(); -#if defined(PETSC_USE_COMPLEX) - void SetRandomReal(); -#else - void SetRandomReal() { SetRandom(); } -#endif - void SetRandomSign(bool init = false); - - // Set all entries to s. - PetscParVector &operator=(PetscScalar s); - - // Scale all entries by s. - void Scale(PetscScalar s); +// namespace palace::petsc +// { + +// // +// // A minimal implementation of MFEM's PETSc wrappers to support PETSc built with complex +// // numbers. +// // + +// class PetscParMatrix; +// class PetscParVector; + +// // Wrappers for PetscInitialize/PetscFinalize. +// void Initialize(int &argc, char **&argv, const char rc_file[], const char help[]); +// void Finalize(); + +// // Wrapper for PETSc's vector scatter class. +// class PetscScatter +// { +// public: +// enum class Type +// { +// TO_ZERO, +// TO_ALL +// }; + +// private: +// // The actual PETSc object. +// VecScatter ctx; + +// public: +// // Creates a scatter context that copies all entries from the parallel vector to either +// // all processes or to the root process. Allocates the +// PetscScatter(Type type, const PetscParVector &x, std::unique_ptr &y); + +// // Calls PETSc's destroy function. +// ~PetscScatter(); + +// // Routines for forward/reverse scattering. +// void Forward(const PetscParVector &x, PetscParVector &y); +// void Reverse(const PetscParVector &x, PetscParVector &y); +// }; + +// // Wrapper for PETSc's vector class. +// class PetscParVector +// { +// private: +// // The actual PETSc object. +// Vec x; + +// public: +// // Creates vector compatible with (i.e. in the domain of) A or Aᵀ. 
+// PetscParVector(const PetscParMatrix &A, bool transpose = false); + +// // Parallel and serial copy constructors from MFEM's Vector object. +// PetscParVector(MPI_Comm comm, const mfem::Vector &y); +// PetscParVector(const mfem::Vector &y); +// #if defined(PETSC_USE_COMPLEX) +// PetscParVector(MPI_Comm comm, const mfem::Vector &yr, const mfem::Vector &yi); +// PetscParVector(const mfem::Vector &yr, const mfem::Vector &yi); +// #endif + +// // Create a parallel or sequential PETSc vector with the provided dimension. +// PetscParVector(MPI_Comm comm, PetscInt n, PetscInt N); +// // PetscParVector(PetscInt n); + +// // Create a parallel or sequential PETSc vector with a data array. +// PetscParVector(MPI_Comm comm, PetscInt n, PetscInt N, PetscScalar *data); +// PetscParVector(PetscInt n, PetscScalar *data); + +// // Copy constructor, calls VecDuplicate. +// PetscParVector(const PetscParVector &y); + +// // Constructor which wraps an existing PETSc Vec object and takes over ownership unless +// // ref is true. +// PetscParVector(Vec y, bool ref); + +// // Calls PETSc's destroy function. +// virtual ~PetscParVector(); + +// // Copy to/from MFEM's Vector type. +// void GetToVector(mfem::Vector &v, PetscInt start = -1, PetscInt end = -1) const; +// void SetFromVector(const mfem::Vector &v); +// void AddFromVector(const mfem::Vector &v); +// #if defined(PETSC_USE_COMPLEX) +// void GetToVectors(mfem::Vector &vr, mfem::Vector &vi, PetscInt start = -1, +// PetscInt end = -1) const; +// void SetFromVectors(const mfem::Vector &vr, const mfem::Vector &vi); +// void AddFromVectors(const mfem::Vector &vr, const mfem::Vector &vi); +// #endif + +// // Access the data array of the vector. +// PetscScalar *GetArray(); +// const PetscScalar *GetArrayRead() const; +// void RestoreArray(PetscScalar *data); +// void RestoreArrayRead(const PetscScalar *data) const; + +// // Temporarily replace the data array of the vector. 
+// void PlaceArray(const PetscScalar *data); +// void ResetArray(); + +// // Copy entries of y to x. +// void Copy(const PetscParVector &y); + +// // Returns the local vector size. +// PetscInt GetSize() const; + +// // Returns the global vector size. +// PetscInt GetGlobalSize() const; + +// // Set the (local) vector dimension to n, copying previous contents to the upper block. +// void Resize(PetscInt n, bool copy = false); + +// // Zero all entries of the vector. +// void SetZero(); + +// // Sets all entries of the vector to random numbers sampled from the range [-1-i, 1+i], +// or +// // [-1, 1]. +// void SetRandom(); +// #if defined(PETSC_USE_COMPLEX) +// void SetRandomReal(); +// #else +// void SetRandomReal() { SetRandom(); } +// #endif +// void SetRandomSign(bool init = false); + +// // Set all entries to s. +// PetscParVector &operator=(PetscScalar s); + +// // Scale all entries by s. +// void Scale(PetscScalar s); + +// // Shift all entries by +s. +// void Shift(PetscScalar s); + +// // Compute pointwise |x|. +// void Abs(); + +// // Compute pointwise sqrt(|x|). +// void SqrtAbs(); + +// // Compute pointwise 1/x. +// void Inv(); + +// // Compute pointwise 1/sqrt(x). +// void InvSqrt(); + +// #if defined(PETSC_USE_COMPLEX) +// // Replace entries with complex conjugate. +// void Conj(); + +// // Zero the imaginary part of the vector. +// void GetRealPart(); + +// // Move the imaginary part to the real part of the vector. +// void GetImagPart(); +// #endif + +// // Normalize the vector. +// PetscReal Normalize(); +// PetscReal Normalize(const PetscParMatrix &B, PetscParVector &Bv); + +// // Calculate the vector 2-norm. +// PetscReal Norml2() const; + +// // Calculate the vector infinity-norm. +// PetscReal Normlinf() const; + +// // Zero specified (local) rows of the vector. +// void ZeroRows(const mfem::Array &rows); + +// // Pointwise multiplication x *= y. 
+// void PointwiseMult(const PetscParVector &y, bool replace_zeros); + +// // In-place addition x += alpha * y. +// void AXPY(PetscScalar alpha, const PetscParVector &y); + +// // In-place addition x = alpha * y + beta * x. +// void AXPBY(PetscScalar alpha, const PetscParVector &y, PetscScalar beta); + +// // In-place addition x = alpha * y + beta * z + gamma * x. +// void AXPBYPCZ(PetscScalar alpha, const PetscParVector &y, PetscScalar beta, +// const PetscParVector &z, PetscScalar gamma); + +// // Vector dot product (yᴴ x) or indefinite dot product (yᵀ x) for complex vectors. +// PetscScalar Dot(const PetscParVector &y) const; +// PetscScalar TransposeDot(const PetscParVector &y) const; + +// // Prints the vector (to stdout if fname is nullptr). +// void Print(const char *fname = nullptr, bool binary = false) const; + +// // Get the associated MPI communicator. +// MPI_Comm GetComm() const; + +// // Typecasting to PETSc's Vec type. +// operator Vec() const { return x; } + +// // Typecasting to PETSc object. +// operator PetscObject() const { return reinterpret_cast(x); } +// }; + +// // Base wrapper for PETSc's matrix class. +// class PetscParMatrix +// { +// public: +// enum class NNZStructure +// { +// DIFFERENT, +// SAME, +// SUBSET +// }; + +// #if defined(PETSC_USE_COMPLEX) +// enum class ExtractStructure +// { +// REAL, +// IMAGINARY, +// SUM +// }; +// #endif + +// protected: +// // The actual PETSc object. +// Mat A; + +// // Default constructor for derived classes. +// PetscParMatrix() : A(nullptr) {} + +// public: +// // Copy constructor, calls MatDuplicate. +// PetscParMatrix(const PetscParMatrix &B); + +// // Constructor which wraps an existing PETSc Mat object and takes over ownership unless +// // ref is true. +// PetscParMatrix(Mat B, bool ref); + +// // Calls PETSc's destroy function. +// virtual ~PetscParMatrix(); + +// // Get/set symmetric or Hermitian flags for the matrix. 
When setting the flags, it is +// // assumed the structure does not change for the lifetime of the matrix(unless +// explicitly +// // set again). +// void SetSymmetric(bool sym = true); +// void SetHermitian(bool herm = true); +// bool GetSymmetric() const; +// bool GetHermitian() const; +// #if defined(PETSC_USE_COMPLEX) +// void SetRealSymmetric(); +// #endif +// void CopySymmetry(const PetscParMatrix &B); + +// // Returns the local number of rows. +// PetscInt GetNumRows() const; +// PetscInt Height() const { return GetNumRows(); } + +// // Returns the local number of columns. +// PetscInt GetNumCols() const; +// PetscInt Width() const { return GetNumCols(); } + +// // Returns the global number of rows. +// PetscInt GetGlobalNumRows() const; + +// // Returns the global number of columns. +// PetscInt GetGlobalNumCols() const; + +// // Returns the number of nonzeros. +// virtual PetscInt NNZ() const; +// #if defined(PETSC_USE_COMPLEX) +// virtual PetscInt NNZReal() const +// { +// MFEM_ABORT("NNZReal is not supported for base class PetscParMatrix!"); +// return 0; +// } +// virtual PetscInt NNZImag() const +// { +// MFEM_ABORT("NNZImag is not supported for base class PetscParMatrix!"); +// return 0; +// } +// #endif + +// // Calculate matrix Frobenius and infinity norms. 
+// PetscReal NormF() const; +// PetscReal NormInf() const; +// #if defined(PETSC_USE_COMPLEX) +// virtual PetscReal NormFReal() const +// { +// MFEM_ABORT("NormFReal is not supported for base class PetscParMatrix!"); +// return 0.0; +// } +// virtual PetscReal NormFImag() const +// { +// MFEM_ABORT("NormFImag is not supported for base class PetscParMatrix!"); +// return 0.0; +// } +// virtual PetscReal NormInfReal() const +// { +// MFEM_ABORT("NormInfReal is not supported for base class PetscParMatrix!"); +// return 0.0; +// } +// virtual PetscReal NormInfImag() const +// { +// MFEM_ABORT("NormInfImag is not supported for base class PetscParMatrix!"); +// return 0.0; +// } +// #endif + +// // Estimate matrix 2-norm (spectral norm) using power iteration. +// PetscReal Norm2(PetscReal tol = PETSC_DEFAULT, PetscInt maxits = PETSC_DEFAULT) const; + +// // Scale all entries by s. +// void Scale(PetscScalar s); + +// #if defined(PETSC_USE_COMPLEX) +// // Replace entries with complex conjugate. +// void Conj(); + +// // Zero the imaginary part of the matrix. +// void GetRealPart(); + +// // Move the imaginary part to the real part of the matrix. +// void GetImagPart(); +// #endif + +// // In-place addition A += alpha * B. +// void AXPY(PetscScalar alpha, const PetscParMatrix &B, NNZStructure struc); + +// // Matrix-vector multiplication. +// void Mult(const PetscParVector &x, PetscParVector &y) const; +// void MultAdd(const PetscParVector &x, PetscParVector &y) const; +// void MultTranspose(const PetscParVector &x, PetscParVector &y) const; +// void MultTransposeAdd(const PetscParVector &x, PetscParVector &y) const; +// void MultHermitianTranspose(const PetscParVector &x, PetscParVector &y) const; +// void MultHermitianTransposeAdd(const PetscParVector &x, PetscParVector &y) const; + +// #if defined(PETSC_USE_COMPLEX) +// // Multiplication with a real-valued vector. 
+// virtual void Mult(const mfem::Vector &x, PetscParVector &y) const; +// virtual void MultTranspose(const mfem::Vector &x, PetscParVector &y) const; +// virtual void MultHermitianTranspose(const mfem::Vector &x, PetscParVector &y) const; +// #endif + +// // Prints the matrix (to stdout if fname is nullptr). +// virtual void Print(const char *fname = nullptr, bool binary = false) const; +// #if defined(PETSC_USE_COMPLEX) +// virtual void PrintReal(const char *fname) const +// { +// MFEM_ABORT("PrintReal is not supported for base class PetscParMatrix!"); +// } +// virtual void PrintImag(const char *fname) const +// { +// MFEM_ABORT("PrintImag is not supported for base class PetscParMatrix!"); +// } +// #endif + +// // Returns a (real) MFEM Operator from the underlying shell matrix data. When complex +// // scalars are used, the parameter controls which part of the matrix to extract. +// #if defined(PETSC_USE_COMPLEX) +// virtual const mfem::Operator *GetOperator(ExtractStructure struc) const +// #else +// virtual const mfem::Operator *GetOperator() const +// #endif +// { +// MFEM_ABORT("GetOperator is not supported for base class PetscParMatrix!"); +// return nullptr; +// } + +// // Test whether or not a shell matrix has a real or imaginary parts. +// #if defined(PETSC_USE_COMPLEX) +// virtual bool HasReal() const +// { +// MFEM_ABORT("HasReal is not supported for base class PetscParMatrix!"); +// return false; +// } +// virtual bool HasImag() const +// { +// MFEM_ABORT("HasImag is not supported for base class PetscParMatrix!"); +// return false; +// } +// #endif + +// // Constructs a (real) HypreParMatrix from the PETSc matrix data. When complex scalars +// // are used, the parameter controls which part of the matrix to extract. 
+// #if defined(PETSC_USE_COMPLEX) +// virtual std::unique_ptr +// GetHypreParMatrix(ExtractStructure struc) const; +// #else +// virtual std::unique_ptr GetHypreParMatrix() const; +// #endif + +// // Create a submatrix on the same number of processors as the original matrix, +// // corresponding to the provided rows and columns which are the selected(local) +// indices. virtual std::unique_ptr GetSubMatrix(const mfem::Array +// &rows, +// const mfem::Array &cols); + +// // Create a sequential gathered matrix corresponding to the parallel matrix. All +// processes +// // on the original communicator must call this function, but if the argument is false, +// no +// // matrix is created (returned pointer is nullptr). +// virtual std::unique_ptr GetSequentialMatrix(bool create); + +// // Get the associated MPI communicator. +// MPI_Comm GetComm() const; + +// // Typecasting to PETSc's Mat type. +// operator Mat() const { return A; } + +// // Typecasting to PETSc object. +// operator PetscObject() const { return reinterpret_cast(A); } +// }; + +// // Context data for PETSc shell matrices. These store complex matrices as +// // Ar + i Ai and perform matrix-vector products. +// struct PetscMatShellCtx +// { +// std::unique_ptr Ar; +// mfem::Vector x, y; +// #if defined(PETSC_USE_COMPLEX) +// std::unique_ptr Ai; +// #endif +// }; + +// // Wrapper for PETSc's MATSHELL matrix class. +// class PetscShellMatrix : public PetscParMatrix +// { +// private: +// // Returns the shell matrix context. +// PetscMatShellCtx *GetContext() const; + +// public: +// // Create a PETSc shell matrix wrapping an MFEM Operator. Ownership of the operator is +// // transfered to the PETSc shell. When PETSc is compiled with complex numbers support, +// // the shell matrix wraps the real and imaginary parts to act on complex PETSc Vec +// // objects. 
+// PetscShellMatrix(MPI_Comm comm, std::unique_ptr &&B); +// #if defined(PETSC_USE_COMPLEX) +// PetscShellMatrix(MPI_Comm comm, std::unique_ptr &&Br, +// std::unique_ptr &&Bi); +// #endif + +// // Returns the number of nonzeros. +// PetscInt NNZ() const override; +// #if defined(PETSC_USE_COMPLEX) +// PetscInt NNZReal() const override; +// PetscInt NNZImag() const override; +// #endif + +// // Calculate matrix Frobenius and infinity norms. +// #if defined(PETSC_USE_COMPLEX) +// PetscReal NormFReal() const override; +// PetscReal NormFImag() const override; +// PetscReal NormInfReal() const override; +// PetscReal NormInfImag() const override; +// #endif + +// #if defined(PETSC_USE_COMPLEX) +// // Multiplication with a real-valued vector. +// void Mult(const mfem::Vector &x, PetscParVector &y) const override; +// void MultTranspose(const mfem::Vector &x, PetscParVector &y) const override; +// void MultHermitianTranspose(const mfem::Vector &x, PetscParVector &y) const override; +// #endif + +// // Prints the locally owned matrix rows in parallel. +// void Print(const char *fname = nullptr, bool binary = false) const override; +// #if defined(PETSC_USE_COMPLEX) +// void PrintReal(const char *fname) const override; +// void PrintImag(const char *fname) const override; +// #endif + +// // Test whether or not a shell matrix has a real or imaginary parts. +// #if defined(PETSC_USE_COMPLEX) +// bool HasReal() const override; +// bool HasImag() const override; +// #endif + +// // Returns a (real) MFEM Operator from the underlying shell matrix data. When complex +// // scalars are used, the parameter controls which part of the matrix to extract. +// #if defined(PETSC_USE_COMPLEX) +// const mfem::Operator *GetOperator(ExtractStructure struc) const override; +// #else +// const mfem::Operator *GetOperator() const override; +// #endif + +// // These methods are not supported for MATSHELL. 
+// #if defined(PETSC_USE_COMPLEX) +// std::unique_ptr +// GetHypreParMatrix(ExtractStructure struc) const override +// #else +// std::unique_ptr GetHypreParMatrix() const override +// #endif +// { +// MFEM_ABORT("GetHypreParMatrix is not supported for PetscShellMatrix!"); +// return {}; +// } +// std::unique_ptr GetSubMatrix(const mfem::Array &, +// const mfem::Array &) override +// { +// MFEM_ABORT("GetSubMatrix is not supported for PetscShellMatrix!"); +// return {}; +// } +// std::unique_ptr GetSequentialMatrix(bool) override +// { +// MFEM_ABORT("GetSequentialMatrix is not supported for PetscShellMatrix!"); +// return {}; +// } +// }; + +// // Wrapper for PETSc's MATAIJ matrix class. +// class PetscAijMatrix : public PetscParMatrix +// { +// public: +// // Create a PETSc matrix explicitly converted from an MFEM Operator. +// PetscAijMatrix(const mfem::Operator &B); +// #if defined(PETSC_USE_COMPLEX) +// PetscAijMatrix(const mfem::Operator &Br, const mfem::Operator &Bi); +// #endif +// }; + +// // Wrapper for PETSc's MATDENSE matrix class. +// class PetscDenseMatrix : public PetscParMatrix +// { +// private: +// // Helper method for column orthonormalization. +// PetscReal OrthonormalizeColumnInternal( +// PetscInt j, bool mgs, bool cgs2, +// const std::function &Dot, +// const std::function +// &VecDot, +// const std::function &Normalize); + +// public: +// // Create a parallel or sequential PETSc dense matrix. Option to specify an existing +// data +// // array. +// PetscDenseMatrix(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, +// PetscScalar *data); +// PetscDenseMatrix(PetscInt m, PetscInt n, PetscScalar *data); + +// // Set the (local) matrix dimensions to m x n, copying previous contents to the +// upper-left +// // block. +// void Resize(PetscInt m, PetscInt n, bool copy = false); + +// // Access methods for columns of the dense matrix. 
+// PetscParVector GetColumn(PetscInt j); +// const PetscParVector GetColumnRead(PetscInt j) const; +// void RestoreColumn(PetscInt j, PetscParVector &v); +// void RestoreColumnRead(PetscInt j, const PetscParVector &v) const; + +// // Access the data array of the dense matrix. +// PetscScalar *GetArray(); +// const PetscScalar *GetArrayRead() const; +// void RestoreArray(PetscScalar *data); +// void RestoreArrayRead(const PetscScalar *data) const; + +// // Sets all entries of the matrix to random numbers sampled from the range [-1-i, 1+i], +// or +// // [-1, 1]. +// void SetRandom(PetscInt start = -1, PetscInt end = -1); +// #if defined(PETSC_USE_COMPLEX) +// void SetRandomReal(PetscInt start = -1, PetscInt end = -1); +// #else +// void SetRandomReal(PetscInt start = -1, PetscInt end = -1) { SetRandom(start, end); } +// #endif +// void SetRandomSign(PetscInt start = -1, PetscInt end = -1, bool init = false); + +// // Orthonormalize column j of the matrix against the preceding columns, using +// classical +// // or modified Gram-Schmidt. +// PetscReal OrthonormalizeColumn(PetscInt j, bool mgs, bool cgs2); +// PetscReal OrthonormalizeColumn(PetscInt j, bool mgs, bool cgs2, const PetscParMatrix +// &B, +// PetscParVector &Bv); + +// // Dense matrix-matrix multiplication. +// void MatMult(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const; +// void MatMultTranspose(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const; +// void MatTransposeMult(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const; +// }; + +// } // namespace palace::petsc - // Shift all entries by +s. - void Shift(PetscScalar s); - - // Compute pointwise |x|. - void Abs(); - - // Compute pointwise sqrt(|x|). - void SqrtAbs(); - - // Compute pointwise 1/x. - void Inv(); - - // Compute pointwise 1/sqrt(x). - void InvSqrt(); - -#if defined(PETSC_USE_COMPLEX) - // Replace entries with complex conjugate. - void Conj(); - - // Zero the imaginary part of the vector. 
- void GetRealPart(); - - // Move the imaginary part to the real part of the vector. - void GetImagPart(); #endif - // Normalize the vector. - PetscReal Normalize(); - PetscReal Normalize(const PetscParMatrix &B, PetscParVector &Bv); - - // Calculate the vector 2-norm. - PetscReal Norml2() const; - - // Calculate the vector infinity-norm. - PetscReal Normlinf() const; - - // Zero specified (local) rows of the vector. - void ZeroRows(const mfem::Array &rows); - - // Pointwise multiplication x *= y. - void PointwiseMult(const PetscParVector &y, bool replace_zeros); - - // In-place addition x += alpha * y. - void AXPY(PetscScalar alpha, const PetscParVector &y); - - // In-place addition x = alpha * y + beta * x. - void AXPBY(PetscScalar alpha, const PetscParVector &y, PetscScalar beta); - - // In-place addition x = alpha * y + beta * z + gamma * x. - void AXPBYPCZ(PetscScalar alpha, const PetscParVector &y, PetscScalar beta, - const PetscParVector &z, PetscScalar gamma); - - // Vector dot product (yᴴ x) or indefinite dot product (yᵀ x) for complex vectors. - PetscScalar Dot(const PetscParVector &y) const; - PetscScalar TransposeDot(const PetscParVector &y) const; - - // Prints the vector (to stdout if fname is nullptr). - void Print(const char *fname = nullptr, bool binary = false) const; - - // Get the associated MPI communicator. - MPI_Comm GetComm() const; - - // Typecasting to PETSc's Vec type. - operator Vec() const { return x; } - - // Typecasting to PETSc object. - operator PetscObject() const { return reinterpret_cast(x); } -}; - -// Base wrapper for PETSc's matrix class. -class PetscParMatrix -{ -public: - enum class NNZStructure - { - DIFFERENT, - SAME, - SUBSET - }; - -#if defined(PETSC_USE_COMPLEX) - enum class ExtractStructure - { - REAL, - IMAGINARY, - SUM - }; -#endif - -protected: - // The actual PETSc object. - Mat A; - - // Default constructor for derived classes. 
- PetscParMatrix() : A(nullptr) {} - -public: - // Copy constructor, calls MatDuplicate. - PetscParMatrix(const PetscParMatrix &B); - - // Constructor which wraps an existing PETSc Mat object and takes over ownership unless - // ref is true. - PetscParMatrix(Mat B, bool ref); - - // Calls PETSc's destroy function. - virtual ~PetscParMatrix(); - - // Get/set symmetric or Hermitian flags for the matrix. When setting the flags, it is - // assumed the structure does not change for the lifetime of the matrix(unless explicitly - // set again). - void SetSymmetric(bool sym = true); - void SetHermitian(bool herm = true); - bool GetSymmetric() const; - bool GetHermitian() const; -#if defined(PETSC_USE_COMPLEX) - void SetRealSymmetric(); -#endif - void CopySymmetry(const PetscParMatrix &B); - - // Returns the local number of rows. - PetscInt GetNumRows() const; - PetscInt Height() const { return GetNumRows(); } - - // Returns the local number of columns. - PetscInt GetNumCols() const; - PetscInt Width() const { return GetNumCols(); } - - // Returns the global number of rows. - PetscInt GetGlobalNumRows() const; - - // Returns the global number of columns. - PetscInt GetGlobalNumCols() const; - - // Returns the number of nonzeros. - virtual PetscInt NNZ() const; -#if defined(PETSC_USE_COMPLEX) - virtual PetscInt NNZReal() const - { - MFEM_ABORT("NNZReal is not supported for base class PetscParMatrix!"); - return 0; - } - virtual PetscInt NNZImag() const - { - MFEM_ABORT("NNZImag is not supported for base class PetscParMatrix!"); - return 0; - } -#endif - - // Calculate matrix Frobenius and infinity norms. 
- PetscReal NormF() const; - PetscReal NormInf() const; -#if defined(PETSC_USE_COMPLEX) - virtual PetscReal NormFReal() const - { - MFEM_ABORT("NormFReal is not supported for base class PetscParMatrix!"); - return 0.0; - } - virtual PetscReal NormFImag() const - { - MFEM_ABORT("NormFImag is not supported for base class PetscParMatrix!"); - return 0.0; - } - virtual PetscReal NormInfReal() const - { - MFEM_ABORT("NormInfReal is not supported for base class PetscParMatrix!"); - return 0.0; - } - virtual PetscReal NormInfImag() const - { - MFEM_ABORT("NormInfImag is not supported for base class PetscParMatrix!"); - return 0.0; - } -#endif - - // Estimate matrix 2-norm (spectral norm) using power iteration. - PetscReal Norm2(PetscReal tol = PETSC_DEFAULT, PetscInt maxits = PETSC_DEFAULT) const; - - // Scale all entries by s. - void Scale(PetscScalar s); - -#if defined(PETSC_USE_COMPLEX) - // Replace entries with complex conjugate. - void Conj(); - - // Zero the imaginary part of the matrix. - void GetRealPart(); - - // Move the imaginary part to the real part of the matrix. - void GetImagPart(); -#endif - - // In-place addition A += alpha * B. - void AXPY(PetscScalar alpha, const PetscParMatrix &B, NNZStructure struc); - - // Matrix-vector multiplication. - void Mult(const PetscParVector &x, PetscParVector &y) const; - void MultAdd(const PetscParVector &x, PetscParVector &y) const; - void MultTranspose(const PetscParVector &x, PetscParVector &y) const; - void MultTransposeAdd(const PetscParVector &x, PetscParVector &y) const; - void MultHermitianTranspose(const PetscParVector &x, PetscParVector &y) const; - void MultHermitianTransposeAdd(const PetscParVector &x, PetscParVector &y) const; - -#if defined(PETSC_USE_COMPLEX) - // Multiplication with a real-valued vector. 
- virtual void Mult(const mfem::Vector &x, PetscParVector &y) const; - virtual void MultTranspose(const mfem::Vector &x, PetscParVector &y) const; - virtual void MultHermitianTranspose(const mfem::Vector &x, PetscParVector &y) const; -#endif - - // Prints the matrix (to stdout if fname is nullptr). - virtual void Print(const char *fname = nullptr, bool binary = false) const; -#if defined(PETSC_USE_COMPLEX) - virtual void PrintReal(const char *fname) const - { - MFEM_ABORT("PrintReal is not supported for base class PetscParMatrix!"); - } - virtual void PrintImag(const char *fname) const - { - MFEM_ABORT("PrintImag is not supported for base class PetscParMatrix!"); - } -#endif - - // Returns a (real) MFEM Operator from the underlying shell matrix data. When complex - // scalars are used, the parameter controls which part of the matrix to extract. -#if defined(PETSC_USE_COMPLEX) - virtual const mfem::Operator *GetOperator(ExtractStructure struc) const -#else - virtual const mfem::Operator *GetOperator() const -#endif - { - MFEM_ABORT("GetOperator is not supported for base class PetscParMatrix!"); - return nullptr; - } - - // Test whether or not a shell matrix has a real or imaginary parts. -#if defined(PETSC_USE_COMPLEX) - virtual bool HasReal() const - { - MFEM_ABORT("HasReal is not supported for base class PetscParMatrix!"); - return false; - } - virtual bool HasImag() const - { - MFEM_ABORT("HasImag is not supported for base class PetscParMatrix!"); - return false; - } -#endif - - // Constructs a (real) HypreParMatrix from the PETSc matrix data. When complex scalars - // are used, the parameter controls which part of the matrix to extract. 
-#if defined(PETSC_USE_COMPLEX) - virtual std::unique_ptr - GetHypreParMatrix(ExtractStructure struc) const; -#else - virtual std::unique_ptr GetHypreParMatrix() const; -#endif - - // Create a submatrix on the same number of processors as the original matrix, - // corresponding to the provided rows and columns which are the selected(local) indices. - virtual std::unique_ptr GetSubMatrix(const mfem::Array &rows, - const mfem::Array &cols); - - // Create a sequential gathered matrix corresponding to the parallel matrix. All processes - // on the original communicator must call this function, but if the argument is false, no - // matrix is created (returned pointer is nullptr). - virtual std::unique_ptr GetSequentialMatrix(bool create); - - // Get the associated MPI communicator. - MPI_Comm GetComm() const; - - // Typecasting to PETSc's Mat type. - operator Mat() const { return A; } - - // Typecasting to PETSc object. - operator PetscObject() const { return reinterpret_cast(A); } -}; - -// Context data for PETSc shell matrices. These store complex matrices as -// Ar + i Ai and perform matrix-vector products. -struct PetscMatShellCtx -{ - std::unique_ptr Ar; - mfem::Vector x, y; -#if defined(PETSC_USE_COMPLEX) - std::unique_ptr Ai; -#endif -}; - -// Wrapper for PETSc's MATSHELL matrix class. -class PetscShellMatrix : public PetscParMatrix -{ -private: - // Returns the shell matrix context. - PetscMatShellCtx *GetContext() const; - -public: - // Create a PETSc shell matrix wrapping an MFEM Operator. Ownership of the operator is - // transfered to the PETSc shell. When PETSc is compiled with complex numbers support, - // the shell matrix wraps the real and imaginary parts to act on complex PETSc Vec - // objects. - PetscShellMatrix(MPI_Comm comm, std::unique_ptr &&B); -#if defined(PETSC_USE_COMPLEX) - PetscShellMatrix(MPI_Comm comm, std::unique_ptr &&Br, - std::unique_ptr &&Bi); -#endif - - // Returns the number of nonzeros. 
- PetscInt NNZ() const override; -#if defined(PETSC_USE_COMPLEX) - PetscInt NNZReal() const override; - PetscInt NNZImag() const override; -#endif - - // Calculate matrix Frobenius and infinity norms. -#if defined(PETSC_USE_COMPLEX) - PetscReal NormFReal() const override; - PetscReal NormFImag() const override; - PetscReal NormInfReal() const override; - PetscReal NormInfImag() const override; -#endif - -#if defined(PETSC_USE_COMPLEX) - // Multiplication with a real-valued vector. - void Mult(const mfem::Vector &x, PetscParVector &y) const override; - void MultTranspose(const mfem::Vector &x, PetscParVector &y) const override; - void MultHermitianTranspose(const mfem::Vector &x, PetscParVector &y) const override; -#endif - - // Prints the locally owned matrix rows in parallel. - void Print(const char *fname = nullptr, bool binary = false) const override; -#if defined(PETSC_USE_COMPLEX) - void PrintReal(const char *fname) const override; - void PrintImag(const char *fname) const override; -#endif - - // Test whether or not a shell matrix has a real or imaginary parts. -#if defined(PETSC_USE_COMPLEX) - bool HasReal() const override; - bool HasImag() const override; -#endif - - // Returns a (real) MFEM Operator from the underlying shell matrix data. When complex - // scalars are used, the parameter controls which part of the matrix to extract. -#if defined(PETSC_USE_COMPLEX) - const mfem::Operator *GetOperator(ExtractStructure struc) const override; -#else - const mfem::Operator *GetOperator() const override; -#endif - - // These methods are not supported for MATSHELL. 
-#if defined(PETSC_USE_COMPLEX) - std::unique_ptr - GetHypreParMatrix(ExtractStructure struc) const override -#else - std::unique_ptr GetHypreParMatrix() const override -#endif - { - MFEM_ABORT("GetHypreParMatrix is not supported for PetscShellMatrix!"); - return {}; - } - std::unique_ptr GetSubMatrix(const mfem::Array &, - const mfem::Array &) override - { - MFEM_ABORT("GetSubMatrix is not supported for PetscShellMatrix!"); - return {}; - } - std::unique_ptr GetSequentialMatrix(bool) override - { - MFEM_ABORT("GetSequentialMatrix is not supported for PetscShellMatrix!"); - return {}; - } -}; - -// Wrapper for PETSc's MATIJ matrix class. -class PetscAijMatrix : public PetscParMatrix -{ -public: - // Create a PETSc matrix explicitly converted from an MFEM Operator. - PetscAijMatrix(const mfem::Operator &B); -#if defined(PETSC_USE_COMPLEX) - PetscAijMatrix(const mfem::Operator &Br, const mfem::Operator &Bi); -#endif -}; - -// Wrapper for PETSc's MATDENSE matrix class. -class PetscDenseMatrix : public PetscParMatrix -{ -private: - // Helper method for column orthonormalization. - PetscReal OrthonormalizeColumnInternal( - PetscInt j, bool mgs, bool cgs2, - const std::function &Dot, - const std::function - &VecDot, - const std::function &Normalize); - -public: - // Create a parallel or sequential PETSc dense matrix. Option to specify an existing data - // array. - PetscDenseMatrix(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, - PetscScalar *data); - PetscDenseMatrix(PetscInt m, PetscInt n, PetscScalar *data); - - // Set the (local) matrix dimensions to m x n, copying previous contents to the upper-left - // block. - void Resize(PetscInt m, PetscInt n, bool copy = false); - - // Access methods for columns of the dense matrix. 
- PetscParVector GetColumn(PetscInt j); - const PetscParVector GetColumnRead(PetscInt j) const; - void RestoreColumn(PetscInt j, PetscParVector &v); - void RestoreColumnRead(PetscInt j, const PetscParVector &v) const; - - // Access the data array of the dense matrix. - PetscScalar *GetArray(); - const PetscScalar *GetArrayRead() const; - void RestoreArray(PetscScalar *data); - void RestoreArrayRead(const PetscScalar *data) const; - - // Sets all entries of the matrix to random numbers sampled from the range [-1-i, 1+i], or - // [-1, 1]. - void SetRandom(PetscInt start = -1, PetscInt end = -1); -#if defined(PETSC_USE_COMPLEX) - void SetRandomReal(PetscInt start = -1, PetscInt end = -1); -#else - void SetRandomReal(PetscInt start = -1, PetscInt end = -1) { SetRandom(start, end); } -#endif - void SetRandomSign(PetscInt start = -1, PetscInt end = -1, bool init = false); - - // Orthonormalize column j of the matrix against the preceeding columns, using classical - // or modified Gram-Schmidt. - PetscReal OrthonormalizeColumn(PetscInt j, bool mgs, bool cgs2); - PetscReal OrthonormalizeColumn(PetscInt j, bool mgs, bool cgs2, const PetscParMatrix &B, - PetscParVector &Bv); - - // Dense matrix-matrix multiplication. 
- void MatMult(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const; - void MatMultTranspose(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const; - void MatTransposeMult(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const; -}; - -} // namespace palace::petsc - #endif // PALACE_LINALG_PETSC_HPP diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp index 7dbd8d777..4ec2bb45b 100644 --- a/palace/linalg/slepc.cpp +++ b/palace/linalg/slepc.cpp @@ -3,29 +3,17 @@ #include "slepc.hpp" -#if 0 // XX TODO DISABLE SLEPC FOR NOW - #if defined(PALACE_WITH_SLEPC) +#include #include #include #include #include "linalg/divfree.hpp" #include "linalg/ksp.hpp" +#include "linalg/vector.hpp" #include "utils/communication.hpp" -static PetscErrorCode __mat_apply_EPS_A(Mat, Vec, Vec); -static PetscErrorCode __mat_apply_EPS_B(Mat, Vec, Vec); -static PetscErrorCode __pc_apply_EPS(PC, Vec, Vec); -static PetscErrorCode __mat_apply_PEPLinear_L0(Mat, Vec, Vec); -static PetscErrorCode __mat_apply_PEPLinear_L1(Mat, Vec, Vec); -static PetscErrorCode __mat_apply_PEPLinear_B(Mat, Vec, Vec); -static PetscErrorCode __pc_apply_PEPLinear(PC, Vec, Vec); -static PetscErrorCode __mat_apply_PEP_A0(Mat, Vec, Vec); -static PetscErrorCode __mat_apply_PEP_A1(Mat, Vec, Vec); -static PetscErrorCode __mat_apply_PEP_A2(Mat, Vec, Vec); -static PetscErrorCode __pc_apply_PEP(PC, Vec, Vec); - namespace palace::slepc { @@ -44,122 +32,196 @@ void Finalize() PalacePetscCall(SlepcFinalize()); } -PetscReal GetMaxSingularValue(const petsc::PetscParMatrix &A, PetscReal tol, - PetscInt maxits) +PetscReal GetMaxSingularValue(MPI_Comm comm, const ComplexOperator &A, bool herm, + PetscReal tol, PetscInt max_it) { // This method assumes the provided operator has the required operations for SLEPc's EPS // or SVD solvers, namely MATOP_MULT and MATOP_MULT_HERMITIAN_TRANSPOSE (if the matrix // is not Hermitian). 
- PetscInt nconv; - PetscReal sigma; + Mat A0; + PetscInt n = A.Height() / 2; + PalacePetscCall(MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, nullptr, &A0)); + ComplexVector x(A.Height()), y(A.Height()); + auto __mat_apply_shell = [&A, &x, &y](Mat, Vec x0, Vec y0) -> PetscErrorCode + { + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x0, &n)); + + const PetscScalar *px0; + PetscCall(VecGetArrayRead(x0, &px0)); + x.Set(px0, n); + PetscCall(VecRestoreArrayRead(x0, &px0)); + + A.Mult(x, y); + + PetscScalar *py0; + PetscCall(VecGetArrayWrite(y0, &py0)); + y.Get(py0, n); + PetscCall(VecRestoreArrayWrite(y0, &py0)); + + PetscFunctionReturn(0); + }; + auto __mat_apply_transpose_shell = [&A, &x, &y](Mat, Vec x0, Vec y0) -> PetscErrorCode + { + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x0, &n)); + + const PetscScalar *px0; + PetscCall(VecGetArrayRead(x0, &px0)); + x.Set(px0, n); + PetscCall(VecRestoreArrayRead(x0, &px0)); + + A.MultTranspose(x, y); + + PetscScalar *py0; + PetscCall(VecGetArrayWrite(y0, &py0)); + y.Get(py0, n); + PetscCall(VecRestoreArrayWrite(y0, &py0)); - // XX TODO ADDRESS GetHermitian + PetscFunctionReturn(0); + }; + auto __mat_apply_hermitian_transpose_shell = [&A, &x, &y](Mat, Vec x0, + Vec y0) -> PetscErrorCode + { + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x0, &n)); + + const PetscScalar *px0; + PetscCall(VecGetArrayRead(x0, &px0)); + x.Set(px0, n); + PetscCall(VecRestoreArrayRead(x0, &px0)); + + A.MultHermitianTranspose(x, y); - if (A.GetHermitian()) // Returns true if symmetric and not PETSC_USE_COMPLEX + PetscScalar *py0; + PetscCall(VecGetArrayWrite(y0, &py0)); + y.Get(py0, n); + PetscCall(VecRestoreArrayWrite(y0, &py0)); + + PetscFunctionReturn(0); + }; + PalacePetscCall(MatShellSetOperation(A0, MATOP_MULT, (void (*)()) & __mat_apply_shell)); + + if (herm) { EPS eps; + PetscInt num_conv; PetscScalar eig; - PalacePetscCall(EPSCreate(A.GetComm(), &eps)); - 
PalacePetscCall(EPSSetOperators(eps, A, nullptr)); + PalacePetscCall(EPSCreate(comm, &eps)); + PalacePetscCall(EPSSetOperators(eps, A0, nullptr)); PalacePetscCall(EPSSetProblemType(eps, EPS_HEP)); PalacePetscCall(EPSSetWhichEigenpairs(eps, EPS_LARGEST_MAGNITUDE)); PalacePetscCall(EPSSetDimensions(eps, 1, PETSC_DEFAULT, PETSC_DEFAULT)); - PalacePetscCall(EPSSetTolerances(eps, tol, maxits)); + PalacePetscCall(EPSSetTolerances(eps, tol, max_it)); PalacePetscCall(EPSSolve(eps)); - PalacePetscCall(EPSGetConverged(eps, &nconv)); - if (nconv < 1) + PalacePetscCall(EPSGetConverged(eps, &num_conv)); + if (num_conv < 1) { - Mpi::Warning(A.GetComm(), - "SLEPc EPS solve did not converge for maximum singular value!\n"); - PalacePetscCall(EPSDestroy(&eps)); - return 0.0; + Mpi::Warning(comm, "SLEPc EPS solve did not converge for maximum singular value!\n"); + eig = 0.0; + } + else + { + PalacePetscCall(EPSGetEigenvalue(eps, 0, &eig, nullptr)); + MFEM_VERIFY(PetscImaginaryPart(eig) == 0.0, + "Unexpected complex eigenvalue for Hermitian matrix (λ = " << eig + << ")!"); } - MFEM_VERIFY(nconv >= 1, " "); - PalacePetscCall(EPSGetEigenvalue(eps, 0, &eig, nullptr)); PalacePetscCall(EPSDestroy(&eps)); - MFEM_VERIFY(PetscImaginaryPart(eig) == 0.0, - "Unexpected complex eigenvalue for Hermitian matrix (λ = " << eig << ")!"); - sigma = PetscAbsScalar(eig); + PalacePetscCall(MatDestroy(&A0)); + return PetscAbsScalar(eig); } else { + PalacePetscCall(MatShellSetOperation(A0, MATOP_MULT_TRANSPOSE, + (void (*)()) & __mat_apply_transpose_shell)); + PalacePetscCall( + MatShellSetOperation(A0, MATOP_MULT_HERMITIAN_TRANSPOSE, + (void (*)()) & __mat_apply_hermitian_transpose_shell)); + SVD svd; - PalacePetscCall(SVDCreate(A.GetComm(), &svd)); - PalacePetscCall(SVDSetOperators(svd, A, nullptr)); + PetscInt num_conv; + PetscReal sigma; + PalacePetscCall(SVDCreate(comm, &svd)); + PalacePetscCall(SVDSetOperators(svd, A0, nullptr)); PalacePetscCall(SVDSetProblemType(svd, SVD_STANDARD)); 
PalacePetscCall(SVDSetWhichSingularTriplets(svd, SVD_LARGEST)); PalacePetscCall(SVDSetDimensions(svd, 1, PETSC_DEFAULT, PETSC_DEFAULT)); - PalacePetscCall(SVDSetTolerances(svd, tol, maxits)); + PalacePetscCall(SVDSetTolerances(svd, tol, max_it)); PalacePetscCall(SVDSolve(svd)); - PalacePetscCall(SVDGetConverged(svd, &nconv)); - if (nconv < 1) + PalacePetscCall(SVDGetConverged(svd, &num_conv)); + if (num_conv < 1) { - Mpi::Warning(A.GetComm(), - "SLEPc SVD solve did not converge for maximum singular value!\n"); - PalacePetscCall(SVDDestroy(&svd)); - return 0.0; + Mpi::Warning(comm, "SLEPc SVD solve did not converge for maximum singular value!\n"); + sigma = 0.0; + } + else + { + PalacePetscCall(SVDGetSingularTriplet(svd, 0, &sigma, nullptr, nullptr)); } - MFEM_VERIFY(nconv >= 1, " "); - PalacePetscCall(SVDGetSingularTriplet(svd, 0, &sigma, nullptr, nullptr)); PalacePetscCall(SVDDestroy(&svd)); + PalacePetscCall(MatDestroy(&A0)); + return sigma; } - return sigma; } // Eigensolver base class methods -SlepcEigenSolver::SlepcEigenSolver(int print_lvl) : clcustom(false), print(print_lvl) +SlepcEigenSolver::SlepcEigenSolver(int print) : print(print) { sinvert = false; region = true; sigma = 0.0; gamma = delta = 1.0; - res = nullptr; - v0 = r0 = nullptr; opInv = nullptr; opProj = nullptr; + opB = nullptr; + + B0 = nullptr; + v0 = nullptr; + + cl_custom = false; } SlepcEigenSolver::~SlepcEigenSolver() { - delete[] res; - delete v0; - delete r0; + PalacePetscCall(MatDestroy(&B0)); + PalacePetscCall(VecDestroy(&v0)); } -void SlepcEigenSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void SlepcEigenSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { MFEM_ABORT("SetOperators not defined for base class SlepcEigenSolver!"); } -void SlepcEigenSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const 
petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void SlepcEigenSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { MFEM_ABORT("SetOperators not defined for base class SlepcEigenSolver!"); } -void SlepcEigenSolver::SetLinearSolver(const KspSolver &ksp) +void SlepcEigenSolver::SetLinearSolver(const ComplexKspSolver &ksp) { opInv = &ksp; } -void SlepcEigenSolver::SetProjector(const DivFreeSolver &divfree) +void SlepcEigenSolver::SetDivFreeProjector(const DivFreeSolver &divfree) { opProj = &divfree; } -void SlepcEigenSolver::SetBMat(const petsc::PetscParMatrix &B) +void SlepcEigenSolver::SetBMat(const Operator &B) { - BV bv = GetBV(); - PalacePetscCall(BVSetMatrix(bv, B, PETSC_FALSE)); + opB = &B; } -void SlepcEigenSolver::SetShiftInvert(double tr, double ti, bool precond) +void SlepcEigenSolver::SetShiftInvert(PetscScalar s, bool precond) { ST st = GetST(); if (precond) @@ -172,7 +234,7 @@ void SlepcEigenSolver::SetShiftInvert(double tr, double ti, bool precond) } PalacePetscCall(STSetTransform(st, PETSC_TRUE)); PalacePetscCall(STSetMatMode(st, ST_MATMODE_SHELL)); - sigma = tr + PETSC_i * ti; // Wait until solve time to call EPS/PEPSetTarget + sigma = s; // Wait until solve time to call EPS/PEPSetTarget sinvert = true; } @@ -222,26 +284,26 @@ void SlepcEigenSolver::Customize() { if (PetscRealPart(sigma) > 0.0) { - SetRegion(PetscRealPart(sigma) / gamma, mfem::infinity(), -mfem::infinity(), - mfem::infinity()); + SetRegion(PetscRealPart(sigma) / gamma - PETSC_i * mfem::infinity(), + mfem::infinity() + PETSC_i * mfem::infinity()); } else if (PetscRealPart(sigma) < 0.0) { - SetRegion(-mfem::infinity(), PetscRealPart(sigma) / gamma, -mfem::infinity(), - mfem::infinity()); + SetRegion(-mfem::infinity() - PETSC_i * mfem::infinity(), + PetscRealPart(sigma) / gamma + PETSC_i * mfem::infinity()); } } else if (PetscRealPart(sigma) == 0.0) { if (PetscImaginaryPart(sigma) > 0.0) { 
- SetRegion(-mfem::infinity(), mfem::infinity(), PetscImaginaryPart(sigma) / gamma, - mfem::infinity()); + SetRegion(-mfem::infinity() + PETSC_i * PetscImaginaryPart(sigma) / gamma, + mfem::infinity() + PETSC_i * mfem::infinity()); } else if (PetscImaginaryPart(sigma) < 0.0) { - SetRegion(-mfem::infinity(), mfem::infinity(), -mfem::infinity(), - PetscImaginaryPart(sigma) / gamma); + SetRegion(-mfem::infinity() - PETSC_i * mfem::infinity(), + PetscImaginaryPart(sigma) / gamma + PETSC_i * mfem::infinity()); } } else @@ -252,71 +314,43 @@ void SlepcEigenSolver::Customize() } } -void SlepcEigenSolver::SetPCShell(void *ctx, PetscErrorCode (*__pc_apply)(PC, Vec, Vec)) -{ - // Configure linear solver for generalized problem or spectral transformation. This also - // allows use of the divergence-free projector as a linear solve side-effect. - KSP ksp; - PC pc; - ST st = GetST(); - PalacePetscCall(STGetKSP(st, &ksp)); - PalacePetscCall(KSPGetPC(ksp, &pc)); - - // Configure the linear solver as a shell preconditioner. 
- PalacePetscCall(PCSetType(pc, PCSHELL)); - PalacePetscCall(PCShellSetContext(pc, ctx)); - PalacePetscCall(PCShellSetApply(pc, __pc_apply)); -} - -void SlepcEigenSolver::SetRegion(PetscReal rminr, PetscReal rmaxr, PetscReal rmini, - PetscReal rmaxi, bool complement) +void SlepcEigenSolver::SetRegion(PetscScalar lower_left, PetscScalar upper_right, + bool complement) { RG rg = GetRG(); PalacePetscCall(RGSetType(rg, RGINTERVAL)); - PalacePetscCall(RGIntervalSetEndpoints(rg, rminr, rmaxr, rmini, rmaxi)); + PalacePetscCall(RGIntervalSetEndpoints( + rg, PetscRealPart(lower_left), PetscRealPart(upper_right), + PetscImaginaryPart(lower_left), PetscImaginaryPart(upper_right))); if (complement) { PalacePetscCall(RGSetComplement(rg, PETSC_TRUE)); } } -void SlepcEigenSolver::GetBackTransform(PetscScalar eig, PetscReal &eigr, - PetscReal &eigi) const +PetscScalar SlepcEigenSolver::GetBackTransform(PetscScalar l) const { - eigr = gamma * PetscRealPart(eig); - eigi = gamma * PetscImaginaryPart(eig); + return gamma * l; } -void SlepcEigenSolver::GetError(int i, EigenSolverBase::ErrorType type, double &err) const +PetscReal SlepcEigenSolver::GetError(int i, EigenvalueSolver::ErrorType type) const { - PetscReal eigr, eigi; - GetEigenvalue(i, eigr, eigi); - PetscScalar eig = eigr + PETSC_i * eigi; - if (res[i] <= 0.0) - { - GetEigenvector(i, *v0); - GetResidual(eig, *v0, *r0); - res[i] = r0->Norml2() / v0->Norml2(); - } switch (type) { case ErrorType::ABSOLUTE: - err = res[i]; - break; + return res.get()[i]; case ErrorType::RELATIVE: - err = res[i] / PetscAbsScalar(eig); - break; + return res.get()[i] / PetscAbsScalar(GetEigenvalue(i)); case ErrorType::BACKWARD: - err = res[i] / GetBackwardScaling(eig); - break; + return res.get()[i] / GetBackwardScaling(GetEigenvalue(i)); } + return 0.0; } // EPS specific methods -SlepcEPSSolverBase::SlepcEPSSolverBase(MPI_Comm comm, int print_lvl, - const std::string &prefix) - : SlepcEigenSolver(print_lvl) 
+SlepcEPSSolverBase::SlepcEPSSolverBase(MPI_Comm comm, int print, const std::string &prefix) + : SlepcEigenSolver(print) { PalacePetscCall(EPSCreate(comm, &eps)); PalacePetscCall(EPSSetOptionsPrefix(eps, prefix.c_str())); @@ -337,25 +371,23 @@ SlepcEPSSolverBase::SlepcEPSSolverBase(MPI_Comm comm, int print_lvl, PetscOptionsPrefixPop(nullptr); } } - A = B = nullptr; + A0 = A1 = nullptr; } SlepcEPSSolverBase::~SlepcEPSSolverBase() { - MPI_Comm comm; - PalacePetscCall(PetscObjectGetComm(reinterpret_cast(eps), &comm)); PalacePetscCall(EPSDestroy(&eps)); - delete A; - delete B; + PalacePetscCall(MatDestroy(&A0)); + PalacePetscCall(MatDestroy(&A1)); } -void SlepcEPSSolverBase::SetNumModes(int numeig, int numvec) +void SlepcEPSSolverBase::SetNumModes(int num_eig, int num_vec) { - PalacePetscCall( - EPSSetDimensions(eps, numeig, (numvec > 0) ? numvec : PETSC_DEFAULT, PETSC_DEFAULT)); + PalacePetscCall(EPSSetDimensions(eps, num_eig, (num_vec > 0) ? num_vec : PETSC_DEFAULT, + PETSC_DEFAULT)); } -void SlepcEPSSolverBase::SetTol(double tol) +void SlepcEPSSolverBase::SetTol(PetscReal tol) { PalacePetscCall(EPSSetTolerances(eps, tol, PETSC_DEFAULT)); PalacePetscCall(EPSSetConvergenceTest(eps, EPS_CONV_REL)); @@ -363,13 +395,13 @@ void SlepcEPSSolverBase::SetTol(double tol) // PalacePetscCall(EPSSetTrueResidual(eps, PETSC_TRUE)); } -void SlepcEPSSolverBase::SetMaxIter(int maxits) +void SlepcEPSSolverBase::SetMaxIter(int max_it) { PalacePetscCall( - EPSSetTolerances(eps, PETSC_DEFAULT, (maxits > 0) ? maxits : PETSC_DEFAULT)); + EPSSetTolerances(eps, PETSC_DEFAULT, (max_it > 0) ? 
max_it : PETSC_DEFAULT)); } -void SlepcEPSSolverBase::SetWhichEigenpairs(EigenSolverBase::WhichType type) +void SlepcEPSSolverBase::SetWhichEigenpairs(EigenvalueSolver::WhichType type) { switch (type) { @@ -458,20 +490,27 @@ void SlepcEPSSolverBase::SetType(SlepcEigenSolver::Type type) } } -void SlepcEPSSolverBase::SetInitialSpace(const petsc::PetscParVector &v) +void SlepcEPSSolverBase::SetInitialSpace(const ComplexVector &v) { + MFEM_VERIFY( + A0 && A1, + "Must call SetOperators before using SetInitialSpace for SLEPc eigenvalue solver!"); if (!v0) { - v0 = new petsc::PetscParVector(v); + PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } - else - { - MFEM_VERIFY(v.GetSize() == v0->GetSize(), - "Invalid modification of eigenvalue problem size!"); - v0->Copy(v); - } - Vec is[1]; - is[0] = *v0; + + PetscInt n; + PalacePetscCall(VecGetLocalSize(v0, &n)); + MFEM_VERIFY(v.Size() == 2 * n, + "Invalid size mismatch for provided initial space vector!"); + + PetscScalar *pv0; + PalacePetscCall(VecGetArrayWrite(v0, &pv0)); + v.Get(pv0, n); + PalacePetscCall(VecRestoreArrayWrite(v0, &pv0)); + + Vec is[1] = {v0}; PalacePetscCall(EPSSetInitialSpace(eps, 1, is)); } @@ -479,25 +518,27 @@ void SlepcEPSSolverBase::Customize() { SlepcEigenSolver::Customize(); PalacePetscCall(EPSSetTarget(eps, sigma / gamma)); - if (!clcustom) + if (!cl_custom) { PalacePetscCall(EPSSetFromOptions(eps)); - // if (print > 0) // These are printed by PETSc linear solver - // { - // PetscOptionsView(nullptr, PETSC_VIEWER_STDOUT_(GetComm())); - // Mpi::Print(GetComm(), "\n"); - // } - clcustom = true; + if (print > 0) + { + PetscOptionsView(nullptr, PETSC_VIEWER_STDOUT_(GetComm())); + Mpi::Print(GetComm(), "\n"); + } + cl_custom = true; } } int SlepcEPSSolverBase::Solve() { - MFEM_VERIFY(A && B && opInv, "Operators are not set for SlepcEPSSolverBase!"); - PetscInt numconv; + MFEM_VERIFY(A0 && A1 && opInv, "Operators are not set for SlepcEPSSolverBase!"); + + // Solve the eigenvalue problem. 
+ PetscInt num_conv; Customize(); PalacePetscCall(EPSSolve(eps)); - PalacePetscCall(EPSGetConverged(eps, &numconv)); + PalacePetscCall(EPSGetConverged(eps, &num_conv)); if (print > 0) { Mpi::Print(GetComm(), "\n"); @@ -505,27 +546,40 @@ int SlepcEPSSolverBase::Solve() Mpi::Print(GetComm(), " Total number of linear systems solved: {:d}\n" " Total number of linear solver iterations: {:d}\n", - opInv->GetTotalNumMult(), opInv->GetTotalNumIter()); + opInv->NumTotalMult(), opInv->NumTotalMultIter()); } - delete[] res; - res = new PetscReal[numconv]; - for (PetscInt i = 0; i < numconv; i++) + + // Compute and store the eigenpair residuals. + res = std::make_unique(num_conv); + for (int i = 0; i < num_conv; i++) { - res[i] = -1.0; + res.get()[i] = GetResidualNorm(i); } - return (int)numconv; + return (int)num_conv; } -void SlepcEPSSolverBase::GetEigenvalue(int i, double &eigr, double &eigi) const +PetscScalar SlepcEPSSolverBase::GetEigenvalue(int i) const { - PetscScalar eig; - PalacePetscCall(EPSGetEigenvalue(eps, i, &eig, nullptr)); - GetBackTransform(eig, eigr, eigi); + PetscScalar l; + PalacePetscCall(EPSGetEigenvalue(eps, i, &l, nullptr)); + return GetBackTransform(l); } -void SlepcEPSSolverBase::GetEigenvector(int i, petsc::PetscParVector &v) const +void SlepcEPSSolverBase::GetEigenvector(int i, ComplexVector &x) const { - PalacePetscCall(EPSGetEigenvector(eps, i, v, nullptr)); + MFEM_VERIFY( + v0, + "Must call SetOperators before using GetEigenvector for SLEPc eigenvalue solver!"); + PalacePetscCall(EPSGetEigenvector(eps, i, v0, nullptr)); + + PetscInt n; + PalacePetscCall(VecGetLocalSize(v0, &n)); + MFEM_VERIFY(x.Size() == 2 * n, "Invalid size mismatch for provided eigenvector!"); + + const PetscScalar *pv0; + PalacePetscCall(VecGetArrayRead(v0, &pv0)); + x.Set(pv0, n); + PalacePetscCall(VecRestoreArrayRead(v0, &pv0)); } BV SlepcEPSSolverBase::GetBV() const @@ -549,51 +603,77 @@ RG SlepcEPSSolverBase::GetRG() const return rg; } -MPI_Comm 
SlepcEPSSolverBase::GetComm() const -{ - return eps ? PetscObjectComm(reinterpret_cast(eps)) : MPI_COMM_NULL; -} - -SlepcEPSSolver::SlepcEPSSolver(MPI_Comm comm, int print_lvl, const std::string &prefix) - : SlepcEPSSolverBase(comm, print_lvl, prefix) +SlepcEPSSolver::SlepcEPSSolver(MPI_Comm comm, int print, const std::string &prefix) + : SlepcEPSSolverBase(comm, print, prefix) { opK = opM = nullptr; normK = normM = 0.0; } -void SlepcEPSSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { // Construct shell matrices for the scaled operators which define the generalized // eigenvalue problem. bool first = (opK == nullptr); - { - Mat A_, B_; - MPI_Comm comm = GetComm(); - PetscInt n = K.GetNumRows(); - PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A_)); - PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B_)); - PalacePetscCall(MatShellSetOperation( - A_, MATOP_MULT, - (void (*)()) static_cast(&__mat_apply_EPS_A))); - PalacePetscCall(MatShellSetOperation( - B_, MATOP_MULT, - (void (*)()) static_cast(&__mat_apply_EPS_B))); - delete A; - delete B; - A = new petsc::PetscParMatrix(A_, false); // Inherits the PETSc Mat - B = new petsc::PetscParMatrix(B_, false); - } - PalacePetscCall(EPSSetOperators(eps, *A, *B)); opK = &K; opM = &M; + + PetscInt n = opK->Height() / 2; + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0)); + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); + auto __mat_apply_shell_A0 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode + { + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x_, &n)); + + const PetscScalar *px_; + PetscCall(VecGetArrayRead(x_, &px_)); + 
x.Set(px_, n); + PetscCall(VecRestoreArrayRead(x_, &px_)); + + opK->Mult(x, y); + + PetscScalar *py_; + PetscCall(VecGetArrayWrite(y_, &py_)); + y.Get(py_, n); + PetscCall(VecRestoreArrayWrite(y_, &py_)); + + PetscFunctionReturn(0); + }; + auto __mat_apply_shell_A1 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode + { + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x_, &n)); + + const PetscScalar *px_; + PetscCall(VecGetArrayRead(x_, &px_)); + x.Set(px_, n); + PetscCall(VecRestoreArrayRead(x_, &px_)); + + opM->Mult(x, y); + + PetscScalar *py_; + PetscCall(VecGetArrayWrite(y_, &py_)); + y.Get(py_, n); + PetscCall(VecRestoreArrayWrite(y_, &py_)); + + PetscFunctionReturn(0); + }; + PalacePetscCall( + MatShellSetOperation(A0, MATOP_MULT, (void (*)()) & __mat_apply_shell_A0)); + PalacePetscCall( + MatShellSetOperation(A1, MATOP_MULT, (void (*)()) & __mat_apply_shell_A1)); + PalacePetscCall(EPSSetOperators(eps, A0, A1)); + if (first && type != ScaleType::NONE) { - normK = opK->Norm2(); - normM = opM->Norm2(); + normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal()); + normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal()); MFEM_VERIFY(normK >= 0.0 && normM >= 0.0, "Invalid matrix norms for EPS scaling!"); if (normK > 0 && normM > 0.0) { @@ -605,103 +685,217 @@ void SlepcEPSSolver::SetOperators(const petsc::PetscParMatrix &K, // Set up workspace. if (!v0) { - v0 = new petsc::PetscParVector(K); - } - if (!r0) - { - r0 = new petsc::PetscParVector(K); + PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } + x.SetSize(opK->Height()); + y.SetSize(opK->Height()); - // Configure linear solver. + // Configure linear solver for generalized problem or spectral transformation. This also + // allows use of the divergence-free projector as a linear solve side-effect. 
if (first) { - SetPCShell((void *)this, __pc_apply_EPS); + ConfigurePCShell(); } } -void SlepcEPSSolver::GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const +void SlepcEPSSolver::SetBMat(const Operator &B) { - // r = (K - λ M) v for eigenvalue λ. - opM->Mult(v, r); - r.Scale(-eig); - opK->MultAdd(v, r); + SlepcEigenSolver::SetBMat(B); + + PetscInt n = B.Height(); + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B0)); + auto __mat_apply_shell = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode + { + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x_, &n)); + + const PetscScalar *px_; + PetscCall(VecGetArrayRead(x_, &px_)); + x.Set(px_, n); + PetscCall(VecRestoreArrayRead(x_, &px_)); + + opB->Mult(x.Real(), y.Real()); + opB->Mult(x.Imag(), y.Imag()); + y *= delta * gamma; + + PetscScalar *py_; + PetscCall(VecGetArrayWrite(y_, &py_)); + y.Get(py_, n); + PetscCall(VecRestoreArrayWrite(y_, &py_)); + + PetscFunctionReturn(0); + }; + PalacePetscCall(MatShellSetOperation(B0, MATOP_MULT, (void (*)()) & __mat_apply_shell)); + + BV bv = GetBV(); + PalacePetscCall(BVSetMatrix(bv, B0, PETSC_FALSE)); } -PetscReal SlepcEPSSolver::GetBackwardScaling(PetscScalar eig) const +void SlepcEPSSolver::ConfigurePCShell() +{ + auto __pc_apply = [this](PC, Vec x_, Vec y_) -> PetscErrorCode + { + // Solve the linear system associated with the generalized eigenvalue problem: y = + // M⁻¹ x, or shift-and-invert spectral transformation: y = (K - σ M)⁻¹ x . Enforces the + // divergence-free constraint using the supplied projector. 
+ PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x_, &n)); + + const PetscScalar *px_; + PetscCall(VecGetArrayRead(x_, &px_)); + x.Set(px_, n); + PetscCall(VecRestoreArrayRead(x_, &px_)); + + opInv->Mult(x, y); + if (!sinvert) + { + y *= 1.0 / (delta * gamma); + } + else + { + y *= 1.0 / delta; + } + if (opProj) + { + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y)); + opProj->Mult(y); + // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(GetComm(), y)); + } + + PetscScalar *py_; + PetscCall(VecGetArrayWrite(y_, &py_)); + y.Get(py_, n); + PetscCall(VecRestoreArrayWrite(y_, &py_)); + + PetscFunctionReturn(0); + }; + + KSP ksp; + PC pc; + ST st = GetST(); + PalacePetscCall(STGetKSP(st, &ksp)); + PalacePetscCall(KSPGetPC(ksp, &pc)); + PalacePetscCall(PCSetType(pc, PCSHELL)); + PalacePetscCall(PCShellSetApply(pc, (PetscErrorCode(*)(PC, Vec, Vec)) & __pc_apply)); +} + +PetscReal SlepcEPSSolver::GetResidualNorm(int i) const +{ + // Compute the i-th eigenpair residual: || (K - λ M) x ||₂ for eigenvalue λ. + PetscScalar l = GetEigenvalue(i); + GetEigenvector(i, x); + opK->Mult(x, y); + opM->AddMult(x, y, -l); + return linalg::Norml2(GetComm(), y); +} + +PetscReal SlepcEPSSolver::GetBackwardScaling(PetscScalar l) const { // Make sure not to use norms from scaling as this can be confusing if they are different. // Note that SLEPc typically uses ||.||∞, not the 2-norm. 
if (normK <= 0.0) { - normK = opK->Norm2(); + normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal()); } if (normM <= 0.0) { - normM = opM->Norm2(); + normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal()); } - return normK + PetscAbsScalar(eig) * normM; + return normK + PetscAbsScalar(l) * normM; } -SlepcPEPLinearSolver::SlepcPEPLinearSolver(MPI_Comm comm, int print_lvl, +SlepcPEPLinearSolver::SlepcPEPLinearSolver(MPI_Comm comm, int print, const std::string &prefix) - : SlepcEPSSolverBase(comm, print_lvl, prefix) + : SlepcEPSSolverBase(comm, print, prefix) { opK = opC = opM = nullptr; normK = normC = normM = 0.0; - B0 = nullptr; - opB = nullptr; - x1 = x2 = y1 = y2 = z = nullptr; } -SlepcPEPLinearSolver::~SlepcPEPLinearSolver() -{ - delete B0; - delete x1; - delete x2; - delete y1; - delete y2; - delete z; -} - -void SlepcPEPLinearSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { // Construct shell matrices for the scaled linearized operators which define the block 2x2 // eigenvalue problem. 
bool first = (opK == nullptr); - { - Mat A_, B_; - MPI_Comm comm = GetComm(); - PetscInt n = K.GetNumRows(); - PalacePetscCall( - MatCreateShell(comm, 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A_)); - PalacePetscCall( - MatCreateShell(comm, 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B_)); - PalacePetscCall( - MatShellSetOperation(A_, MATOP_MULT, - (void (*)()) static_cast( - &__mat_apply_PEPLinear_L0))); - PalacePetscCall( - MatShellSetOperation(B_, MATOP_MULT, - (void (*)()) static_cast( - &__mat_apply_PEPLinear_L1))); - delete A; - delete B; - A = new petsc::PetscParMatrix(A_, false); // Inherits the PETSc Mat - B = new petsc::PetscParMatrix(B_, false); - } - PalacePetscCall(EPSSetOperators(eps, *A, *B)); opK = &K; opC = &C; opM = &M; + + PetscInt n = opK->Height() / 2; + PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, + (void *)this, &A0)); + PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, + (void *)this, &A1)); + auto __mat_apply_shell_A0 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode + { + // Apply the linearized operator L₀ = [ 0 I ] + // [ -K -C ] . + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x_, &n)); + + const PetscScalar *px_; + PetscCall(VecGetArrayRead(x_, &px_)); + x1.Set(px_, n / 2); + x2.Set(px_ + n / 2, n / 2); + PetscCall(VecRestoreArrayRead(x_, &px_)); + + y1 = x2; + opC->Mult(x2, y2); + y2 *= gamma; + opK->AddMult(x1, y2, std::complex(1.0, 0.0)); + y2 *= -delta; + + PetscScalar *py_; + PetscCall(VecGetArrayWrite(y_, &py_)); + y1.Get(py_, n / 2); + y2.Get(py_ + n / 2, n / 2); + PetscCall(VecRestoreArrayWrite(y_, &py_)); + + PetscFunctionReturn(0); + }; + auto __mat_apply_shell_A1 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode + { + // Apply the linearized operator L₁ = [ I 0 ] + // [ 0 M ] . 
+ PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x_, &n)); + + const PetscScalar *px_; + PetscCall(VecGetArrayRead(x_, &px_)); + x1.Set(px_, n / 2); + x2.Set(px_ + n / 2, n / 2); + PetscCall(VecRestoreArrayRead(x_, &px_)); + + y1 = x1; + opM->Mult(x2, y2); + y2 *= delta * gamma * gamma; + + PetscScalar *py_; + PetscCall(VecGetArrayWrite(y_, &py_)); + y1.Get(py_, n / 2); + y2.Get(py_ + n / 2, n / 2); + PetscCall(VecRestoreArrayWrite(y_, &py_)); + + PetscFunctionReturn(0); + }; + PalacePetscCall( + MatShellSetOperation(A0, MATOP_MULT, (void (*)()) & __mat_apply_shell_A0)); + PalacePetscCall( + MatShellSetOperation(A1, MATOP_MULT, (void (*)()) & __mat_apply_shell_A1)); + PalacePetscCall(EPSSetOperators(eps, A0, A1)); + if (first && type != ScaleType::NONE) { - normK = opK->Norm2(); - normC = opC->Norm2(); - normM = opM->Norm2(); + normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal()); + normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal()); + normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal()); MFEM_VERIFY(normK >= 0.0 && normC >= 0.0 && normM >= 0.0, "Invalid matrix norms for PEP scaling!"); if (normK > 0 && normC > 0.0 && normM > 0.0) @@ -714,175 +908,226 @@ void SlepcPEPLinearSolver::SetOperators(const petsc::PetscParMatrix &K, // Set up workspace. 
if (!v0) { - v0 = new petsc::PetscParVector(K); - } - if (!r0) - { - r0 = new petsc::PetscParVector(K); - } - if (!z) - { - z = new petsc::PetscParVector(*A); - } - if (!x1) - { - MPI_Comm comm = GetComm(); - PetscInt n = K.GetNumRows(); - delete x1; - delete x2; - delete y1; - delete y2; - x1 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - x2 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - y1 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - y2 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); + PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } + x1.SetSize(opK->Height()); + x2.SetSize(opK->Height()); + y1.SetSize(opK->Height()); + y2.SetSize(opK->Height()); // Configure linear solver. if (first) { - SetPCShell((void *)this, __pc_apply_PEPLinear); + ConfigurePCShell(); } } -void SlepcPEPLinearSolver::SetBMat(const petsc::PetscParMatrix &B) +void SlepcPEPLinearSolver::SetBMat(const Operator &B) { - // Construct an SPD linearized mass matrix for weighted inner products. 
- Mat B0_; - MPI_Comm comm = GetComm(); - PetscInt n = B.GetNumRows(); - PalacePetscCall( - MatCreateShell(comm, 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B0_)); - PalacePetscCall( - MatShellSetOperation(B0_, MATOP_MULT, - (void (*)()) static_cast( - &__mat_apply_PEPLinear_B))); - delete B0; - B0 = new petsc::PetscParMatrix(B0_, false); // Inherits the PETSc Mat - opB = &B; - SlepcEigenSolver::SetBMat(*B0); + SlepcEigenSolver::SetBMat(B); + + PetscInt n = B.Height(); + PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, + (void *)this, &B0)); + auto __mat_apply_shell = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode + { + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x_, &n)); + + const PetscScalar *px_; + PetscCall(VecGetArrayRead(x_, &px_)); + x1.Set(px_, n / 2); + x2.Set(px_ + n / 2, n / 2); + PetscCall(VecRestoreArrayRead(x_, &px_)); + + opB->Mult(x1.Real(), y1.Real()); + opB->Mult(x1.Imag(), y1.Imag()); + opB->Mult(x2.Real(), y2.Real()); + opB->Mult(x2.Imag(), y2.Imag()); + y1 *= delta * gamma * gamma; + y2 *= delta * gamma * gamma; + + PetscScalar *py_; + PetscCall(VecGetArrayWrite(y_, &py_)); + y1.Get(py_, n / 2); + y2.Get(py_ + n / 2, n / 2); + PetscCall(VecRestoreArrayWrite(y_, &py_)); + + PetscFunctionReturn(0); + }; + PalacePetscCall(MatShellSetOperation(B0, MATOP_MULT, (void (*)()) & __mat_apply_shell)); + + BV bv = GetBV(); + PalacePetscCall(BVSetMatrix(bv, B0, PETSC_FALSE)); } -void SlepcPEPLinearSolver::SetInitialSpace(const petsc::PetscParVector &v) +void SlepcPEPLinearSolver::SetInitialSpace(const ComplexVector &v) { - if (!z) + MFEM_VERIFY( + A0 && A1, + "Must call SetOperators before using SetInitialSpace for SLEPc eigenvalue solver!"); + if (!v0) { - z = new petsc::PetscParVector(v.GetComm(), 2 * v.GetSize(), PETSC_DECIDE); + PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } - else + + PetscInt n; + PalacePetscCall(VecGetLocalSize(v0, &n)); + MFEM_VERIFY(2 * v.Size() == 2 * n, + 
"Invalid size mismatch for provided initial space vector!"); + + PetscScalar *pv0; + PalacePetscCall(VecGetArrayWrite(v0, &pv0)); + v.Get(pv0, n / 2); + std::fill(pv0 + n / 2, pv0 + n, 0.0); + PalacePetscCall(VecRestoreArrayWrite(v0, &pv0)); + + Vec is[1] = {v0}; + PalacePetscCall(EPSSetInitialSpace(eps, 1, is)); +} + +void SlepcPEPLinearSolver::GetEigenvector(int i, ComplexVector &x) const +{ + // Select the most accurate x for y = [x₁; x₂] from the linearized eigenvalue problem. Or, + // just take x = x₁. + MFEM_VERIFY( + v0, + "Must call SetOperators before using GetEigenvector for SLEPc eigenvalue solver!"); + PalacePetscCall(EPSGetEigenvector(eps, i, v0, nullptr)); + + PetscInt n; + PalacePetscCall(VecGetLocalSize(v0, &n)); + MFEM_VERIFY(2 * x1.Size() == 2 * n, "Invalid size mismatch for provided eigenvector!"); + + const PetscScalar *pv0; + PalacePetscCall(VecGetArrayRead(v0, &pv0)); + x1.Set(pv0, n / 2); + PalacePetscCall(VecRestoreArrayRead(v0, &pv0)); + if (opB) { - MFEM_VERIFY(2 * v.GetSize() == z->GetSize(), - "Invalid modification of eigenvalue problem size!"); + linalg::Normalize(GetComm(), x1, *opB, y1); } + else { - PetscScalar *pz = GetBlocks(*z, *x1, *x2); - x1->Copy(v); - x2->SetZero(); - RestoreBlocks(pz, *z, *x1, *x2); + linalg::Normalize(GetComm(), x1); } - Vec is[1]; - is[0] = *z; - PalacePetscCall(EPSSetInitialSpace(eps, 1, is)); } -void SlepcPEPLinearSolver::GetEigenvector(int i, petsc::PetscParVector &v) const +void SlepcPEPLinearSolver::ConfigurePCShell() { - // Select the most accurate v for z = [v₁; v₂] from the linearized eigenvalue problem. 
- PalacePetscCall(EPSGetEigenvector(eps, i, *z, nullptr)); - const PetscScalar *pz = GetBlocksRead(*z, *x1, *x2); + auto __pc_apply = [this](PC, Vec x_, Vec y_) -> PetscErrorCode { - if (opB) + // Solve the linear system associated with the generalized eigenvalue problem after + // linearization: y = L₁⁻¹ x, or with the shift-and-invert spectral transformation: + // y = (L₀ - σ L₁)⁻¹ x, with: + // L₀ = [ 0 I ] L₁ = [ I 0 ] + // [ -K -C ] , [ 0 M ] . + // Enforces the divergence-free constraint using the supplied projector. + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x_, &n)); + + const PetscScalar *px_; + PetscCall(VecGetArrayRead(x_, &px_)); + x1.Set(px_, n / 2); + x2.Set(px_ + n / 2, n / 2); + PetscCall(VecRestoreArrayRead(x_, &px_)); + + if (!sinvert) { - x1->Normalize(*opB, *r0); + y1 = x1; + if (opProj) + { + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y1)); + opProj->Mult(y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y1)); + } + + opInv->Mult(x2, y2); + y2 *= 1.0 / (delta * gamma * gamma); + if (opProj) + { + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y2)); + opProj->Mult(y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y2)); + } } else { - x1->Normalize(); + y1.AXPBY(-sigma / (delta * gamma), x2, 0.0); // Temporarily + opK->AddMult(x1, y1, std::complex(1.0, 0.0)); + opInv->Mult(y1, y2); + if (opProj) + { + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y2)); + opProj->Mult(y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y2)); + } + + y1.AXPBYPCZ(gamma / sigma, y2, -gamma / sigma, x1, 0.0); + if (opProj) + { + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y1)); + opProj->Mult(y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y1)); + } } - v.Copy(*x1); - } - RestoreBlocksRead(pz, *z, *x1, *x2); + + PetscScalar *py_; + 
PetscCall(VecGetArrayWrite(y_, &py_)); + y1.Get(py_, n / 2); + y2.Get(py_ + n / 2, n / 2); + PetscCall(VecRestoreArrayWrite(y_, &py_)); + + PetscFunctionReturn(0); + }; + + KSP ksp; + PC pc; + ST st = GetST(); + PalacePetscCall(STGetKSP(st, &ksp)); + PalacePetscCall(KSPGetPC(ksp, &pc)); + PalacePetscCall(PCSetType(pc, PCSHELL)); + PalacePetscCall(PCShellSetApply(pc, (PetscErrorCode(*)(PC, Vec, Vec)) & __pc_apply)); } -void SlepcPEPLinearSolver::GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const +PetscReal SlepcPEPLinearSolver::GetResidualNorm(int i) const { - // r = P(λ) v = (K + λ C + λ² M) v for eigenvalue λ. - opM->Mult(v, r); - r.Scale(eig); - opC->MultAdd(v, r); - r.Scale(eig); - opK->MultAdd(v, r); + // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for + // eigenvalue λ. + PetscScalar l = GetEigenvalue(i); + GetEigenvector(i, x1); + opK->Mult(x1, y1); + opC->AddMult(x1, y1, l); + opM->AddMult(x1, y1, l * l); + return linalg::Norml2(GetComm(), y1); } -PetscReal SlepcPEPLinearSolver::GetBackwardScaling(PetscScalar eig) const +PetscReal SlepcPEPLinearSolver::GetBackwardScaling(PetscScalar l) const { // Make sure not to use norms from scaling as this can be confusing if they are different. // Note that SLEPc typically uses ||.||∞, not the 2-norm. 
if (normK <= 0.0) { - normK = opK->Norm2(); + normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal()); } if (normC <= 0.0) { - normC = opC->Norm2(); + normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal()); } if (normM <= 0.0) { - normM = opM->Norm2(); + normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal()); } - PetscReal t = PetscAbsScalar(eig); + PetscReal t = PetscAbsScalar(l); return normK + t * normC + t * t * normM; } -PetscScalar *SlepcPEPLinearSolver::GetBlocks(petsc::PetscParVector &v, - petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const -{ - PetscInt n1 = v1.GetSize(), n2 = v2.GetSize(); - MFEM_VERIFY(n1 + n2 == v.GetSize(), "Unexpected size in PEP linearization!"); - PetscScalar *pv = v.GetArray(); - v1.PlaceArray(pv); - v2.PlaceArray(pv + n1); - return pv; -} - -const PetscScalar *SlepcPEPLinearSolver::GetBlocksRead(const petsc::PetscParVector &v, - petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const -{ - PetscInt n1 = v1.GetSize(), n2 = v2.GetSize(); - MFEM_VERIFY(n1 + n2 == v.GetSize(), "Unexpected size in PEP linearization!"); - const PetscScalar *pv = v.GetArrayRead(); - v1.PlaceArray(pv); - v2.PlaceArray(pv + n1); - return pv; -} - -void SlepcPEPLinearSolver::RestoreBlocks(PetscScalar *pv, petsc::PetscParVector &v, - petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const -{ - v1.ResetArray(); - v2.ResetArray(); - v.RestoreArray(pv); -} - -void SlepcPEPLinearSolver::RestoreBlocksRead(const PetscScalar *pv, - const petsc::PetscParVector &v, - petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const -{ - v1.ResetArray(); - v2.ResetArray(); - v.RestoreArrayRead(pv); -} - // PEP specific methods -SlepcPEPSolverBase::SlepcPEPSolverBase(MPI_Comm comm, int print_lvl, - const std::string &prefix) - : SlepcEigenSolver(print_lvl) +SlepcPEPSolverBase::SlepcPEPSolverBase(MPI_Comm comm, int print, const std::string &prefix) + : SlepcEigenSolver(print) { PalacePetscCall(PEPCreate(comm, &pep)); 
PalacePetscCall(PEPSetOptionsPrefix(pep, prefix.c_str())); @@ -908,34 +1153,32 @@ SlepcPEPSolverBase::SlepcPEPSolverBase(MPI_Comm comm, int print_lvl, SlepcPEPSolverBase::~SlepcPEPSolverBase() { - MPI_Comm comm; - PalacePetscCall(PetscObjectGetComm(reinterpret_cast(pep), &comm)); PalacePetscCall(PEPDestroy(&pep)); - delete A0; - delete A1; - delete A2; + PalacePetscCall(MatDestroy(&A0)); + PalacePetscCall(MatDestroy(&A1)); + PalacePetscCall(MatDestroy(&A2)); } -void SlepcPEPSolverBase::SetNumModes(int numeig, int numvec) +void SlepcPEPSolverBase::SetNumModes(int num_eig, int num_vec) { - PalacePetscCall( - PEPSetDimensions(pep, numeig, (numvec > 0) ? numvec : PETSC_DEFAULT, PETSC_DEFAULT)); + PalacePetscCall(PEPSetDimensions(pep, num_eig, (num_vec > 0) ? num_vec : PETSC_DEFAULT, + PETSC_DEFAULT)); } -void SlepcPEPSolverBase::SetTol(double tol) +void SlepcPEPSolverBase::SetTol(PetscReal tol) { PalacePetscCall(PEPSetTolerances(pep, tol, PETSC_DEFAULT)); PalacePetscCall(PEPSetConvergenceTest(pep, PEP_CONV_REL)); // PalacePetscCall(PEPSetTrackAll(pep, PETSC_TRUE)); } -void SlepcPEPSolverBase::SetMaxIter(int maxits) +void SlepcPEPSolverBase::SetMaxIter(int max_it) { PalacePetscCall( - PEPSetTolerances(pep, PETSC_DEFAULT, (maxits > 0) ? maxits : PETSC_DEFAULT)); + PEPSetTolerances(pep, PETSC_DEFAULT, (max_it > 0) ? 
max_it : PETSC_DEFAULT)); } -void SlepcPEPSolverBase::SetWhichEigenpairs(EigenSolverBase::WhichType type) +void SlepcPEPSolverBase::SetWhichEigenpairs(EigenvalueSolver::WhichType type) { switch (type) { @@ -1019,20 +1262,27 @@ void SlepcPEPSolverBase::SetType(SlepcEigenSolver::Type type) } } -void SlepcPEPSolverBase::SetInitialSpace(const petsc::PetscParVector &v) +void SlepcPEPSolverBase::SetInitialSpace(const ComplexVector &v) { + MFEM_VERIFY( + A0 && A1 && A2, + "Must call SetOperators before using SetInitialSpace for SLEPc eigenvalue solver!"); if (!v0) { - v0 = new petsc::PetscParVector(v); - } - else - { - MFEM_VERIFY(v.GetSize() == v0->GetSize(), - "Invalid modification of eigenvalue problem size!"); - v0->Copy(v); + PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } - Vec is[1]; - is[0] = *v0; + + PetscInt n; + PalacePetscCall(VecGetLocalSize(v0, &n)); + MFEM_VERIFY(v.Size() == 2 * n, + "Invalid size mismatch for provided initial space vector!"); + + PetscScalar *pv0; + PalacePetscCall(VecGetArrayWrite(v0, &pv0)); + v.Get(pv0, n); + PalacePetscCall(VecRestoreArrayWrite(v0, &pv0)); + + Vec is[1] = {v0}; PalacePetscCall(PEPSetInitialSpace(pep, 1, is)); } @@ -1040,25 +1290,27 @@ void SlepcPEPSolverBase::Customize() { SlepcEigenSolver::Customize(); PalacePetscCall(PEPSetTarget(pep, sigma / gamma)); - if (!clcustom) + if (!cl_custom) { PalacePetscCall(PEPSetFromOptions(pep)); - // if (print > 0) // These are printed by PETSc linear solver - // { - // PetscOptionsView(nullptr, PETSC_VIEWER_STDOUT_(GetComm())); - // Mpi::Print(GetComm(), "\n"); - // } - clcustom = true; + if (print > 0) + { + PetscOptionsView(nullptr, PETSC_VIEWER_STDOUT_(GetComm())); + Mpi::Print(GetComm(), "\n"); + } + cl_custom = true; } } int SlepcPEPSolverBase::Solve() { MFEM_VERIFY(A0 && A1 && A2 && opInv, "Operators are not set for SlepcPEPSolverBase!"); - PetscInt numconv; + + // Solve the eigenvalue problem. 
+ PetscInt num_conv; Customize(); PalacePetscCall(PEPSolve(pep)); - PalacePetscCall(PEPGetConverged(pep, &numconv)); + PalacePetscCall(PEPGetConverged(pep, &num_conv)); if (print > 0) { Mpi::Print(GetComm(), "\n"); @@ -1066,27 +1318,40 @@ int SlepcPEPSolverBase::Solve() Mpi::Print(GetComm(), " Total number of linear systems solved: {:d}\n" " Total number of linear solver iterations: {:d}\n", - opInv->GetTotalNumMult(), opInv->GetTotalNumIter()); + opInv->NumTotalMult(), opInv->NumTotalMultIter()); } - delete[] res; - res = new PetscReal[numconv]; - for (PetscInt i = 0; i < numconv; i++) + + // Compute and store the eigenpair residuals. + res = std::make_unique(num_conv); + for (int i = 0; i < num_conv; i++) { - res[i] = -1.0; + res.get()[i] = GetResidualNorm(i); } - return (int)numconv; + return (int)num_conv; } -void SlepcPEPSolverBase::GetEigenvalue(int i, double &eigr, double &eigi) const +PetscScalar SlepcPEPSolverBase::GetEigenvalue(int i) const { - PetscScalar eig; - PalacePetscCall(PEPGetEigenpair(pep, i, &eig, nullptr, nullptr, nullptr)); - GetBackTransform(eig, eigr, eigi); + PetscScalar l; + PalacePetscCall(PEPGetEigenpair(pep, i, &l, nullptr, nullptr, nullptr)); + return GetBackTransform(l); } -void SlepcPEPSolverBase::GetEigenvector(int i, petsc::PetscParVector &v) const +void SlepcPEPSolverBase::GetEigenvector(int i, ComplexVector &x) const { - PalacePetscCall(PEPGetEigenpair(pep, i, nullptr, nullptr, v, nullptr)); + MFEM_VERIFY( + v0, + "Must call SetOperators before using GetEigenvector for SLEPc eigenvalue solver!"); + PalacePetscCall(PEPGetEigenpair(pep, i, nullptr, nullptr, v0, nullptr)); + + PetscInt n; + PalacePetscCall(VecGetLocalSize(v0, &n)); + MFEM_VERIFY(x.Size() == 2 * n, "Invalid size mismatch for provided eigenvector!"); + + const PetscScalar *pv0; + PalacePetscCall(VecGetArrayRead(v0, &pv0)); + x.Set(pv0, n); + PalacePetscCall(VecRestoreArrayRead(v0, &pv0)); } BV SlepcPEPSolverBase::GetBV() const @@ -1110,62 +1375,105 @@ RG 
SlepcPEPSolverBase::GetRG() const return rg; } -MPI_Comm SlepcPEPSolverBase::GetComm() const -{ - return pep ? PetscObjectComm(reinterpret_cast(pep)) : MPI_COMM_NULL; -} - -SlepcPEPSolver::SlepcPEPSolver(MPI_Comm comm, int print_lvl, const std::string &prefix) - : SlepcPEPSolverBase(comm, print_lvl, prefix) +SlepcPEPSolver::SlepcPEPSolver(MPI_Comm comm, int print, const std::string &prefix) + : SlepcPEPSolverBase(comm, print, prefix) { opK = opC = opM = nullptr; normK = normC = normM = 0.0; } -void SlepcPEPSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { // Construct shell matrices for the scaled operators which define the quadratic polynomial // eigenvalue problem. bool first = (opK == nullptr); - { - Mat A0_, A1_, A2_; - MPI_Comm comm = GetComm(); - PetscInt n = K.GetNumRows(); - PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0_)); - PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1_)); - PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A2_)); - PalacePetscCall(MatShellSetOperation( - A0_, MATOP_MULT, - (void (*)()) static_cast(&__mat_apply_PEP_A0))); - PalacePetscCall(MatShellSetOperation( - A1_, MATOP_MULT, - (void (*)()) static_cast(&__mat_apply_PEP_A1))); - PalacePetscCall(MatShellSetOperation( - A2_, MATOP_MULT, - (void (*)()) static_cast(&__mat_apply_PEP_A2))); - delete A0; - delete A1; - delete A2; - A0 = new petsc::PetscParMatrix(A0_, false); // Inherits the PETSc Mat - A1 = new petsc::PetscParMatrix(A1_, false); - A2 = new petsc::PetscParMatrix(A2_, false); - } - Mat A[3] = {*A0, *A1, *A2}; - PalacePetscCall(PEPSetOperators(pep, 3, A)); opK = &K; opC = &C; opM = &M; + + 
PetscInt n = opK->Height() / 2; + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0)); + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A2)); + auto __mat_apply_shell_A0 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode + { + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x_, &n)); + + const PetscScalar *px_; + PetscCall(VecGetArrayRead(x_, &px_)); + x.Set(px_, n); + PetscCall(VecRestoreArrayRead(x_, &px_)); + + opK->Mult(x, y); + + PetscScalar *py_; + PetscCall(VecGetArrayWrite(y_, &py_)); + y.Get(py_, n); + PetscCall(VecRestoreArrayWrite(y_, &py_)); + + PetscFunctionReturn(0); + }; + auto __mat_apply_shell_A1 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode + { + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x_, &n)); + + const PetscScalar *px_; + PetscCall(VecGetArrayRead(x_, &px_)); + x.Set(px_, n); + PetscCall(VecRestoreArrayRead(x_, &px_)); + + opC->Mult(x, y); + + PetscScalar *py_; + PetscCall(VecGetArrayWrite(y_, &py_)); + y.Get(py_, n); + PetscCall(VecRestoreArrayWrite(y_, &py_)); + + PetscFunctionReturn(0); + }; + auto __mat_apply_shell_A2 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode + { + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x_, &n)); + + const PetscScalar *px_; + PetscCall(VecGetArrayRead(x_, &px_)); + x.Set(px_, n); + PetscCall(VecRestoreArrayRead(x_, &px_)); + + opM->Mult(x, y); + + PetscScalar *py_; + PetscCall(VecGetArrayWrite(y_, &py_)); + y.Get(py_, n); + PetscCall(VecRestoreArrayWrite(y_, &py_)); + + PetscFunctionReturn(0); + }; + PalacePetscCall( + MatShellSetOperation(A0, MATOP_MULT, (void (*)()) & __mat_apply_shell_A0)); + PalacePetscCall( + MatShellSetOperation(A1, MATOP_MULT, (void (*)()) & __mat_apply_shell_A1)); + PalacePetscCall( + MatShellSetOperation(A2, MATOP_MULT, (void 
(*)()) & __mat_apply_shell_A2)); + Mat A[3] = {A0, A1, A2}; + PalacePetscCall(PEPSetOperators(pep, 3, A)); + if (first && type != ScaleType::NONE) { - normK = opK->Norm2(); - normC = opC->Norm2(); - normM = opM->Norm2(); + normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal()); + normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal()); + normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal()); MFEM_VERIFY(normK >= 0.0 && normC >= 0.0 && normM >= 0.0, "Invalid matrix norms for PEP scaling!"); if (normK > 0 && normC > 0.0 && normM > 0.0) @@ -1178,353 +1486,134 @@ void SlepcPEPSolver::SetOperators(const petsc::PetscParMatrix &K, // Set up workspace. if (!v0) { - v0 = new petsc::PetscParVector(K); - } - if (!r0) - { - r0 = new petsc::PetscParVector(K); + PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } + x.SetSize(opK->Height()); + y.SetSize(opK->Height()); // Configure linear solver. if (first) { - SetPCShell((void *)this, __pc_apply_PEP); + ConfigurePCShell(); } } -void SlepcPEPSolver::GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const +void SlepcPEPSolver::SetBMat(const Operator &B) { - // r = P(λ) v = (K + λ C + λ² M) v for eigenvalue λ. - opM->Mult(v, r); - r.Scale(eig); - opC->MultAdd(v, r); - r.Scale(eig); - opK->MultAdd(v, r); -} + SlepcEigenSolver::SetBMat(B); -PetscReal SlepcPEPSolver::GetBackwardScaling(PetscScalar eig) const -{ - // Make sure not to use norms from scaling as this can be confusing if they are different. - // Note that SLEPc typically uses ||.||∞, not Frobenius. 
- if (normK <= 0.0) - { - normK = opK->NormInf(); - } - if (normC <= 0.0) - { - normC = opC->NormInf(); - } - if (normM <= 0.0) + PetscInt n = B.Height(); + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B0)); + auto __mat_apply_shell = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode { - normM = opM->NormInf(); - } - PetscReal t = PetscAbsScalar(eig); - return normK + t * normC + t * t * normM; -} + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x_, &n)); -} // namespace palace::slepc - -PetscErrorCode __mat_apply_EPS_A(Mat A, Vec x, Vec y) -{ - // Apply the operator: K (no transform) or M . - palace::slepc::SlepcEPSSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - { - slepc->GetOpK()->Mult(xx, yy); - yy.Scale(slepc->GetScalingDelta()); - } - PetscFunctionReturn(0); -} + const PetscScalar *px_; + PetscCall(VecGetArrayRead(x_, &px_)); + x.Set(px_, n); + PetscCall(VecRestoreArrayRead(x_, &px_)); -PetscErrorCode __mat_apply_EPS_B(Mat A, Vec x, Vec y) -{ - // Apply the operator: M (no transform) or (K - σ M) . 
- palace::slepc::SlepcEPSSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; + opB->Mult(x.Real(), y.Real()); + opB->Mult(x.Imag(), y.Imag()); + y *= delta * gamma; - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - { - slepc->GetOpM()->Mult(xx, yy); - yy.Scale(slepc->GetScalingDelta() * slepc->GetScalingGamma()); - } - PetscFunctionReturn(0); -} + PetscScalar *py_; + PetscCall(VecGetArrayWrite(y_, &py_)); + y.Get(py_, n); + PetscCall(VecRestoreArrayWrite(y_, &py_)); -PetscErrorCode __pc_apply_EPS(PC pc, Vec x, Vec y) -{ - // Solve the linear system associated with the generalized eigenvalue problem: y = M⁻¹ x , - // or shift-and-invert spectral transformation: y =(K - σ M)⁻¹ x . Enforces the - // divergence-free constraint using the supplied projector. - palace::slepc::SlepcEPSSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; + PetscFunctionReturn(0); + }; + PalacePetscCall(MatShellSetOperation(B0, MATOP_MULT, (void (*)()) & __mat_apply_shell)); - PetscCall(PCShellGetContext(pc, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell PC context for SLEPc!"); - slepc->GetKspSolver()->Mult(xx, yy); - if (!slepc->IsShiftInvert()) - { - yy.Scale(1.0 / (slepc->GetScalingDelta() * slepc->GetScalingGamma())); - } - else - { - yy.Scale(1.0 / slepc->GetScalingDelta()); - } - - // Debug - // Mpi::Print(" Before projection: {:e}\n", yy.Norml2()); - - if (slepc->GetDivFreeSolver()) - { - slepc->GetDivFreeSolver()->Mult(yy); - } - - // Debug - // Mpi::Print(" After projection: {:e}\n", yy.Norml2()); - - PetscFunctionReturn(0); + BV bv = GetBV(); + PalacePetscCall(BVSetMatrix(bv, B0, PETSC_FALSE)); } -PetscErrorCode __mat_apply_PEPLinear_L0(Mat A, Vec x, Vec y) +void SlepcPEPSolver::ConfigurePCShell() { - // Apply the linearized operator: L₀ (no transform) or L₁ . 
With: - // L₀ = [ 0 I ] L₁ = [ I 0 ] - // [ -K -C ] , [ 0 M ] . - palace::slepc::SlepcPEPLinearSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - const PetscScalar *px; - PetscScalar *py; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - px = slepc->GetBlocksRead(xx, *slepc->GetX1(), *slepc->GetX2()); - py = slepc->GetBlocks(yy, *slepc->GetY1(), *slepc->GetY2()); - { - slepc->GetY1()->Copy(*slepc->GetX2()); - slepc->GetOpC()->Mult(*slepc->GetX2(), *slepc->GetY2()); - slepc->GetY2()->Scale(slepc->GetScalingGamma()); - slepc->GetOpK()->MultAdd(*slepc->GetX1(), *slepc->GetY2()); - slepc->GetY2()->Scale(-slepc->GetScalingDelta()); - } - slepc->RestoreBlocksRead(px, xx, *slepc->GetX1(), *slepc->GetX2()); - slepc->RestoreBlocks(py, yy, *slepc->GetY1(), *slepc->GetY2()); - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_apply_PEPLinear_L1(Mat A, Vec x, Vec y) -{ - // Apply the linearized operator: L₁ (no transform) or (L₀ - σ L₁) . With: - // L₀ = [ 0 I ] L₁ = [ I 0 ] - // [ -K -C ] , [ 0 M ] . 
- palace::slepc::SlepcPEPLinearSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - const PetscScalar *px; - PetscScalar *py; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - px = slepc->GetBlocksRead(xx, *slepc->GetX1(), *slepc->GetX2()); - py = slepc->GetBlocks(yy, *slepc->GetY1(), *slepc->GetY2()); + auto __pc_apply = [this](PC, Vec x_, Vec y_) -> PetscErrorCode { - slepc->GetY1()->Copy(*slepc->GetX1()); - slepc->GetOpM()->Mult(*slepc->GetX2(), *slepc->GetY2()); - slepc->GetY2()->Scale(slepc->GetScalingDelta() * slepc->GetScalingGamma() * - slepc->GetScalingGamma()); - } - slepc->RestoreBlocksRead(px, xx, *slepc->GetX1(), *slepc->GetX2()); - slepc->RestoreBlocks(py, yy, *slepc->GetY1(), *slepc->GetY2()); - PetscFunctionReturn(0); -} + // Solve the linear system associated with the generalized eigenvalue problem: y = + // M⁻¹ x, or shift-and-invert spectral transformation: y = P(σ)⁻¹ x . Enforces the + // divergence-free constraint using the supplied projector. + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x_, &n)); -PetscErrorCode __mat_apply_PEPLinear_B(Mat A, Vec x, Vec y) -{ - // Apply the linearized mass matrix L₁ using the supplied SPD mass matrix. 
- palace::slepc::SlepcPEPLinearSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - const PetscScalar *px; - PetscScalar *py; - PetscFunctionBeginUser; + const PetscScalar *px_; + PetscCall(VecGetArrayRead(x_, &px_)); + x.Set(px_, n); + PetscCall(VecRestoreArrayRead(x_, &px_)); - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - px = slepc->GetBlocksRead(xx, *slepc->GetX1(), *slepc->GetX2()); - py = slepc->GetBlocks(yy, *slepc->GetY1(), *slepc->GetY2()); - { - slepc->GetY1()->Copy(*slepc->GetX1()); - slepc->GetOpB()->Mult(*slepc->GetX2(), *slepc->GetY2()); - slepc->GetY2()->Scale(slepc->GetScalingDelta() * slepc->GetScalingGamma() * - slepc->GetScalingGamma()); - } - slepc->RestoreBlocksRead(px, xx, *slepc->GetX1(), *slepc->GetX2()); - slepc->RestoreBlocks(py, yy, *slepc->GetY1(), *slepc->GetY2()); - PetscFunctionReturn(0); -} - -PetscErrorCode __pc_apply_PEPLinear(PC pc, Vec x, Vec y) -{ - // Solve the linear system associated with the generalized eigenvalue problem after - // linearization: y = L₁⁻¹ x , or with the shift-and-invert spectral transformation: y = - // (L₀ - σ L₁)⁻¹ x . Enforces the divergence-free constraint using the supplied - // projectors. 
- palace::slepc::SlepcPEPLinearSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - const PetscScalar *px; - PetscScalar *py; - PetscScalar sigma; - PetscReal gamma, delta; - PetscFunctionBeginUser; - - PetscCall(PCShellGetContext(pc, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell PC context for SLEPc!"); - sigma = slepc->GetTarget(); - gamma = slepc->GetScalingGamma(); - delta = slepc->GetScalingDelta(); - px = slepc->GetBlocksRead(xx, *slepc->GetX1(), *slepc->GetX2()); - py = slepc->GetBlocks(yy, *slepc->GetY1(), *slepc->GetY2()); - if (!slepc->IsShiftInvert()) - { - slepc->GetKspSolver()->Mult(*slepc->GetX2(), *slepc->GetY2()); - slepc->GetY2()->Scale(1.0 / (delta * gamma * gamma)); - if (slepc->GetDivFreeSolver()) - { - slepc->GetDivFreeSolver()->Mult(*slepc->GetY2()); - } - slepc->GetY1()->Copy(*slepc->GetX1()); - if (slepc->GetDivFreeSolver()) + opInv->Mult(x, y); + if (!sinvert) { - slepc->GetDivFreeSolver()->Mult(*slepc->GetY1()); + y *= 1.0 / (delta * gamma * gamma); } - } - else - { - slepc->GetY1()->AXPBY(-sigma / (delta * gamma), *slepc->GetX2(), 0.0); // Temporarily - slepc->GetOpK()->MultAdd(*slepc->GetX1(), *slepc->GetY1()); - slepc->GetKspSolver()->Mult(*slepc->GetY1(), *slepc->GetY2()); - - // Debug - // Mpi::Print(" Before projection: {:e}\n", slepc->GetY2()->Norml2()); - - if (slepc->GetDivFreeSolver()) + else { - slepc->GetDivFreeSolver()->Mult(*slepc->GetY2()); + y *= 1.0 / delta; } - - // Debug - // Mpi::Print(" After projection: {:e}\n", slepc->GetY2()->Norml2()); - - slepc->GetY1()->AXPBYPCZ(gamma / sigma, *slepc->GetY2(), -gamma / sigma, - *slepc->GetX1(), 0.0); - - // Debug - // Mpi::Print(" Before projection: {:e}\n", slepc->GetY1()->Norml2()); - - if (slepc->GetDivFreeSolver()) + if (opProj) { - slepc->GetDivFreeSolver()->Mult(*slepc->GetY1()); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y)); + opProj->Mult(y); + // Mpi::Print(" After projection: {:e}\n", 
linalg::Norml2(GetComm(), y)); } - // Debug - // Mpi::Print(" After projection: {:e}\n", slepc->GetY1()->Norml2()); - } - slepc->RestoreBlocksRead(px, xx, *slepc->GetX1(), *slepc->GetX2()); - slepc->RestoreBlocks(py, yy, *slepc->GetY1(), *slepc->GetY2()); - PetscFunctionReturn(0); -} + PetscScalar *py_; + PetscCall(VecGetArrayWrite(y_, &py_)); + y.Get(py_, n); + PetscCall(VecRestoreArrayWrite(y_, &py_)); -PetscErrorCode __mat_apply_PEP_A0(Mat A, Vec x, Vec y) -{ - // Apply the operator: K (no transform) or M . - palace::slepc::SlepcPEPSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; + PetscFunctionReturn(0); + }; - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - { - slepc->GetOpK()->Mult(xx, yy); - yy.Scale(slepc->GetScalingDelta()); - } - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_apply_PEP_A1(Mat A, Vec x, Vec y) -{ - // Apply the operator: C (no transform) or (C + 2σ M) . - palace::slepc::SlepcPEPSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - { - slepc->GetOpC()->Mult(xx, yy); - yy.Scale(slepc->GetScalingDelta() * slepc->GetScalingGamma()); - } - PetscFunctionReturn(0); + KSP ksp; + PC pc; + ST st = GetST(); + PalacePetscCall(STGetKSP(st, &ksp)); + PalacePetscCall(KSPGetPC(ksp, &pc)); + PalacePetscCall(PCSetType(pc, PCSHELL)); + PalacePetscCall(PCShellSetApply(pc, (PetscErrorCode(*)(PC, Vec, Vec)) & __pc_apply)); } -PetscErrorCode __mat_apply_PEP_A2(Mat A, Vec x, Vec y) +PetscReal SlepcPEPSolver::GetResidualNorm(int i) const { - // Apply the operator: M (no transform) or (K + σ C + σ² M) . 
- palace::slepc::SlepcPEPSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - { - slepc->GetOpM()->Mult(xx, yy); - yy.Scale(slepc->GetScalingDelta() * slepc->GetScalingGamma() * - slepc->GetScalingGamma()); - } - PetscFunctionReturn(0); + // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for + // eigenvalue λ. + PetscScalar l = GetEigenvalue(i); + GetEigenvector(i, x); + opK->Mult(x, y); + opC->AddMult(x, y, l); + opM->AddMult(x, y, l * l); + return linalg::Norml2(GetComm(), y); } -PetscErrorCode __pc_apply_PEP(PC pc, Vec x, Vec y) +PetscReal SlepcPEPSolver::GetBackwardScaling(PetscScalar l) const { - // Solve the linear system associated with the generalized eigenvalue problem: y = M⁻¹ x , - // or shift-and-invert spectral transformation: y = P(σ)⁻¹ x . Enforces the - // divergence-free constraint using the supplied projector. - palace::slepc::SlepcPEPSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; - - PetscCall(PCShellGetContext(pc, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell PC context for SLEPc!"); - slepc->GetKspSolver()->Mult(xx, yy); - if (!slepc->IsShiftInvert()) + // Make sure not to use norms from scaling as this can be confusing if they are different. + // Note that SLEPc typically uses ||.||∞, not Frobenius. 
+ if (normK <= 0.0) { - yy.Scale(1.0 / (slepc->GetScalingDelta() * slepc->GetScalingGamma() * - slepc->GetScalingGamma())); + normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal()); } - else + if (normC <= 0.0) { - yy.Scale(1.0 / slepc->GetScalingDelta()); + normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal()); } - - // Debug - // Mpi::Print(" Before projection: {:e}\n", yy); - - if (slepc->GetDivFreeSolver()) + if (normM <= 0.0) { - slepc->GetDivFreeSolver()->Mult(yy); + normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal()); } - - // Debug - // Mpi::Print(" After projection: {:e}\n", yy); - - PetscFunctionReturn(0); + PetscReal t = PetscAbsScalar(l); + return normK + t * normC + t * t * normM; } -#endif +} // namespace palace::slepc #endif diff --git a/palace/linalg/slepc.hpp b/palace/linalg/slepc.hpp index 1c9ff5b79..e1335631e 100644 --- a/palace/linalg/slepc.hpp +++ b/palace/linalg/slepc.hpp @@ -4,18 +4,20 @@ #ifndef PALACE_LINALG_SLEPC_HPP #define PALACE_LINALG_SLEPC_HPP -#if 0 // XX TODO DISABLE FEAST FOR NOW - #if defined(PALACE_WITH_SLEPC) #include "linalg/petsc.hpp" #if !defined(PETSC_USE_COMPLEX) -#error "SLEPc interface requires PETSc built with complex scalars!" +#error "SLEPc interface requires PETSc compiled with complex scalars!" #endif +#include #include -#include "linalg/eigen.hpp" +#include +#include "linalg/complex.hpp" +#include "linalg/eps.hpp" +#include "linalg/operator.hpp" // Forward declarations of SLEPc objects. typedef struct _p_EPS *EPS; @@ -27,16 +29,8 @@ typedef struct _p_RG *RG; namespace palace { +class ComplexKspSolver; class DivFreeSolver; -class KspSolver; // XX TODO WORKING - -namespace petsc -{ - -class PetscParMatrix; -class PetscParVector; - -} // namespace petsc namespace slepc { @@ -47,15 +41,16 @@ void Initialize(); void Finalize(); // Compute and return the maximum singular value of the given operator, σₙ² = λₙ(Aᴴ A) . 
-PetscReal GetMaxSingularValue(const petsc::PetscParMatrix &A, PetscReal tol = PETSC_DEFAULT, - PetscInt maxits = PETSC_DEFAULT); +PetscReal GetMaxSingularValue(MPI_Comm comm, const ComplexOperator &A, bool herm = false, + PetscReal tol = PETSC_DEFAULT, + PetscInt max_it = PETSC_DEFAULT); // // A wrapper for the SLEPc library for generalized linear eigenvalue problems or quadratic // polynomial eigenvalue problems. Shift-and-invert spectral transformations can be used to // compute interior eigenvalues. // -class SlepcEigenSolver : public EigenSolverBase +class SlepcEigenSolver : public EigenvalueSolver { public: enum class ProblemType @@ -81,9 +76,6 @@ class SlepcEigenSolver : public EigenSolverBase }; protected: - // Boolean to handle SetFromOptions calls. - mutable bool clcustom; - // Control print level for debugging. int print; @@ -95,92 +87,75 @@ class SlepcEigenSolver : public EigenSolverBase bool sinvert, region; // Storage for computed residual norms. - mutable PetscReal *res; - - // Workspace vectors for initial space and residual calculations. - mutable petsc::PetscParVector *v0, *r0; + std::unique_ptr res; // Reference to linear solver used for operator action for M⁻¹ (with no spectral // transformation) or (K - σ M)⁻¹ (generalized EVP with shift-and- invert) or P(σ)⁻¹ // (polynomial with shift-and-invert) (not owned). - const KspSolver *opInv; + const ComplexKspSolver *opInv; // Reference to solver for projecting an intermediate vector onto a divergence-free space // (not owned). const DivFreeSolver *opProj; + // Reference to matrix used for weighted inner products (not owned). May be nullptr, in + // which case identity is used. + const Operator *opB; + + // Workspace objects for eigenvalue calculations. + Mat B0; + Vec v0; + + // Boolean to handle SetFromOptions calls. + mutable bool cl_custom; + // Customize object with command line options set. virtual void Customize(); - // Configure KSP object associated with the spectral transformation. 
- void SetPCShell(void *ctx, PetscErrorCode (*__pc_apply)(PC, Vec, Vec)); - - // Specify rectangular region of the complex plane, bounded by[rminr, rmaxr] x - // [rmini, rmaxi] in which to constrain eigenvalue search. - void SetRegion(PetscReal rminr, PetscReal rmaxr, PetscReal rmini, PetscReal rmaxi, - bool complement = false); + // Specify rectangular region of the complex plane in which to constrain eigenvalue + // search. + void SetRegion(PetscScalar lower_left, PetscScalar upper_right, bool complement = false); // Perform the back-transformation from the spectrally transformed eigenvalue back to the // original problem. - void GetBackTransform(PetscScalar eig, PetscReal &eigr, PetscReal &eigi) const; + PetscScalar GetBackTransform(PetscScalar l) const; // Helper routine for computing the eigenpair residual. - virtual void GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const = 0; + virtual PetscReal GetResidualNorm(int i) const = 0; // Helper routine for computing the backward error. - virtual PetscReal GetBackwardScaling(PetscScalar eig) const = 0; + virtual PetscReal GetBackwardScaling(PetscScalar l) const = 0; public: - SlepcEigenSolver(int print_lvl); + SlepcEigenSolver(int print); ~SlepcEigenSolver() override; // Set operators for the generalized eigenvalue problem or for the quadratic polynomial // eigenvalue problem. 
- void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, + void SetOperators(const ComplexOperator &K, const ComplexOperator &M, ScaleType type) override; - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; + void SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, ScaleType type) override; // For the linear generalized case, the linear solver should be configured to compute the // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic // case, the linear solver should be configured to compute the action of M⁻¹ (with no // spectral transformation) or P(σ)⁻¹. - void SetLinearSolver(const KspSolver &ksp) override; + void SetLinearSolver(const ComplexKspSolver &ksp) override; - // Set the projection operator or operators for the divergence-free constraint. - void SetProjector(const DivFreeSolver &divfree) override; + // Set the projection operator for enforcing the divergence-free constraint. + void SetDivFreeProjector(const DivFreeSolver &divfree) override; // Set optional B matrix used for weighted inner products. This must be set explicitly // even for generalized problems, otherwise the identity will be used. - void SetBMat(const petsc::PetscParMatrix &B) override; - - // Get spectral transformation target used by the solver. - bool IsShiftInvert() const { return sinvert; } - PetscScalar GetTarget() const { return sigma; } + void SetBMat(const Operator &B) override; // Get scaling factors used by the solver. - double GetScalingGamma() const override { return (double)gamma; } - double GetScalingDelta() const override { return (double)delta; } + PetscReal GetScalingGamma() const override { return gamma; } + PetscReal GetScalingDelta() const override { return delta; } // Set shift-and-invert spectral transformation. 
- void SetShiftInvert(double tr, double ti, bool precond = false) override; - - // Configure the basis vectors object associated with the eigenvalue solver. - void SetOrthogonalization(bool mgs, bool cgs2); - - // Set the number of required eigenmodes. - void SetNumModes(int numeig, int numvec = 0) override = 0; - - // Set solver tolerance. - void SetTol(double tol) override = 0; - - // Set maximum number of iterations. - void SetMaxIter(int maxits) override = 0; - - // Set target spectrum for the eigensolver. When a spectral transformation is used, this - // applies to the spectrum of the shifted operator. - void SetWhichEigenpairs(WhichType type) override = 0; + void SetShiftInvert(PetscScalar s, bool precond = false) override; // Set problem type. virtual void SetProblemType(ProblemType type) = 0; @@ -188,20 +163,11 @@ class SlepcEigenSolver : public EigenSolverBase // Set eigenvalue solver. virtual void SetType(Type type) = 0; - // Set an initial vector for the solution subspace. - void SetInitialSpace(const petsc::PetscParVector &v) override = 0; - - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. - int Solve() override = 0; - - // Get the corresponding eigenvalue. - void GetEigenvalue(int i, double &eigr, double &eigi) const override = 0; - - // Get the corresponding eigenvector. - void GetEigenvector(int i, petsc::PetscParVector &v) const override = 0; + // Configure the basis vectors object associated with the eigenvalue solver. + void SetOrthogonalization(bool mgs, bool cgs2); // Get the corresponding eigenpair error. - void GetError(int i, ErrorType type, double &err) const override; + PetscReal GetError(int i, ErrorType type) const override; // Get the basis vectors object. virtual BV GetBV() const = 0; @@ -217,12 +183,6 @@ class SlepcEigenSolver : public EigenSolverBase // Conversion function to PetscObject. virtual operator PetscObject() const = 0; - - // Access solver object for callback functions. 
- const KspSolver *GetKspSolver() const { return opInv; } - - // Access solver object for callback functions. - const DivFreeSolver *GetDivFreeSolver() const { return opProj; } }; // Base class for SLEPc's EPS problem type. @@ -233,66 +193,51 @@ class SlepcEPSSolverBase : public SlepcEigenSolver EPS eps; // Shell matrices for the generalized eigenvalue problem. - petsc::PetscParMatrix *A, *B; + Mat A0, A1; - // Customize object with command line options set. void Customize() override; public: // Calls SLEPc's EPSCreate. Expects SLEPc to be initialized/finalized externally. - SlepcEPSSolverBase(MPI_Comm comm, int print_lvl, - const std::string &prefix = std::string()); + SlepcEPSSolverBase(MPI_Comm comm, int print, const std::string &prefix = std::string()); // Call's SLEPc's EPSDestroy. ~SlepcEPSSolverBase() override; - // Set the number of required eigenmodes. - void SetNumModes(int numeig, int numvec = 0) override; + // Conversion function to SLEPc's EPS type. + operator EPS() const { return eps; } + + void SetNumModes(int num_eig, int num_vec = 0) override; - // Set solver tolerance. - void SetTol(double tol) override; + void SetTol(PetscReal tol) override; - // Set maximum number of iterations. - void SetMaxIter(int maxits) override; + void SetMaxIter(int max_it) override; - // Set target spectrum for the eigensolver. When a spectral transformation is used, this - // applies to the spectrum of the shifted operator. void SetWhichEigenpairs(WhichType type) override; - // Set problem type. void SetProblemType(ProblemType type) override; - // Set eigenvalue solver. void SetType(Type type) override; - // Set an initial vector for the solution subspace. - void SetInitialSpace(const petsc::PetscParVector &v) override; + void SetInitialSpace(const ComplexVector &v) override; - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. int Solve() override; - // Get the corresponding eigenvalue. 
- void GetEigenvalue(int i, double &eigr, double &eigi) const override; + PetscScalar GetEigenvalue(int i) const override; - // Get the corresponding eigenvector. - void GetEigenvector(int i, petsc::PetscParVector &v) const override; + void GetEigenvector(int i, ComplexVector &x) const override; - // Get the basis vectors object. BV GetBV() const override; - // Get the spectral transformation object. ST GetST() const override; - // Get the filtering region object. RG GetRG() const override; - // Get the associated MPI communicator. - MPI_Comm GetComm() const override; - - // Conversion function to SLEPc's EPS type. - operator EPS() const { return eps; } + MPI_Comm GetComm() const override + { + return eps ? PetscObjectComm(reinterpret_cast(eps)) : MPI_COMM_NULL; + } - // Conversion function to PetscObject. operator PetscObject() const override { return reinterpret_cast(eps); }; }; @@ -301,29 +246,29 @@ class SlepcEPSSolver : public SlepcEPSSolverBase { private: // References to matrices defining the generalized eigenvalue problem (not owned). - const petsc::PetscParMatrix *opK, *opM; + const ComplexOperator *opK, *opM; // Operator norms for scaling. mutable PetscReal normK, normM; + // Workspace vector for operator applications. + mutable ComplexVector x, y; + + // Configure linear solver for generalized problem or spectral transformation. + void ConfigurePCShell(); + protected: - // Helper routine for computing the eigenpair residual. - void GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const override; + PetscReal GetResidualNorm(int i) const override; - // Helper routine for computing the backward error. 
- PetscReal GetBackwardScaling(PetscScalar eig) const override; + PetscReal GetBackwardScaling(PetscScalar l) const override; public: - SlepcEPSSolver(MPI_Comm comm, int print_lvl, const std::string &prefix = std::string()); + SlepcEPSSolver(MPI_Comm comm, int print, const std::string &prefix = std::string()); - // Set operators for the generalized eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, + void SetOperators(const ComplexOperator &K, const ComplexOperator &M, ScaleType type) override; - // Access methods for operator application. - const petsc::PetscParMatrix *GetOpK() { return opK; } - const petsc::PetscParMatrix *GetOpM() { return opM; } + void SetBMat(const Operator &B) override; }; // Quadratic eigenvalue problem solver: P(λ) x = (K + λ C + λ² M) x = 0 , solved via @@ -333,66 +278,33 @@ class SlepcPEPLinearSolver : public SlepcEPSSolverBase private: // References to matrices defining the quadratic polynomial eigenvalue problem // (not owned). - const petsc::PetscParMatrix *opK, *opC, *opM; + const ComplexOperator *opK, *opC, *opM; // Operator norms for scaling. mutable PetscReal normK, normC, normM; - // Shell matrix used for weighted inner products. May be nullptr, in which case identity - // is used. Also a reference to the original passed in matrix. - petsc::PetscParMatrix *B0; - const petsc::PetscParMatrix *opB; - // Workspace vectors for operator applications. - mutable petsc::PetscParVector *x1, *x2, *y1, *y2, *z; + mutable ComplexVector x1, x2, y1, y2; + + // Configure linear solver for generalized problem or spectral transformation. + void ConfigurePCShell(); protected: - // Helper routine for computing the eigenpair residual. - void GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const override; + PetscReal GetResidualNorm(int i) const override; - // Helper routine for computing the backward error. 
- PetscReal GetBackwardScaling(PetscScalar eig) const override; + PetscReal GetBackwardScaling(PetscScalar l) const override; public: - SlepcPEPLinearSolver(MPI_Comm comm, int print_lvl, - const std::string &prefix = std::string()); - ~SlepcPEPLinearSolver() override; + SlepcPEPLinearSolver(MPI_Comm comm, int print, const std::string &prefix = std::string()); - // Set operators for the quadratic polynomial eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; + void SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, ScaleType type) override; - // Configure the basis vectors object associated with the eigenvalue solver. - void SetBMat(const petsc::PetscParMatrix &B) override; - - // Set an initial vector for the solution subspace. - void SetInitialSpace(const petsc::PetscParVector &v) override; - - // Get the corresponding eigenvector. - void GetEigenvector(int i, petsc::PetscParVector &v) const override; - - // Helper methods for splitting a block vector from the linearized problem into its into - // two parts. - PetscScalar *GetBlocks(petsc::PetscParVector &v, petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const; - const PetscScalar *GetBlocksRead(const petsc::PetscParVector &v, - petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const; - void RestoreBlocks(PetscScalar *pv, petsc::PetscParVector &v, petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const; - void RestoreBlocksRead(const PetscScalar *pv, const petsc::PetscParVector &v, - petsc::PetscParVector &v1, petsc::PetscParVector &v2) const; - - // Access methods for operator application. 
- const petsc::PetscParMatrix *GetOpK() { return opK; } - const petsc::PetscParMatrix *GetOpC() { return opC; } - const petsc::PetscParMatrix *GetOpM() { return opM; } - const petsc::PetscParMatrix *GetOpB() { return opB; } - petsc::PetscParVector *GetX1() { return x1; } - petsc::PetscParVector *GetX2() { return x2; } - petsc::PetscParVector *GetY1() { return y1; } - petsc::PetscParVector *GetY2() { return y2; } + void SetBMat(const Operator &B) override; + + void SetInitialSpace(const ComplexVector &v) override; + + void GetEigenvector(int i, ComplexVector &x) const override; }; // Base class for SLEPc's PEP problem type. @@ -403,66 +315,51 @@ class SlepcPEPSolverBase : public SlepcEigenSolver PEP pep; // Shell matrices for the quadratic polynomial eigenvalue problem - petsc::PetscParMatrix *A0, *A1, *A2; + Mat A0, A1, A2; - // Customize object with command line options set. void Customize() override; public: // Calls SLEPc's PEPCreate. Expects SLEPc to be initialized/finalized externally. - SlepcPEPSolverBase(MPI_Comm comm, int print_lvl, - const std::string &prefix = std::string()); + SlepcPEPSolverBase(MPI_Comm comm, int print, const std::string &prefix = std::string()); // Call's SLEPc's PEPDestroy. ~SlepcPEPSolverBase() override; - // Set the number of required eigenmodes. - void SetNumModes(int numeig, int numvec = 0) override; + // Conversion function to SLEPc's PEP type. + operator PEP() const { return pep; } - // Set solver tolerance. - void SetTol(double tol) override; + void SetNumModes(int num_eig, int num_vec = 0) override; - // Set maximum number of iterations. - void SetMaxIter(int maxits) override; + void SetTol(PetscReal tol) override; + + void SetMaxIter(int max_it) override; - // Set target spectrum for the eigensolver. When a spectral transformation is used, this - // applies to the spectrum of the shifted operator. void SetWhichEigenpairs(WhichType type) override; - // Set problem type. 
void SetProblemType(ProblemType type) override; - // Set eigenvalue solver. void SetType(Type type) override; - // Set an initial vector for the solution subspace. - void SetInitialSpace(const petsc::PetscParVector &v) override; + void SetInitialSpace(const ComplexVector &v) override; - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. int Solve() override; - // Get the corresponding eigenvalue. - void GetEigenvalue(int i, double &eigr, double &eigi) const override; + PetscScalar GetEigenvalue(int i) const override; - // Get the corresponding eigenvector. - void GetEigenvector(int i, petsc::PetscParVector &v) const override; + void GetEigenvector(int i, ComplexVector &x) const override; - // Get the basis vectors object. BV GetBV() const override; - // Get the spectral transformation object. ST GetST() const override; - // Get the filtering region object. RG GetRG() const override; - // Get the associated MPI communicator. - MPI_Comm GetComm() const override; - - // Conversion function to SLEPc's PEP type. - operator PEP() const { return pep; } + MPI_Comm GetComm() const override + { + return pep ? PetscObjectComm(reinterpret_cast(pep)) : MPI_COMM_NULL; + } - // Conversion function to PetscObject. operator PetscObject() const override { return reinterpret_cast(pep); }; }; @@ -472,30 +369,29 @@ class SlepcPEPSolver : public SlepcPEPSolverBase private: // References to matrices defining the quadratic polynomial eigenvalue problem // (not owned). - const petsc::PetscParMatrix *opK, *opC, *opM; + const ComplexOperator *opK, *opC, *opM; // Operator norms for scaling. mutable PetscReal normK, normC, normM; + // Workspace vector for operator applications. + mutable ComplexVector x, y; + + // Configure linear solver for generalized problem or spectral transformation. + void ConfigurePCShell(); + protected: - // Helper routine for computing the eigenpair residual. 
- void GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const override; + PetscReal GetResidualNorm(int i) const override; - // Helper routine for computing the backward error. - PetscReal GetBackwardScaling(PetscScalar eig) const override; + PetscReal GetBackwardScaling(PetscScalar l) const override; public: - SlepcPEPSolver(MPI_Comm comm, int print_lvl, const std::string &prefix = std::string()); + SlepcPEPSolver(MPI_Comm comm, int print, const std::string &prefix = std::string()); - // Set operators for the quadratic polynomial eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; + void SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, ScaleType type) override; - // Access methods for operator application. - const petsc::PetscParMatrix *GetOpK() { return opK; } - const petsc::PetscParMatrix *GetOpC() { return opC; } - const petsc::PetscParMatrix *GetOpM() { return opM; } + void SetBMat(const Operator &B) override; }; } // namespace slepc @@ -504,6 +400,4 @@ class SlepcPEPSolver : public SlepcPEPSolverBase #endif -#endif - #endif // PALACE_LINALG_SLEPC_HPP diff --git a/palace/linalg/strumpack.cpp b/palace/linalg/strumpack.cpp index bf919f3a3..b3bb96154 100644 --- a/palace/linalg/strumpack.cpp +++ b/palace/linalg/strumpack.cpp @@ -47,8 +47,8 @@ StrumpackSolverBase::StrumpackSolverBase( : StrumpackSolverType(comm), comm(comm) { // Configure the solver. 
- this->SetPrintFactorStatistics((print > 1)); - this->SetPrintSolveStatistics((print > 1)); + this->SetPrintFactorStatistics(print > 1); + this->SetPrintSolveStatistics(print > 1); this->SetKrylovSolver(strumpack::KrylovSolver::DIRECT); // Always as a preconditioner or // direct solver this->SetMatching(strumpack::MatchingJob::NONE); @@ -115,10 +115,7 @@ void StrumpackSolverBase::SetOperator(const ParOperator &op } template class StrumpackSolverBase; -#if STRUMPACK_VERSION_MAJOR >= 6 && STRUMPACK_VERSION_MINOR >= 3 && \ - STRUMPACK_VERSION_PATCH > 1 template class StrumpackSolverBase; -#endif } // namespace palace diff --git a/palace/linalg/strumpack.hpp b/palace/linalg/strumpack.hpp index fd05de362..c1703c60b 100644 --- a/palace/linalg/strumpack.hpp +++ b/palace/linalg/strumpack.hpp @@ -46,11 +46,8 @@ class StrumpackSolverBase : public StrumpackSolverType using StrumpackSolver = StrumpackSolverBase; -#if STRUMPACK_VERSION_MAJOR >= 6 && STRUMPACK_VERSION_MINOR >= 3 && \ - STRUMPACK_VERSION_PATCH > 1 using StrumpackMixedPrecisionSolver = StrumpackSolverBase; -#endif } // namespace palace diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp index a5db9c6a0..3d585f864 100644 --- a/palace/linalg/vector.cpp +++ b/palace/linalg/vector.cpp @@ -60,7 +60,7 @@ double Normalize(MPI_Comm comm, Vector &x) return norm; } -double Normalize(MPI_Comm comm, Vector &x, const ParOperator &B, Vector &Bx) +double Normalize(MPI_Comm comm, Vector &x, const Operator &B, Vector &Bx) { B.Mult(x, Bx); double norm = std::sqrt(mfem::InnerProduct(comm, x, Bx)); @@ -69,7 +69,7 @@ double Normalize(MPI_Comm comm, Vector &x, const ParOperator &B, Vector &Bx) return norm; } -double Normalize(MPI_Comm comm, ComplexVector &x, const ParOperator &B, ComplexVector &Bx) +double Normalize(MPI_Comm comm, ComplexVector &x, const Operator &B, ComplexVector &Bx) { // For SPD B, xᴴ B x is real. 
B.Mult(x.Real(), Bx.Real()); diff --git a/palace/linalg/vector.hpp b/palace/linalg/vector.hpp index 5eef970ea..4f4235f46 100644 --- a/palace/linalg/vector.hpp +++ b/palace/linalg/vector.hpp @@ -11,8 +11,8 @@ namespace palace { class ComplexVector; -class ParOperator; +using Operator = mfem::Operator; using Vector = mfem::Vector; namespace linalg @@ -40,8 +40,8 @@ double Norml1(MPI_Comm comm, const Vector &x); // Normalize the vector, possibly with respect to an SPD matrix B. double Normalize(MPI_Comm comm, Vector &x); -double Normalize(MPI_Comm comm, Vector &x, const ParOperator &B, Vector &Bx); -double Normalize(MPI_Comm comm, ComplexVector &x, const ParOperator &B, ComplexVector &Bx); +double Normalize(MPI_Comm comm, Vector &x, const Operator &B, Vector &Bx); +double Normalize(MPI_Comm comm, ComplexVector &x, const Operator &B, ComplexVector &Bx); } // namespace linalg diff --git a/palace/main.cpp b/palace/main.cpp index 76e280025..9e400f954 100644 --- a/palace/main.cpp +++ b/palace/main.cpp @@ -12,7 +12,6 @@ #include "drivers/electrostaticsolver.hpp" #include "drivers/magnetostaticsolver.hpp" #include "drivers/transientsolver.hpp" -#include "linalg/petsc.hpp" //XX TODO REMOVE ONLY SLEPC... #include "linalg/slepc.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" @@ -130,12 +129,12 @@ int main(int argc, char *argv[]) PrintBanner(world_comm, world_size, num_thread, git_tag); IoData iodata(argv[1], false); - // Initialize Hypre and PETSc, and optionally SLEPc. + // Initialize Hypre and, optionally, SLEPc. mfem::Hypre::Init(); - petsc::Initialize(argc, argv, nullptr, nullptr); - // #if defined(PALACE_WITH_SLEPC) //XX TODO WORKING... - // slepc::Initialize(); - // #endif + // petsc::Initialize(argc, argv, nullptr, nullptr); //XX TODO REMOVE... 
+#if defined(PALACE_WITH_SLEPC) + slepc::Initialize(argc, argv, nullptr, nullptr); +#endif if (PETSC_COMM_WORLD != world_comm) { Mpi::Print(world_comm, "Error: Problem during MPI initialization!\n\n"); @@ -187,10 +186,10 @@ int main(int argc, char *argv[]) Mpi::Print(world_comm, "\n"); // Finalize PETSc. - // #if defined(PALACE_WITH_SLEPC) //XX TODO WORKING... - // slepc::Finalize(); - // #endif - petsc::Finalize(); +#if defined(PALACE_WITH_SLEPC) + slepc::Finalize(); +#endif + // petsc::Finalize(); //XX TODO REMOVE return 0; } diff --git a/palace/models/postoperator.cpp b/palace/models/postoperator.cpp index d886c8ed5..8e1cba350 100644 --- a/palace/models/postoperator.cpp +++ b/palace/models/postoperator.cpp @@ -4,7 +4,6 @@ #include "postoperator.hpp" #include "fem/coefficient.hpp" -#include "linalg/petsc.hpp" #include "models/curlcurloperator.hpp" #include "models/laplaceoperator.hpp" #include "models/lumpedportoperator.hpp" @@ -278,37 +277,33 @@ void PostOperator::InitializeDataCollection(const IoData &iodata) } } -void PostOperator::SetEGridFunction(const petsc::PetscParVector &e) +void PostOperator::SetEGridFunction(const ComplexVector &e) { MFEM_VERIFY( has_imaginary, "SetEGridFunction for complex-valued output called when has_imaginary == false!"); MFEM_VERIFY(E, "Incorrect usage of PostOperator::SetEGridFunction!"); - mfem::Vector Er(e.GetSize()), Ei(e.GetSize()); - e.GetToVectors(Er, Ei); - E->real().SetFromTrueDofs(Er); // Parallel distribute - E->imag().SetFromTrueDofs(Ei); + E->real().SetFromTrueDofs(e.Real()); // Parallel distribute + E->imag().SetFromTrueDofs(e.Imag()); E->real().ExchangeFaceNbrData(); // Ready for parallel comm on shared faces E->imag().ExchangeFaceNbrData(); lumped_port_init = wave_port_init = false; } -void PostOperator::SetBGridFunction(const petsc::PetscParVector &b) +void PostOperator::SetBGridFunction(const ComplexVector &b) { MFEM_VERIFY( has_imaginary, "SetBGridFunction for complex-valued output called when has_imaginary == 
false!"); MFEM_VERIFY(B, "Incorrect usage of PostOperator::SetBGridFunction!"); - mfem::Vector Br(b.GetSize()), Bi(b.GetSize()); - b.GetToVectors(Br, Bi); - B->real().SetFromTrueDofs(Br); // Parallel distribute - B->imag().SetFromTrueDofs(Bi); + B->real().SetFromTrueDofs(b.Real()); // Parallel distribute + B->imag().SetFromTrueDofs(b.Imag()); B->real().ExchangeFaceNbrData(); // Ready for parallel comm on shared faces B->imag().ExchangeFaceNbrData(); lumped_port_init = wave_port_init = false; } -void PostOperator::SetEGridFunction(const mfem::Vector &e) +void PostOperator::SetEGridFunction(const Vector &e) { MFEM_VERIFY(!has_imaginary, "SetEGridFunction for real-valued output called when has_imaginary == true!"); @@ -318,7 +313,7 @@ void PostOperator::SetEGridFunction(const mfem::Vector &e) lumped_port_init = wave_port_init = false; } -void PostOperator::SetBGridFunction(const mfem::Vector &b) +void PostOperator::SetBGridFunction(const Vector &b) { MFEM_VERIFY(!has_imaginary, "SetBGridFunction for real-valued output called when has_imaginary == true!"); @@ -328,7 +323,7 @@ void PostOperator::SetBGridFunction(const mfem::Vector &b) lumped_port_init = wave_port_init = false; } -void PostOperator::SetVGridFunction(const mfem::Vector &v) +void PostOperator::SetVGridFunction(const Vector &v) { MFEM_VERIFY(!has_imaginary, "SetVGridFunction for real-valued output called when has_imaginary == true!"); @@ -337,7 +332,7 @@ void PostOperator::SetVGridFunction(const mfem::Vector &v) V->ExchangeFaceNbrData(); } -void PostOperator::SetAGridFunction(const mfem::Vector &a) +void PostOperator::SetAGridFunction(const Vector &a) { MFEM_VERIFY(!has_imaginary, "SetAGridFunction for real-valued output called when has_imaginary == true!"); diff --git a/palace/models/postoperator.hpp b/palace/models/postoperator.hpp index d71ff7c2f..a790b5c02 100644 --- a/palace/models/postoperator.hpp +++ b/palace/models/postoperator.hpp @@ -12,6 +12,7 @@ #include #include #include "fem/interpolation.hpp" 
+#include "linalg/vector.hpp" #include "models/domainpostoperator.hpp" #include "models/surfacepostoperator.hpp" @@ -27,14 +28,6 @@ class SpaceOperator; class SurfaceCurrentOperator; class WavePortOperator; -namespace petsc -{ - -class PetscParMatrix; -class PetscParVector; - -} // namespace petsc - // // A class to handle solution postprocessing. // @@ -90,12 +83,12 @@ class PostOperator // on the true dofs. For the real-valued overload, the electric scalar potential can be // specified too for electrostatic simulations. The output mesh and fields are // nondimensionalized consistently (B ~ E (L₀ ω₀ E₀⁻¹)). - void SetEGridFunction(const petsc::PetscParVector &e); - void SetBGridFunction(const petsc::PetscParVector &b); - void SetEGridFunction(const mfem::Vector &e); - void SetBGridFunction(const mfem::Vector &b); - void SetVGridFunction(const mfem::Vector &v); - void SetAGridFunction(const mfem::Vector &a); + void SetEGridFunction(const ComplexVector &e); + void SetBGridFunction(const ComplexVector &b); + void SetEGridFunction(const Vector &e); + void SetBGridFunction(const Vector &b); + void SetVGridFunction(const Vector &v); + void SetAGridFunction(const Vector &a); // Update cached port voltages and currents for lumped and wave port operators. void UpdatePorts(const LumpedPortOperator &lumped_port_op, diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index 156528cb1..6009ab017 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -628,12 +628,13 @@ bool SpaceOperator::GetExcitationVector(double omega, ComplexVector &RHS) { // Frequency domain excitation vector: RHS = iω RHS1 + RHS2(ω). 
RHS.SetSize(GetNDSpace().GetTrueVSize()); - RHS = 0.0; + RHS = std::complex(0.0, 0.0); bool nnz1 = AddExcitationVector1Internal(RHS.Real()); RHS *= 1i * omega; bool nnz2 = AddExcitationVector2Internal(omega, RHS); RHS.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); RHS.Imag().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); + RHS.SyncAlias(); return nnz1 || nnz2; } @@ -642,19 +643,21 @@ bool SpaceOperator::GetExcitationVector1(ComplexVector &RHS1) // Assemble the frequency domain excitation term with linear frequency dependence // (coefficient iω, see GetExcitationVector above, is accounted for later). RHS1.SetSize(GetNDSpace().GetTrueVSize()); - RHS1 = 0.0; + RHS1 = std::complex(0.0, 0.0); bool nnz1 = AddExcitationVector1Internal(RHS1.Real()); RHS1.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); + RHS1.SyncAlias(); return nnz1; } bool SpaceOperator::GetExcitationVector2(double omega, ComplexVector &RHS2) { RHS2.SetSize(GetNDSpace().GetTrueVSize()); - RHS2 = 0.0; + RHS2 = std::complex(0.0, 0.0); bool nnz2 = AddExcitationVector2Internal(omega, RHS2); RHS2.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); RHS2.Imag().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); + RHS2.SyncAlias(); return nnz2; } @@ -705,6 +708,23 @@ bool SpaceOperator::AddExcitationVector2Internal(double omega, ComplexVector &RH return true; } +void SpaceOperator::GetConstantInitialVector(ComplexVector &v) +{ + v.SetSize(GetNDSpace().GetTrueVSize()); + v = std::complex(1.0, 0.0); + v.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); + v.SyncAlias(); +} + +void SpaceOperator::GetRandomInitialVector(ComplexVector &v) +{ + v.SetSize(GetNDSpace().GetTrueVSize()); + linalg::SetRandom(GetNDSpace().GetComm(), v); + v.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); + v.Imag().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); + v.SyncAlias(); +} + template void SpaceOperator::AddRealMassCoefficients( double, SumMatrixCoefficient &, SumMatrixCoefficient &); diff --git a/palace/models/spaceoperator.hpp 
b/palace/models/spaceoperator.hpp index d6747737e..9da54232a 100644 --- a/palace/models/spaceoperator.hpp +++ b/palace/models/spaceoperator.hpp @@ -186,6 +186,11 @@ class SpaceOperator // or not the excitation is nonzero (and thus is true most of the time). bool GetExcitationVector1(ComplexVector &RHS1); bool GetExcitationVector2(double omega, ComplexVector &RHS2); + + // Construct a constant or randomly initialized vector which satisfies the PEC essential + // boundary conditions. + void GetRandomInitialVector(ComplexVector &v); + void GetConstantInitialVector(ComplexVector &v); }; } // namespace palace diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp index 4768ddb28..e3f5d06f7 100644 --- a/palace/models/timeoperator.cpp +++ b/palace/models/timeoperator.cpp @@ -80,14 +80,13 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera { // Configure the system matrix and also the matrix (matrices) from which the // preconditioner will be constructed. - this->A = spaceop.GetSystemMatrix(a0, a1, 1.0, this->K.get(), this->C.get(), - this->M.get()); - spaceop.GetPreconditionerMatrix(a0, a1, 1.0, 0.0, this->B, this->AuxB); + A = spaceop.GetSystemMatrix(a0, a1, 1.0, K.get(), C.get(), M.get()); + spaceop.GetPreconditionerMatrix(a0, a1, 1.0, 0.0, B, AuxB); // Configure the solver. auto ksp = std::make_unique(iodata, spaceop.GetNDSpaces(), &spaceop.GetH1Spaces()); - ksp->SetOperator(*this->A, this->B, &this->AuxB); + ksp->SetOperator(*A, B, &AuxB); return ksp; }; } @@ -96,13 +95,12 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera void FormRHS(const Vector &u, const Vector &du, Vector &rhs) const { // Multiply: rhs = -(K u + C du) - g'(t) J. 
- rhs = 0.0; - K->AddMult(u, rhs, -1.0); + K->Mult(u, rhs); if (C) { - C->AddMult(du, rhs, -1.0); + C->AddMult(du, rhs, 1.0); } - rhs.Add(dJcoef(t), NegJ); + add(-1.0, rhs, dJcoef(t), NegJ, rhs); } void Mult(const Vector &u, const Vector &du, Vector &ddu) const override @@ -241,7 +239,7 @@ void TimeOperator::Step(double &t, double &dt) ode->Step(E, dE, t, dt); // Trapezoidal integration for B-field: dB/dt = -∇ x E. - En.Add(1.0, E); + En += E; Curl->AddMult(En, B, -0.5 * dt); } diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp index 20228d7f7..12100f66e 100644 --- a/palace/models/waveportoperator.cpp +++ b/palace/models/waveportoperator.cpp @@ -155,87 +155,89 @@ inline mfem::HypreParMatrix GetZ(mfem::ParFiniteElementSpace &fespace) return *z.ParallelAssemble(); } -struct SystemMatrices -{ - petsc::PetscParMatrix A1; - petsc::PetscParMatrix A2; - petsc::PetscParMatrix B3; - petsc::PetscParMatrix B4; -}; - -SystemMatrices -GetSystemMatrices(const mfem::HypreParMatrix &Att1, const mfem::HypreParMatrix &Att2r, - const std::optional &Att2i, - const mfem::HypreParMatrix &Btt, const mfem::HypreParMatrix &Btn, - const mfem::HypreParMatrix &Bnn1, const mfem::HypreParMatrix &Bnn2r, - const std::optional &Bnn2i, - const mfem::HypreParMatrix &Ztt, const mfem::HypreParMatrix &Znn, - const mfem::Array &nd_tdof_list, - const mfem::Array &h1_tdof_list, int nd_tdof_offset) -{ - // Construct the 2x2 block matrices for the eigenvalue problem. We pre-compute the - // eigenvalue problem matrices such that: - // A = A₁ - ω² A₂, B = A + 1/Θ² B₃ - ω²/Θ² B₄. 
- mfem::Array2D blocks(2, 2); - blocks(0, 0) = &Btt; - blocks(0, 1) = &Btn; - blocks(1, 0) = Btn.Transpose(); - blocks(1, 1) = &Bnn1; - std::unique_ptr hA1s(mfem::HypreParMatrixFromBlocks(blocks)); - auto A1s = petsc::PetscAijMatrix(*hA1s); - - blocks = nullptr; - blocks(0, 0) = &Ztt; - blocks(1, 1) = &Bnn2r; - std::unique_ptr hA2r(mfem::HypreParMatrixFromBlocks(blocks)); - auto A2s = [&]() - { - if (!Bnn2i) - { - return petsc::PetscAijMatrix(*hA2r); - } - blocks(1, 1) = &*Bnn2i; - std::unique_ptr hA2i(mfem::HypreParMatrixFromBlocks(blocks)); - return petsc::PetscAijMatrix(*hA2r, *hA2i); - }(); - - blocks = nullptr; - blocks(0, 0) = &Att1; - blocks(1, 1) = &Znn; - std::unique_ptr hB3s(mfem::HypreParMatrixFromBlocks(blocks)); - auto B3s = petsc::PetscAijMatrix(*hB3s); - - blocks = nullptr; - blocks(0, 0) = &Att2r; - blocks(1, 1) = &Znn; - std::unique_ptr hB4r(mfem::HypreParMatrixFromBlocks(blocks)); - auto B4s = [&]() - { - if (!Att2i) - { - return petsc::PetscAijMatrix(*hB4r); - } - blocks(0, 0) = &*Att2i; - std::unique_ptr hB4i(mfem::HypreParMatrixFromBlocks(blocks)); - return petsc::PetscAijMatrix(*hB4r, *hB4i); - }(); - - // Consolidate list of local ND and H1 true dofs before extracting the respective - // submatrices. The matrix is still distributed over the same number of processors, - // though some are empty (PETSc handles this). 
- mfem::Array tdof_list; - tdof_list.Reserve(nd_tdof_list.Size() + h1_tdof_list.Size()); - for (auto tdof : nd_tdof_list) - { - tdof_list.Append(tdof); - } - for (auto tdof : h1_tdof_list) - { - tdof_list.Append(tdof + nd_tdof_offset); - } - return {*A1s.GetSubMatrix(tdof_list, tdof_list), *A2s.GetSubMatrix(tdof_list, tdof_list), - *B3s.GetSubMatrix(tdof_list, tdof_list), *B4s.GetSubMatrix(tdof_list, tdof_list)}; -} +// struct SystemMatrices +// { +// petsc::PetscParMatrix A1; +// petsc::PetscParMatrix A2; +// petsc::PetscParMatrix B3; +// petsc::PetscParMatrix B4; +// }; + +// SystemMatrices +// GetSystemMatrices(const mfem::HypreParMatrix &Att1, const mfem::HypreParMatrix &Att2r, +// const std::optional &Att2i, +// const mfem::HypreParMatrix &Btt, const mfem::HypreParMatrix &Btn, +// const mfem::HypreParMatrix &Bnn1, const mfem::HypreParMatrix &Bnn2r, +// const std::optional &Bnn2i, +// const mfem::HypreParMatrix &Ztt, const mfem::HypreParMatrix &Znn, +// const mfem::Array &nd_tdof_list, +// const mfem::Array &h1_tdof_list, int nd_tdof_offset) +// { +// // Construct the 2x2 block matrices for the eigenvalue problem. We pre-compute the +// // eigenvalue problem matrices such that: +// // A = A₁ - ω² A₂, B = A + 1/Θ² B₃ - ω²/Θ² B₄. 
+// mfem::Array2D blocks(2, 2); +// blocks(0, 0) = &Btt; +// blocks(0, 1) = &Btn; +// blocks(1, 0) = Btn.Transpose(); +// blocks(1, 1) = &Bnn1; +// std::unique_ptr hA1s(mfem::HypreParMatrixFromBlocks(blocks)); +// auto A1s = petsc::PetscAijMatrix(*hA1s); + +// blocks = nullptr; +// blocks(0, 0) = &Ztt; +// blocks(1, 1) = &Bnn2r; +// std::unique_ptr hA2r(mfem::HypreParMatrixFromBlocks(blocks)); +// auto A2s = [&]() +// { +// if (!Bnn2i) +// { +// return petsc::PetscAijMatrix(*hA2r); +// } +// blocks(1, 1) = &*Bnn2i; +// std::unique_ptr hA2i(mfem::HypreParMatrixFromBlocks(blocks)); +// return petsc::PetscAijMatrix(*hA2r, *hA2i); +// }(); + +// blocks = nullptr; +// blocks(0, 0) = &Att1; +// blocks(1, 1) = &Znn; +// std::unique_ptr hB3s(mfem::HypreParMatrixFromBlocks(blocks)); +// auto B3s = petsc::PetscAijMatrix(*hB3s); + +// blocks = nullptr; +// blocks(0, 0) = &Att2r; +// blocks(1, 1) = &Znn; +// std::unique_ptr hB4r(mfem::HypreParMatrixFromBlocks(blocks)); +// auto B4s = [&]() +// { +// if (!Att2i) +// { +// return petsc::PetscAijMatrix(*hB4r); +// } +// blocks(0, 0) = &*Att2i; +// std::unique_ptr hB4i(mfem::HypreParMatrixFromBlocks(blocks)); +// return petsc::PetscAijMatrix(*hB4r, *hB4i); +// }(); + +// // Consolidate list of local ND and H1 true dofs before extracting the respective +// // submatrices. The matrix is still distributed over the same number of processors, +// // though some are empty (PETSc handles this). 
+// mfem::Array tdof_list; +// tdof_list.Reserve(nd_tdof_list.Size() + h1_tdof_list.Size()); +// for (auto tdof : nd_tdof_list) +// { +// tdof_list.Append(tdof); +// } +// for (auto tdof : h1_tdof_list) +// { +// tdof_list.Append(tdof + nd_tdof_offset); +// } +// return {*A1s.GetSubMatrix(tdof_list, tdof_list), *A2s.GetSubMatrix(tdof_list, +// tdof_list), +// *B3s.GetSubMatrix(tdof_list, tdof_list), *B4s.GetSubMatrix(tdof_list, +// tdof_list)}; +// } } // namespace @@ -370,49 +372,51 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera const auto &[Att1, Att2r, Att2i] = GetAtt(mat_op, nd_fespace, attr_marker); const auto &Ztt = GetZ(nd_fespace); const auto &Znn = GetZ(h1_fespace); - auto system_mat = - GetSystemMatrices(Att1, Att2r, Att2i, Btt, Btn, Bnn1, Bnn2r, Bnn2i, Ztt, Znn, - nd_attr_tdof_list, h1_attr_tdof_list, nd_fespace.GetTrueVSize()); - A1 = std::make_unique(std::move(system_mat.A1)); - A2 = std::make_unique(std::move(system_mat.A2)); - B3 = std::make_unique(std::move(system_mat.B3)); - B4 = std::make_unique(std::move(system_mat.B4)); + // auto system_mat = + // GetSystemMatrices(Att1, Att2r, Att2i, Btt, Btn, Bnn1, Bnn2r, Bnn2i, Ztt, Znn, + // nd_attr_tdof_list, h1_attr_tdof_list, + // nd_fespace.GetTrueVSize()); + // A1 = std::make_unique(std::move(system_mat.A1)); + // A2 = std::make_unique(std::move(system_mat.A2)); + // B3 = std::make_unique(std::move(system_mat.B3)); + // B4 = std::make_unique(std::move(system_mat.B4)); } // Configure sequential vector and scatter from parallel. The original vector is created // to be compatible with the parallel matrix, and the scatter creates a sequential vector // compatible with the sequential matrix. Then, gather matrices so eigenvalue problem can // be solved sequentially without communication. A1/A2/B3/B4 = nullptr if !root. 
- { - bool root = Mpi::Root(A1->GetComm()); - e = std::make_unique(*A1); - scatter = - std::make_unique(petsc::PetscScatter::Type::TO_ZERO, *e, e0); - A1 = A1->GetSequentialMatrix(root); - A2 = A2->GetSequentialMatrix(root); - B3 = B3->GetSequentialMatrix(root); - B4 = B4->GetSequentialMatrix(root); - } - if (A1) - { - // sparsity(A2) ⊆ sparsity(A1), sparsity(B4) ⊆ sparsity(B3) ⊆ sparsity(A) - A = std::make_unique(*A1); - B = std::make_unique(*A1); - A->SetSymmetric(); - B->SetSymmetric(); - A1->SetSymmetric(); - A2->SetSymmetric(); - B3->SetSymmetric(); - B4->SetSymmetric(); - } + // { + // bool root = Mpi::Root(A1->GetComm()); + // e = std::make_unique(*A1); + // scatter = + // std::make_unique(petsc::PetscScatter::Type::TO_ZERO, *e, + // e0); + // A1 = A1->GetSequentialMatrix(root); + // A2 = A2->GetSequentialMatrix(root); + // B3 = B3->GetSequentialMatrix(root); + // B4 = B4->GetSequentialMatrix(root); + // } + // if (A1) + // { + // // sparsity(A2) ⊆ sparsity(A1), sparsity(B4) ⊆ sparsity(B3) ⊆ sparsity(A) + // A = std::make_unique(*A1); + // B = std::make_unique(*A1); + // A->SetSymmetric(); + // B->SetSymmetric(); + // A1->SetSymmetric(); + // A2->SetSymmetric(); + // B3->SetSymmetric(); + // B4->SetSymmetric(); + // } // Create vector for initial space (initially parallel, then scattered to root). - { - petsc::PetscParVector y(*e); - GetInitialSpace(nd_attr_tdof_list.Size(), h1_attr_tdof_list.Size(), y); - y0 = std::make_unique(*e0); - scatter->Forward(y, *y0); - } + // { + // petsc::PetscParVector y(*e); + // GetInitialSpace(nd_attr_tdof_list.Size(), h1_attr_tdof_list.Size(), y); + // y0 = std::make_unique(*e0); + // scatter->Forward(y, *y0); + // } // Coefficients store references to kₙ, ω so they are updated implicitly at each new // solve. Also, μ⁻¹ is persistent, so no copy is OK. @@ -426,7 +430,7 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera // Configure the eigenvalue problem solver. 
As for the full 3D case, the system matrices // are in general complex and symmetric. We supply the operators to the solver in // shift-inverted form and handle the back- transformation externally. - if (A) + // if (A) //XX { // Define the linear solver to be used for solving systems associated with the // generalized eigenvalue problem. We use PETSc's sequential sparse solvers. @@ -544,77 +548,80 @@ void WavePortData::GetTrueDofs(mfem::ParFiniteElementSpace &nd_fespace, } } -void WavePortData::GetInitialSpace(int nt, int nn, petsc::PetscParVector &y0) -{ - // Initial space chosen as such that B v₀ = y₀, with y₀ = [y₀ₜ, 0, ... 0]ᵀ ⟂ null(A) - // (with Aₜₜ nonsingular). See Lee, Sun, and Cendes, 1991 for reference. - // Note: When the eigenvalue solver uses a standard ℓ²-inner product instead of B-inner - // product(since we use a general non-Hermitian solver due to complex symmetric B), then - // we just use v0 = y0 directly. - MFEM_VERIFY(y0.GetSize() == nt + nn, "Invalid vector size!"); - y0.SetRandomReal(); - PetscScalar *py0 = y0.GetArray(); - // for (int i = 0; i < nt; i++) { py0[i] = 1.0; } - for (int i = nt; i < nt + nn; i++) - { - py0[i] = 0.0; - } - y0.RestoreArray(py0); -} - -std::complex WavePortData::Solve(petsc::PetscParVector &y0, - petsc::PetscParVector &e0, - petsc::PetscParVector &e, - petsc::PetscScatter &scatter) -{ - double eig[2]; - - // XX TODO REVISIT... - - // if (A) // Only on root - // { - // // The y0 and e0 vectors are still parallel vectors, but with all data on root. We - // want - // // true sequential vectors. - // PetscScalar *pe0 = e0.GetArray(); - // petsc::PetscParVector e0s(e0.GetSize(), pe0); - - // // Set starting vector. - // { - // PetscScalar *py0 = y0.GetArray(); - // petsc::PetscParVector y0s(y0.GetSize(), py0); - // eigen->SetInitialSpace(y0s); - // y0.RestoreArray(py0); - // } - - // #if 0 - // // Alternatively, use B-orthogonal initial space. Probably want to call SetBMat for - // // the eigensolver in this case. 
- // { - // PetscScalar *py0 = y0.GetArray(); - // petsc::PetscParVector y0s(y0.GetSize(), py0); - // petsc::PetscParVector v0s(y0s); - // ksp->Mult(y0s, v0s); - // eigen->SetInitialSpace(v0s); - // y0.RestoreArray(py0); - // } - // #endif - - // // Solve (operators have been set in constructor). - // int num_conv = 0; - // eigen->SetOperators(*A, *B, EigenSolverBase::ScaleType::NONE); - // num_conv = eigen->Solve(); - // MFEM_VERIFY(num_conv >= mode_idx, "Wave port eigensolver did not converge!"); - // eigen->GetEigenvalue(mode_idx - 1, eig[0], eig[1]); - // eigen->GetEigenvector(mode_idx - 1, e0s); - // e0.RestoreArray(pe0); - // } - - // Scatter the result to all processors. - scatter.Reverse(e0, e); - Mpi::Broadcast(2, eig, 0, e.GetComm()); - return {eig[0], eig[1]}; -} +// void WavePortData::GetInitialSpace(int nt, int nn, petsc::PetscParVector &y0) +// { +// // Initial space chosen as such that B v₀ = y₀, with y₀ = [y₀ₜ, 0, ... 0]ᵀ ⟂ null(A) +// // (with Aₜₜ nonsingular). See Lee, Sun, and Cendes, 1991 for reference. +// // Note: When the eigenvalue solver uses a standard ℓ²-inner product instead of B-inner +// // product(since we use a general non-Hermitian solver due to complex symmetric B), +// then +// // we just use v0 = y0 directly. +// MFEM_VERIFY(y0.GetSize() == nt + nn, "Invalid vector size!"); +// y0.SetRandomReal(); +// PetscScalar *py0 = y0.GetArray(); +// // for (int i = 0; i < nt; i++) { py0[i] = 1.0; } +// for (int i = nt; i < nt + nn; i++) +// { +// py0[i] = 0.0; +// } +// y0.RestoreArray(py0); +// } + +// std::complex WavePortData::Solve(petsc::PetscParVector &y0, +// petsc::PetscParVector &e0, +// petsc::PetscParVector &e, +// petsc::PetscScatter &scatter) +// { +// double eig[2]; + +// // XX TODO REVISIT... + +// // if (A) // Only on root +// // { +// // // The y0 and e0 vectors are still parallel vectors, but with all data on root. +// We +// // want +// // // true sequential vectors. 
+// // PetscScalar *pe0 = e0.GetArray(); +// // petsc::PetscParVector e0s(e0.GetSize(), pe0); + +// // // Set starting vector. +// // { +// // PetscScalar *py0 = y0.GetArray(); +// // petsc::PetscParVector y0s(y0.GetSize(), py0); +// // eigen->SetInitialSpace(y0s); +// // y0.RestoreArray(py0); +// // } + +// // #if 0 +// // // Alternatively, use B-orthogonal initial space. Probably want to call SetBMat +// for +// // // the eigensolver in this case. +// // { +// // PetscScalar *py0 = y0.GetArray(); +// // petsc::PetscParVector y0s(y0.GetSize(), py0); +// // petsc::PetscParVector v0s(y0s); +// // ksp->Mult(y0s, v0s); +// // eigen->SetInitialSpace(v0s); +// // y0.RestoreArray(py0); +// // } +// // #endif + +// // // Solve (operators have been set in constructor). +// // int num_conv = 0; +// // eigen->SetOperators(*A, *B, EigenSolverBase::ScaleType::NONE); +// // num_conv = eigen->Solve(); +// // MFEM_VERIFY(num_conv >= mode_idx, "Wave port eigensolver did not converge!"); +// // eigen->GetEigenvalue(mode_idx - 1, eig[0], eig[1]); +// // eigen->GetEigenvector(mode_idx - 1, e0s); +// // e0.RestoreArray(pe0); +// // } + +// // Scatter the result to all processors. +// scatter.Reverse(e0, e); +// Mpi::Broadcast(2, eig, 0, e.GetComm()); +// return {eig[0], eig[1]}; +// } void WavePortData::Initialize(double omega) { @@ -626,21 +633,22 @@ void WavePortData::Initialize(double omega) // Use pre-computed matrices to construct and solve the generalized eigenvalue problem for // the desired wave port mode. 
double theta2 = muepsmax * omega * omega; - if (A) - { - MFEM_VERIFY(A1 && A2 && B3 && B4 && A && B, - "Boundary mode eigenvalue problem operators uninitialized for solve!"); - A->Scale(0.0); - A->AXPY(1.0, *A1, petsc::PetscParMatrix::NNZStructure::SAME); - A->AXPY(-omega * omega, *A2, petsc::PetscParMatrix::NNZStructure::SUBSET); - B->Scale(0.0); - B->AXPY(1.0, *A, petsc::PetscParMatrix::NNZStructure::SAME); - B->AXPY(1.0 / theta2, *B3, petsc::PetscParMatrix::NNZStructure::SUBSET); - B->AXPY(-omega * omega / theta2, *B4, petsc::PetscParMatrix::NNZStructure::SUBSET); - } + // if (A) + // { + // MFEM_VERIFY(A1 && A2 && B3 && B4 && A && B, + // "Boundary mode eigenvalue problem operators uninitialized for solve!"); + // A->Scale(0.0); + // A->AXPY(1.0, *A1, petsc::PetscParMatrix::NNZStructure::SAME); + // A->AXPY(-omega * omega, *A2, petsc::PetscParMatrix::NNZStructure::SUBSET); + // B->Scale(0.0); + // B->AXPY(1.0, *A, petsc::PetscParMatrix::NNZStructure::SAME); + // B->AXPY(1.0 / theta2, *B3, petsc::PetscParMatrix::NNZStructure::SUBSET); + // B->AXPY(-omega * omega / theta2, *B4, petsc::PetscParMatrix::NNZStructure::SUBSET); + // } // Configure and solve the eigenvalue problem for the desired boundary mode. - std::complex lambda = Solve(*y0, *e0, *e, *scatter); + std::complex lambda; + // lambda = Solve(*y0, *e0, *e, *scatter); // Extract the eigenmode solution and postprocess. The extracted eigenvalue is λ = // Θ²/(Θ²-kₙ²). 
@@ -654,11 +662,11 @@ void WavePortData::Initialize(double omega) mfem::Vector etr(nd_attr_tdof_list.Size()), eti(nd_attr_tdof_list.Size()), enr(h1_attr_tdof_list.Size()), eni(h1_attr_tdof_list.Size()); - MFEM_VERIFY(e->GetSize() == etr.Size() + enr.Size(), - "Unexpected vector size in wave port eigenmode solver!"); - e->GetToVectors(etr, eti, 0, nd_attr_tdof_list.Size()); - e->GetToVectors(enr, eni, nd_attr_tdof_list.Size(), - nd_attr_tdof_list.Size() + h1_attr_tdof_list.Size()); + // MFEM_VERIFY(e->GetSize() == etr.Size() + enr.Size(), + // "Unexpected vector size in wave port eigenmode solver!"); + // e->GetToVectors(etr, eti, 0, nd_attr_tdof_list.Size()); + // e->GetToVectors(enr, eni, nd_attr_tdof_list.Size(), + // nd_attr_tdof_list.Size() + h1_attr_tdof_list.Size()); // Re-expand from restricted boundary dofs to true dofs and transform back to true // electric field variables: Eₜ = eₜ/kₙ and Eₙ = ieₙ. @@ -958,19 +966,19 @@ void WavePortOperator::Initialize(double omega) data.Initialize(omega); if (!suppress_output) { - if (first) - { - // Print header at first solve. - if (data.GetA() && data.GetB()) - { - Mpi::Print(" Number of global unknowns for port {:d}: {}\n", idx, - data.GetA()->GetGlobalNumRows()); - Mpi::Print(" A: NNZ = {:d}, norm = {:e}\n", data.GetA()->NNZ(), - data.GetA()->NormF()); - Mpi::Print(" B: NNZ = {:d}, norm = {:e}\n", data.GetB()->NNZ(), - data.GetB()->NormF()); - } - } + // if (first) + // { + // // Print header at first solve. 
+ // if (data.GetA() && data.GetB()) + // { + // Mpi::Print(" Number of global unknowns for port {:d}: {}\n", idx, + // data.GetA()->GetGlobalNumRows()); + // Mpi::Print(" A: NNZ = {:d}, norm = {:e}\n", data.GetA()->NNZ(), + // data.GetA()->NormF()); + // Mpi::Print(" B: NNZ = {:d}, norm = {:e}\n", data.GetB()->NNZ(), + // data.GetB()->NormF()); + // } + // } double k0 = 1.0 / iodata.DimensionalizeValue(IoData::ValueType::LENGTH, 1.0); Mpi::Print(" Port {:d}, mode {:d}: kₙ = {:.3e}{:+.3e}i m⁻¹\n", idx, data.GetModeIndex(), k0 * data.GetPropagationConstant().real(), diff --git a/palace/models/waveportoperator.hpp b/palace/models/waveportoperator.hpp index 1ea469074..9cdfcb20e 100644 --- a/palace/models/waveportoperator.hpp +++ b/palace/models/waveportoperator.hpp @@ -8,9 +8,9 @@ #include #include #include -// #include "linalg/eigen.hpp" +// #include "linalg/eps.hpp" // #include "linalg/ksp.hpp" -#include "linalg/petsc.hpp" +// #include "linalg/petsc.hpp" namespace palace { @@ -45,9 +45,9 @@ class WavePortData mfem::Array nd_attr_tdof_list, h1_attr_tdof_list; // Operator storage for repeated boundary mode eigenvalue problem solves. - std::unique_ptr A, B, A1, A2, B3, B4; - std::unique_ptr e, e0, y0; - std::unique_ptr scatter; + // std::unique_ptr A, B, A1, A2, B3, B4; + // std::unique_ptr e, e0, y0; + // std::unique_ptr scatter; double muepsmax; // Grid functions storing the last computed electric field mode on the port and the @@ -71,10 +71,10 @@ class WavePortData const mfem::Array &dbc_marker, mfem::Array &nd_tdof_list, mfem::Array &h1_tdof_list); - // Configure and solve the linear eigenvalue problem for the boundary mode. - void GetInitialSpace(int nt, int nn, petsc::PetscParVector &y0); - std::complex Solve(petsc::PetscParVector &y0, petsc::PetscParVector &e0, - petsc::PetscParVector &e, petsc::PetscScatter &scatter); + // // Configure and solve the linear eigenvalue problem for the boundary mode. 
+ // void GetInitialSpace(int nt, int nn, petsc::PetscParVector &y0); + // std::complex Solve(petsc::PetscParVector &y0, petsc::PetscParVector &e0, + // petsc::PetscParVector &e, petsc::PetscScatter &scatter); public: WavePortData(const config::WavePortData &data, const MaterialOperator &mat_op, @@ -86,8 +86,8 @@ class WavePortData void Initialize(double omega); - const petsc::PetscParMatrix *GetA() const { return A.get(); } - const petsc::PetscParMatrix *GetB() const { return B.get(); } + // const petsc::PetscParMatrix *GetA() const { return A.get(); } + // const petsc::PetscParMatrix *GetB() const { return B.get(); } std::complex GetPropagationConstant() const { return kn0; } double GetOperatingFrequency() const { return omega0; } From 466f2c6630b2d626b1bc91e307ab0e6d0e4b9170 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Tue, 16 May 2023 18:24:46 -0700 Subject: [PATCH 06/41] Upgrade wave ports for new linear algebra interface --- palace/drivers/eigensolver.cpp | 34 +- palace/linalg/CMakeLists.txt | 1 - palace/linalg/amg.hpp | 5 +- palace/linalg/ams.cpp | 10 +- palace/linalg/ams.hpp | 5 +- palace/linalg/chebyshev.cpp | 2 +- palace/linalg/complex.cpp | 1 + palace/linalg/hypre.cpp | 25 - palace/linalg/hypre.hpp | 22 - palace/linalg/jacobi.cpp | 2 +- palace/linalg/jacobi.hpp | 6 +- palace/linalg/ksp.cpp | 8 +- palace/linalg/mumps.hpp | 5 +- palace/linalg/strumpack.cpp | 10 +- palace/linalg/strumpack.hpp | 5 +- palace/linalg/superlu.cpp | 10 +- palace/linalg/superlu.hpp | 5 +- palace/models/curlcurloperator.cpp | 2 +- palace/models/laplaceoperator.cpp | 6 +- palace/models/waveportoperator.cpp | 937 +++++++++++++---------------- palace/models/waveportoperator.hpp | 41 +- 21 files changed, 497 insertions(+), 645 deletions(-) delete mode 100644 palace/linalg/hypre.cpp delete mode 100644 palace/linalg/hypre.hpp diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp index a4f3b4dde..19fe54079 100644 --- a/palace/drivers/eigensolver.cpp +++ 
b/palace/drivers/eigensolver.cpp @@ -78,26 +78,26 @@ void EigenSolver::Solve(std::vector> &mesh, if (type == config::EigenSolverData::Type::FEAST) { MFEM_ABORT("FEAST eigenvalue solver is currently not supported!"); - // Mpi::Print("\nConfiguring FEAST eigenvalue solver\n"); - // #if defined(PALACE_WITH_SLEPC) - // if (C) - // { - // eigen = std::make_unique( - // K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, - // iodata.problem.verbose); - // } - // else - // { - // eigen = std::make_unique( - // K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, - // iodata.problem.verbose); - // } - // #endif +#if defined(PALACE_WITH_SLEPC) + // Mpi::Print("\nConfiguring FEAST eigenvalue solver\n"); + // if (C) + // { + // eigen = std::make_unique( + // K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, + // iodata.problem.verbose); + // } + // else + // { + // eigen = std::make_unique( + // K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, + // iodata.problem.verbose); + // } +#endif } else if (type == config::EigenSolverData::Type::ARPACK) { - Mpi::Print("\nConfiguring ARPACK eigenvalue solver\n"); #if defined(PALACE_WITH_ARPACK) + Mpi::Print("\nConfiguring ARPACK eigenvalue solver\n"); if (C) { eigen = @@ -112,8 +112,8 @@ void EigenSolver::Solve(std::vector> &mesh, } else // config::EigenSolverData::Type::SLEPC { - Mpi::Print("\nConfiguring SLEPc eigenvalue solver\n"); #if defined(PALACE_WITH_SLEPC) + Mpi::Print("\nConfiguring SLEPc eigenvalue solver\n"); std::unique_ptr slepc; if (C) { diff --git a/palace/linalg/CMakeLists.txt b/palace/linalg/CMakeLists.txt index b3b04a0ee..8f93f9a28 100644 --- a/palace/linalg/CMakeLists.txt +++ b/palace/linalg/CMakeLists.txt @@ -17,7 +17,6 @@ target_sources(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/divfree.cpp ${CMAKE_CURRENT_SOURCE_DIR}/feast.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gmg.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/hypre.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/jacobi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ksp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mumps.cpp diff --git a/palace/linalg/amg.hpp b/palace/linalg/amg.hpp index 3b602a2b7..7c3a5c5a2 100644 --- a/palace/linalg/amg.hpp +++ b/palace/linalg/amg.hpp @@ -24,10 +24,7 @@ class BoomerAmgSolver : public mfem::HypreBoomerAMG { } - void SetOperator(const Operator &op) override - { - MFEM_ABORT("BoomerAmgSolver requires a ParOperator operator!"); - } + using mfem::HypreBoomerAMG::SetOperator; void SetOperator(const ParOperator &op); }; diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp index bed73f212..757442e63 100644 --- a/palace/linalg/ams.cpp +++ b/palace/linalg/ams.cpp @@ -199,7 +199,7 @@ void HypreAmsSolver::InitializeSolver() } } -void HypreAmsSolver::SetOperator(const ParOperator &op) +void HypreAmsSolver::SetOperator(const Operator &op) { // When the operator changes, we need to rebuild the AMS solver but can use the unchanged // auxiliary space matrices. @@ -209,7 +209,8 @@ void HypreAmsSolver::SetOperator(const ParOperator &op) InitializeSolver(); } - A = &const_cast(&op)->ParallelAssemble(); + A = dynamic_cast(const_cast(&op)); + MFEM_VERIFY(A, "HypreAmsSolver requires a HypreParMatrix operator!"); height = A->Height(); width = A->Width(); @@ -224,4 +225,9 @@ void HypreAmsSolver::SetOperator(const ParOperator &op) auxX.Reset(); } +void HypreAmsSolver::SetOperator(const ParOperator &op) +{ + SetOperator(const_cast(&op)->ParallelAssemble()); +} + } // namespace palace diff --git a/palace/linalg/ams.hpp b/palace/linalg/ams.hpp index 385261f7e..454d259f9 100644 --- a/palace/linalg/ams.hpp +++ b/palace/linalg/ams.hpp @@ -67,10 +67,7 @@ class HypreAmsSolver : public mfem::HypreSolver } ~HypreAmsSolver() override; - void SetOperator(const Operator &op) override - { - MFEM_ABORT("HypreAmsSolver requires a ParOperator operator!"); - } + void SetOperator(const Operator &op) override; void SetOperator(const ParOperator &op); operator HYPRE_Solver() 
const override { return ams; } diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp index 8be3e8891..93f6fda8b 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ -34,7 +34,7 @@ void ChebyshevSmoother::SetOperator(const ParOperator &op) lambda_max = 1.1 * linalg::SpectralNorm(A->GetComm(), DinvA, false); } -void ChebyshevSmoother::Mult(const mfem::Vector &x, mfem::Vector &y) const +void ChebyshevSmoother::Mult(const Vector &x, Vector &y) const { // Apply smoother: y = y + p(A) (x - A y) . for (int it = 0; it < pc_it; it++) diff --git a/palace/linalg/complex.cpp b/palace/linalg/complex.cpp index 669b6a2ec..376b81426 100644 --- a/palace/linalg/complex.cpp +++ b/palace/linalg/complex.cpp @@ -109,6 +109,7 @@ ComplexVector &ComplexVector::operator=(std::complex s) Imag() = s.imag(); RestoreReal(); RestoreImag(); + return *this; } ComplexVector &ComplexVector::operator*=(std::complex s) diff --git a/palace/linalg/hypre.cpp b/palace/linalg/hypre.cpp deleted file mode 100644 index ec918718d..000000000 --- a/palace/linalg/hypre.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#include "hypre.hpp" - -namespace mfem -{ - -mfem::HypreParMatrix * -HypreParMatrixFromBlocks(mfem::Array2D &blocks, - mfem::Array2D *coeff) -{ - mfem::Array2D blocks_without_const(blocks.NumRows(), - blocks.NumCols()); - for (int i = 0; i < blocks.NumRows(); i++) - { - for (int j = 0; j < blocks.NumCols(); j++) - { - blocks_without_const(i, j) = const_cast(blocks(i, j)); - } - } - return HypreParMatrixFromBlocks(blocks_without_const, coeff); -} - -} // namespace mfem diff --git a/palace/linalg/hypre.hpp b/palace/linalg/hypre.hpp deleted file mode 100644 index 00131a05d..000000000 --- a/palace/linalg/hypre.hpp +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LINALG_HYPRE_HPP -#define PALACE_LINALG_HYPRE_HPP - -#include - -// XX TODO REVISIT AFTER WAVE PORT OPERATOR IF NEEDED.... - -namespace mfem -{ - -// Convenience wrapper for casting away the const on the pointers and dispatching onto the -// original function that has the argument type: mfem::Array2D &. -mfem::HypreParMatrix * -HypreParMatrixFromBlocks(mfem::Array2D &blocks, - mfem::Array2D *coeff = nullptr); - -} // namespace mfem - -#endif // PALACE_LINALG_HYPRE_HPP diff --git a/palace/linalg/jacobi.cpp b/palace/linalg/jacobi.cpp index 0a494b84d..b4ccb86b1 100644 --- a/palace/linalg/jacobi.cpp +++ b/palace/linalg/jacobi.cpp @@ -8,7 +8,7 @@ namespace palace { -void JacobiSmoother::SetOperator(const ParOperator &op) +void JacobiSmoother::SetOperator(const Operator &op) { height = op.Height(); width = op.Width(); diff --git a/palace/linalg/jacobi.hpp b/palace/linalg/jacobi.hpp index cc1fbff7f..2b240d3af 100644 --- a/palace/linalg/jacobi.hpp +++ b/palace/linalg/jacobi.hpp @@ -25,11 +25,7 @@ class JacobiSmoother : public mfem::Solver public: JacobiSmoother() : mfem::Solver() {} - void SetOperator(const Operator &op) override - { - MFEM_ABORT("JacobiSmoother requires a ParOperator operator!"); - } - void SetOperator(const ParOperator &op); + void SetOperator(const Operator &op) override; void Mult(const Vector &x, Vector &y) const override; diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp index 198959941..05a39ba38 100644 --- a/palace/linalg/ksp.cpp +++ b/palace/linalg/ksp.cpp @@ -214,13 +214,17 @@ class ComplexBlockDiagonalSolver : public mfem::Solver { MFEM_ASSERT(x.Size() == 2 * op_->Width() && y.Size() == 2 * op_->Height(), "Incompatible dimensions for ComplexBlockDiagonalSolver::Mult!"); - mfem::Array X(2); - mfem::Array Y(2); Vector xr, xi, yr, yi; xr.MakeRef(const_cast(x), 0, op_->Width()); xi.MakeRef(const_cast(x), op_->Width(), op_->Width()); yr.MakeRef(y, 0, op_->Height()); yi.MakeRef(y, 
op_->Height(), op_->Height()); + mfem::Array X(2); + mfem::Array Y(2); + X[0] = &xr; + X[1] = &xi; + Y[0] = &yr; + Y[1] = &yi; op_->ArrayMult(X, Y); yr.SyncAliasMemory(y); yi.SyncAliasMemory(y); diff --git a/palace/linalg/mumps.hpp b/palace/linalg/mumps.hpp index 7cb4c77ad..a6620aaf4 100644 --- a/palace/linalg/mumps.hpp +++ b/palace/linalg/mumps.hpp @@ -39,10 +39,7 @@ class MumpsSolver : public mfem::MUMPSSolver { } - void SetOperator(const Operator &op) override - { - MFEM_ABORT("MumpsSolver requires a ParOperator operator!"); - } + using mfem::MUMPSSolver::SetOperator; void SetOperator(const ParOperator &op); }; diff --git a/palace/linalg/strumpack.cpp b/palace/linalg/strumpack.cpp index b3bb96154..1db94d765 100644 --- a/palace/linalg/strumpack.cpp +++ b/palace/linalg/strumpack.cpp @@ -104,16 +104,22 @@ StrumpackSolverBase::StrumpackSolverBase( } template -void StrumpackSolverBase::SetOperator(const ParOperator &op) +void StrumpackSolverBase::SetOperator(const Operator &op) { // Convert the input operator to a distributed STRUMPACK matrix (always assume a symmetric // sparsity pattern). Safe to delete the matrix since STRUMPACK copies it on input. - mfem::STRUMPACKRowLocMatrix A(const_cast(&op)->ParallelAssemble(), true); + mfem::STRUMPACKRowLocMatrix A(op, true); // Set up base class. 
StrumpackSolverType::SetOperator(A); } +template +void StrumpackSolverBase::SetOperator(const ParOperator &op) +{ + SetOperator(const_cast(&op)->ParallelAssemble()); +} + template class StrumpackSolverBase; template class StrumpackSolverBase; diff --git a/palace/linalg/strumpack.hpp b/palace/linalg/strumpack.hpp index c1703c60b..fa1be49c2 100644 --- a/palace/linalg/strumpack.hpp +++ b/palace/linalg/strumpack.hpp @@ -37,10 +37,7 @@ class StrumpackSolverBase : public StrumpackSolverType { } - void SetOperator(const Operator &op) override - { - MFEM_ABORT("StrumpackSolver requires a ParOperator operator!"); - } + void SetOperator(const Operator &op) override; void SetOperator(const ParOperator &op); }; diff --git a/palace/linalg/superlu.cpp b/palace/linalg/superlu.cpp index e018a113f..f787ca172 100644 --- a/palace/linalg/superlu.cpp +++ b/palace/linalg/superlu.cpp @@ -72,7 +72,7 @@ SuperLUSolver::SuperLUSolver(MPI_Comm comm, config::LinearSolverData::SymFactTyp solver.SetSymmetricPattern(true); // Always symmetric sparsity pattern } -void SuperLUSolver::SetOperator(const ParOperator &op) +void SuperLUSolver::SetOperator(const Operator &op) { // We need to save A because SuperLU does not copy the input matrix. For repeated // factorizations, always reuse the sparsity pattern. @@ -80,13 +80,17 @@ void SuperLUSolver::SetOperator(const ParOperator &op) { solver.SetFact(mfem::superlu::SamePattern_SameRowPerm); } - A = std::make_unique( - const_cast(&op)->ParallelAssemble()); + A = std::make_unique(op); // Set up base class. 
solver.SetOperator(*A); } +void SuperLUSolver::SetOperator(const ParOperator &op) +{ + SetOperator(const_cast(&op)->ParallelAssemble()); +} + } // namespace palace #endif diff --git a/palace/linalg/superlu.hpp b/palace/linalg/superlu.hpp index e8355f9bb..c4e16799b 100644 --- a/palace/linalg/superlu.hpp +++ b/palace/linalg/superlu.hpp @@ -35,10 +35,7 @@ class SuperLUSolver : public mfem::Solver { } - void SetOperator(const Operator &op) override - { - MFEM_ABORT("SuperLUSolver requires a ParOperator operator!"); - } + void SetOperator(const Operator &op) override; void SetOperator(const ParOperator &op); void Mult(const Vector &x, Vector &y) const override { solver.Mult(x, y); } diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp index 04d1f4ae8..e81fea645 100644 --- a/palace/models/curlcurloperator.cpp +++ b/palace/models/curlcurloperator.cpp @@ -163,7 +163,7 @@ std::unique_ptr CurlCurlOperator::GetCurlMatrix() return std::make_unique(std::move(curl), GetNDSpace(), GetRTSpace(), true); } -void CurlCurlOperator::GetExcitationVector(int idx, mfem::Vector &RHS) +void CurlCurlOperator::GetExcitationVector(int idx, Vector &RHS) { // Assemble the surface current excitation +J. 
The SurfaceCurrentOperator assembles -J // (meant for time or frequency domain Maxwell discretization, so we multiply by -1 to diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp index 9f409ff54..6a4a2800d 100644 --- a/palace/models/laplaceoperator.cpp +++ b/palace/models/laplaceoperator.cpp @@ -152,7 +152,7 @@ void LaplaceOperator::GetStiffnessMatrix(std::vector epsilon_func(mat_op); auto k = std::make_unique(&h1_fespace_l); - k->AddDomainIntegrator(new mfem::DiffusionIntegrator(epsilon_func)); + k->AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); k->SetAssemblyLevel(assembly_level); k->Assemble(skip_zeros); k->Finalize(skip_zeros); @@ -186,8 +186,8 @@ std::unique_ptr LaplaceOperator::GetGradMatrix() return std::make_unique(std::move(grad), GetH1Space(), GetNDSpace(), true); } -void LaplaceOperator::GetExcitationVector(int idx, const ParOperator &K, mfem::Vector &X, - mfem::Vector &RHS) +void LaplaceOperator::GetExcitationVector(int idx, const ParOperator &K, Vector &X, + Vector &RHS) { // Apply the Dirichlet BCs to the solution vector: V = 1 on terminal boundaries with the // given index, V = 0 on all ground and other terminal boundaries. 
diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp index 12100f66e..3c7f1609f 100644 --- a/palace/models/waveportoperator.cpp +++ b/palace/models/waveportoperator.cpp @@ -3,12 +3,17 @@ #include "waveportoperator.hpp" -#include +#include +#include #include "fem/coefficient.hpp" #include "fem/integrator.hpp" #include "linalg/arpack.hpp" -#include "linalg/hypre.hpp" +#include "linalg/mumps.hpp" +#include "linalg/operator.hpp" #include "linalg/slepc.hpp" +#include "linalg/strumpack.hpp" +#include "linalg/superlu.hpp" +#include "linalg/vector.hpp" #include "models/materialoperator.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" @@ -17,227 +22,289 @@ namespace palace { +using namespace std::complex_literals; + namespace { +void GetEssentialTrueDofs(mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &h1_fespace, + const mfem::Array &attr_marker, + const mfem::Array &dbc_marker, + mfem::Array &nd_dbc_tdof_list, + mfem::Array &h1_dbc_tdof_list) +{ + // Mark all ND and H1 dofs which are not on the port, and then mark PEC boundaries on + // the port as well. + mfem::Array nd_tdof_list, h1_tdof_list; + nd_fespace.GetEssentialTrueDofs(attr_marker, nd_tdof_list); + h1_fespace.GetEssentialTrueDofs(attr_marker, h1_tdof_list); + nd_fespace.GetEssentialTrueDofs(dbc_marker, nd_dbc_tdof_list); + h1_fespace.GetEssentialTrueDofs(dbc_marker, h1_dbc_tdof_list); + + mfem::Array nd_dbc_tdof_marker(nd_fespace.GetTrueVSize()), + h1_dbc_tdof_marker(h1_fespace.GetTrueVSize()); + nd_dbc_tdof_marker = 1; + h1_dbc_tdof_marker = 1; + for (auto tdof : nd_tdof_list) + { + nd_dbc_tdof_marker[tdof] = 0; + } + for (auto tdof : nd_dbc_tdof_list) + { + nd_dbc_tdof_marker[tdof] = 1; + } + for (auto tdof : h1_tdof_list) + { + h1_dbc_tdof_marker[tdof] = 0; + } + for (auto tdof : h1_dbc_tdof_list) + { + h1_dbc_tdof_marker[tdof] = 1; + } + + // Convert back to a list. 
+ nd_dbc_tdof_list.DeleteAll(); + nd_dbc_tdof_list.Reserve(nd_fespace.GetTrueVSize()); + for (int i = 0; i < nd_dbc_tdof_marker.Size(); i++) + { + if (nd_dbc_tdof_marker[i]) + { + nd_dbc_tdof_list.Append(i); + } + } + h1_dbc_tdof_list.DeleteAll(); + h1_dbc_tdof_list.Reserve(h1_fespace.GetTrueVSize()); + for (int i = 0; i < h1_dbc_tdof_marker.Size(); i++) + { + if (h1_dbc_tdof_marker[i]) + { + h1_dbc_tdof_list.Append(i); + } + } +} + constexpr int skip_zeros = 0; -inline mfem::HypreParMatrix GetBtt(const MaterialOperator &mat_op, - mfem::ParFiniteElementSpace &nd_fespace, - mfem::Array &attr_marker) +std::unique_ptr GetBtt(const MaterialOperator &mat_op, + mfem::ParFiniteElementSpace &nd_fespace, + mfem::Array &attr_marker) { // Mass matrix: Bₜₜ = (μ⁻¹ u, v). constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_PERMEABILITY; MaterialPropertyCoefficient muinv_func(mat_op); - mfem::ParBilinearForm btt(&nd_fespace); - btt.AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(muinv_func), attr_marker); - // btt.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - btt.Assemble(skip_zeros); - btt.Finalize(skip_zeros); - return *btt.ParallelAssemble(); + auto btt = std::make_unique(&nd_fespace); + btt->AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(muinv_func), attr_marker); + btt->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + btt->Assemble(skip_zeros); + btt->Finalize(skip_zeros); + return std::make_unique(std::move(btt), nd_fespace, nd_fespace); } -inline mfem::HypreParMatrix GetBtn(const MaterialOperator &mat_op, - mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace, - mfem::Array &attr_marker) +std::unique_ptr GetBtn(const MaterialOperator &mat_op, + mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &h1_fespace, + mfem::Array &attr_marker) { // Mass matrix: Bₜₙ = (μ⁻¹ ∇ₜ u, v). 
constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_PERMEABILITY; MaterialPropertyCoefficient muinv_func(mat_op); - mfem::ParMixedBilinearForm btn(&h1_fespace, &nd_fespace); - btn.AddBoundaryIntegrator(new mfem::MixedVectorGradientIntegrator(muinv_func), - attr_marker); - // btn.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - btn.Assemble(skip_zeros); - btn.Finalize(skip_zeros); - return *btn.ParallelAssemble(); + auto btn = std::make_unique(&h1_fespace, &nd_fespace); + btn->AddBoundaryIntegrator(new mfem::MixedVectorGradientIntegrator(muinv_func), + attr_marker); + btn->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + btn->Assemble(skip_zeros); + btn->Finalize(skip_zeros); + return std::make_unique(std::move(btn), h1_fespace, nd_fespace); } -struct Bnn -{ - mfem::HypreParMatrix Bnn1; - mfem::HypreParMatrix Bnn2r; - std::optional Bnn2i; -}; - -inline Bnn GetBnn(const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &h1_fespace, - mfem::Array &attr_marker) +std::array, 3> GetBnn(const MaterialOperator &mat_op, + mfem::ParFiniteElementSpace &h1_fespace, + mfem::Array &attr_marker) { // Mass matrix: Bₙₙ = (μ⁻¹ ∇ₜ u, ∇ₜ v) - ω² (ε u, v) = Bₙₙ₁ - ω² Bₙₙ₂. 
constexpr MaterialPropertyType MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY; MaterialPropertyCoefficient muinv_func(mat_op); - mfem::ParBilinearForm bnn1(&h1_fespace); - bnn1.AddBoundaryIntegrator(new mfem::MixedGradGradIntegrator(muinv_func), attr_marker); - // bnn1.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - bnn1.Assemble(skip_zeros); - bnn1.Finalize(skip_zeros); + auto bnn1 = std::make_unique(&h1_fespace); + bnn1->AddBoundaryIntegrator(new mfem::MixedGradGradIntegrator(muinv_func), attr_marker); + bnn1->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + bnn1->Assemble(skip_zeros); + bnn1->Finalize(skip_zeros); constexpr MaterialPropertyType MatTypeEpsReal = MaterialPropertyType::PERMITTIVITY_REAL; NormalProjectedCoefficient epsilon_func( std::make_unique>(mat_op)); - mfem::ParBilinearForm bnn2r(&h1_fespace); - bnn2r.AddBoundaryIntegrator(new mfem::MixedScalarMassIntegrator(epsilon_func), - attr_marker); - // bnn2r.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - bnn2r.Assemble(skip_zeros); - bnn2r.Finalize(skip_zeros); + auto bnn2r = std::make_unique(&h1_fespace); + bnn2r->AddBoundaryIntegrator(new mfem::MixedScalarMassIntegrator(epsilon_func), + attr_marker); + bnn2r->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + bnn2r->Assemble(skip_zeros); + bnn2r->Finalize(skip_zeros); // Contribution for loss tangent: ε => ε * (1 - i tan(δ)). 
if (!mat_op.HasLossTangent()) { - return {*bnn1.ParallelAssemble(), *bnn2r.ParallelAssemble()}; + return {std::make_unique(std::move(bnn1), h1_fespace, h1_fespace), + std::make_unique(std::move(bnn2r), h1_fespace, h1_fespace), + nullptr}; } constexpr MaterialPropertyType MatTypeEpsImag = MaterialPropertyType::PERMITTIVITY_IMAG; NormalProjectedCoefficient negepstandelta_func( std::make_unique>(mat_op)); - mfem::ParBilinearForm bnn2i(&h1_fespace); - bnn2i.AddBoundaryIntegrator(new mfem::MixedScalarMassIntegrator(negepstandelta_func), - attr_marker); - // bnn2i.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - bnn2i.Assemble(skip_zeros); - bnn2i.Finalize(skip_zeros); - return {*bnn1.ParallelAssemble(), *bnn2r.ParallelAssemble(), *bnn2i.ParallelAssemble()}; + auto bnn2i = std::make_unique(&h1_fespace); + bnn2i->AddBoundaryIntegrator(new mfem::MixedScalarMassIntegrator(negepstandelta_func), + attr_marker); + bnn2i->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + bnn2i->Assemble(skip_zeros); + bnn2i->Finalize(skip_zeros); + return {std::make_unique(std::move(bnn1), h1_fespace, h1_fespace), + std::make_unique(std::move(bnn2r), h1_fespace, h1_fespace), + std::make_unique(std::move(bnn2i), h1_fespace, h1_fespace)}; } -struct Att -{ - mfem::HypreParMatrix Att1; - mfem::HypreParMatrix Att2r; - std::optional Att2i; -}; - -inline Att GetAtt(const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &nd_fespace, - mfem::Array &attr_marker) +std::array, 3> GetAtt(const MaterialOperator &mat_op, + mfem::ParFiniteElementSpace &nd_fespace, + mfem::Array &attr_marker) { // Stiffness matrix: Aₜₜ = (μ⁻¹ ∇ₜ x u, ∇ₜ x v) - ω² (ε u, v) = Aₜₜ₁ - ω² Aₜₜ₂. 
constexpr MaterialPropertyType MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY; NormalProjectedCoefficient muinv_func( std::make_unique>(mat_op)); - mfem::ParBilinearForm att1(&nd_fespace); - att1.AddBoundaryIntegrator(new mfem::CurlCurlIntegrator(muinv_func), attr_marker); - // att1.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - att1.Assemble(skip_zeros); - att1.Finalize(skip_zeros); + auto att1 = std::make_unique(&nd_fespace); + att1->AddBoundaryIntegrator(new mfem::CurlCurlIntegrator(muinv_func), attr_marker); + att1->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + att1->Assemble(skip_zeros); + att1->Finalize(skip_zeros); constexpr MaterialPropertyType MatTypeEpsReal = MaterialPropertyType::PERMITTIVITY_REAL; MaterialPropertyCoefficient epsilon_func(mat_op); - mfem::ParBilinearForm att2r(&nd_fespace); - att2r.AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func), - attr_marker); - // att2r.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - att2r.Assemble(skip_zeros); - att2r.Finalize(skip_zeros); + auto att2r = std::make_unique(&nd_fespace); + att2r->AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func), + attr_marker); + att2r->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + att2r->Assemble(skip_zeros); + att2r->Finalize(skip_zeros); // Contribution for loss tangent: ε => ε * (1 - i tan(δ)). 
if (!mat_op.HasLossTangent()) { - return {*att1.ParallelAssemble(), *att2r.ParallelAssemble()}; + return {std::make_unique(std::move(att1), nd_fespace, nd_fespace), + std::make_unique(std::move(att2r), nd_fespace, nd_fespace), + nullptr}; } constexpr MaterialPropertyType MatTypeEpsImag = MaterialPropertyType::PERMITTIVITY_IMAG; MaterialPropertyCoefficient negepstandelta_func(mat_op); - mfem::ParBilinearForm att2i(&nd_fespace); - att2i.AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(negepstandelta_func), - attr_marker); - // att2i.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - att2i.Assemble(skip_zeros); - att2i.Finalize(skip_zeros); - return {*att1.ParallelAssemble(), *att2r.ParallelAssemble(), *att2i.ParallelAssemble()}; + auto att2i = std::make_unique(&nd_fespace); + att2i->AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(negepstandelta_func), + attr_marker); + att2i->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + att2i->Assemble(skip_zeros); + att2i->Finalize(skip_zeros); + return {std::make_unique(std::move(att1), nd_fespace, nd_fespace), + std::make_unique(std::move(att2r), nd_fespace, nd_fespace), + std::make_unique(std::move(att2i), nd_fespace, nd_fespace)}; } -inline mfem::HypreParMatrix GetZ(mfem::ParFiniteElementSpace &fespace) +std::array, 6> +GetSystemMatrices(std::unique_ptr Btt, std::unique_ptr Btn, + std::unique_ptr Bnn1, std::unique_ptr Bnn2r, + std::unique_ptr Bnn2i, std::unique_ptr Att1, + std::unique_ptr Att2r, std::unique_ptr Att2i, + mfem::Array &nd_dbc_tdof_list, mfem::Array &h1_dbc_tdof_list) { - // Zero matrix on ND or H1 space dofs. - mfem::ParBilinearForm z(&fespace); - // z.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - z.Assemble(skip_zeros); - z.Finalize(skip_zeros); - return *z.ParallelAssemble(); + // Construct the 2x2 block matrices for the eigenvalue problem. We pre-compute the + // eigenvalue problem matrices such that: + // A = A₁ - ω² A₂, B = A + 1/Θ² B₃ - ω²/Θ² B₄. 
+ Btt->SetEssentialTrueDofs(nd_dbc_tdof_list, Operator::DIAG_ZERO); + Btn->SetEssentialTrueDofs(&h1_dbc_tdof_list, &nd_dbc_tdof_list, Operator::DIAG_ZERO); + + Bnn1->SetEssentialTrueDofs(h1_dbc_tdof_list, Operator::DIAG_ZERO); + Bnn2r->SetEssentialTrueDofs(h1_dbc_tdof_list, Operator::DIAG_ZERO); + if (Bnn2i) + { + Bnn2i->SetEssentialTrueDofs(h1_dbc_tdof_list, Operator::DIAG_ZERO); + } + + Att1->SetEssentialTrueDofs(nd_dbc_tdof_list, Operator::DIAG_ONE); + Att2r->SetEssentialTrueDofs(nd_dbc_tdof_list, Operator::DIAG_ZERO); + if (Att2i) + { + Att2i->SetEssentialTrueDofs(nd_dbc_tdof_list, Operator::DIAG_ZERO); + } + + std::unique_ptr BtnT(Btn->ParallelAssemble().Transpose()); + + mfem::Array2D blocks(2, 2); + blocks(0, 0) = &Btt->ParallelAssemble(); + blocks(0, 1) = &Btn->ParallelAssemble(); + blocks(1, 0) = BtnT.get(); + blocks(1, 1) = &Bnn1->ParallelAssemble(); + std::unique_ptr A1(mfem::HypreParMatrixFromBlocks(blocks)); + + auto &Ztt = Btt->ParallelAssemble(); + Ztt *= 0.0; + + blocks = nullptr; + blocks(0, 0) = &Ztt; + blocks(1, 1) = &Bnn2r->ParallelAssemble(); + std::unique_ptr A2r(mfem::HypreParMatrixFromBlocks(blocks)); + + std::unique_ptr A2i; + if (Bnn2i) + { + blocks(1, 1) = &Bnn2i->ParallelAssemble(); + A2i.reset(mfem::HypreParMatrixFromBlocks(blocks)); + } + + auto &Inn = Bnn1->ParallelAssemble(); + Inn *= 0.0; + Inn.EliminateZeroRows(); // Sets diagonal entries to 1 + + blocks = nullptr; + blocks(0, 0) = &Att1->ParallelAssemble(); + blocks(1, 1) = &Inn; + std::unique_ptr B3(mfem::HypreParMatrixFromBlocks(blocks)); + + auto &Znn = Inn; + Znn *= 0.0; + + blocks(0, 0) = &Att2r->ParallelAssemble(); + blocks(1, 1) = &Znn; + std::unique_ptr B4r(mfem::HypreParMatrixFromBlocks(blocks)); + + std::unique_ptr B4i; + if (Att2i) + { + blocks(0, 0) = &Att2i->ParallelAssemble(); + B4i.reset(mfem::HypreParMatrixFromBlocks(blocks)); + } + + return {std::move(A1), std::move(A2r), std::move(A2i), + std::move(B3), std::move(B4r), std::move(B4i)}; } -// struct SystemMatrices 
-// { -// petsc::PetscParMatrix A1; -// petsc::PetscParMatrix A2; -// petsc::PetscParMatrix B3; -// petsc::PetscParMatrix B4; -// }; - -// SystemMatrices -// GetSystemMatrices(const mfem::HypreParMatrix &Att1, const mfem::HypreParMatrix &Att2r, -// const std::optional &Att2i, -// const mfem::HypreParMatrix &Btt, const mfem::HypreParMatrix &Btn, -// const mfem::HypreParMatrix &Bnn1, const mfem::HypreParMatrix &Bnn2r, -// const std::optional &Bnn2i, -// const mfem::HypreParMatrix &Ztt, const mfem::HypreParMatrix &Znn, -// const mfem::Array &nd_tdof_list, -// const mfem::Array &h1_tdof_list, int nd_tdof_offset) -// { -// // Construct the 2x2 block matrices for the eigenvalue problem. We pre-compute the -// // eigenvalue problem matrices such that: -// // A = A₁ - ω² A₂, B = A + 1/Θ² B₃ - ω²/Θ² B₄. -// mfem::Array2D blocks(2, 2); -// blocks(0, 0) = &Btt; -// blocks(0, 1) = &Btn; -// blocks(1, 0) = Btn.Transpose(); -// blocks(1, 1) = &Bnn1; -// std::unique_ptr hA1s(mfem::HypreParMatrixFromBlocks(blocks)); -// auto A1s = petsc::PetscAijMatrix(*hA1s); - -// blocks = nullptr; -// blocks(0, 0) = &Ztt; -// blocks(1, 1) = &Bnn2r; -// std::unique_ptr hA2r(mfem::HypreParMatrixFromBlocks(blocks)); -// auto A2s = [&]() -// { -// if (!Bnn2i) -// { -// return petsc::PetscAijMatrix(*hA2r); -// } -// blocks(1, 1) = &*Bnn2i; -// std::unique_ptr hA2i(mfem::HypreParMatrixFromBlocks(blocks)); -// return petsc::PetscAijMatrix(*hA2r, *hA2i); -// }(); - -// blocks = nullptr; -// blocks(0, 0) = &Att1; -// blocks(1, 1) = &Znn; -// std::unique_ptr hB3s(mfem::HypreParMatrixFromBlocks(blocks)); -// auto B3s = petsc::PetscAijMatrix(*hB3s); - -// blocks = nullptr; -// blocks(0, 0) = &Att2r; -// blocks(1, 1) = &Znn; -// std::unique_ptr hB4r(mfem::HypreParMatrixFromBlocks(blocks)); -// auto B4s = [&]() -// { -// if (!Att2i) -// { -// return petsc::PetscAijMatrix(*hB4r); -// } -// blocks(0, 0) = &*Att2i; -// std::unique_ptr hB4i(mfem::HypreParMatrixFromBlocks(blocks)); -// return 
petsc::PetscAijMatrix(*hB4r, *hB4i); -// }(); - -// // Consolidate list of local ND and H1 true dofs before extracting the respective -// // submatrices. The matrix is still distributed over the same number of processors, -// // though some are empty (PETSc handles this). -// mfem::Array tdof_list; -// tdof_list.Reserve(nd_tdof_list.Size() + h1_tdof_list.Size()); -// for (auto tdof : nd_tdof_list) -// { -// tdof_list.Append(tdof); -// } -// for (auto tdof : h1_tdof_list) -// { -// tdof_list.Append(tdof + nd_tdof_offset); -// } -// return {*A1s.GetSubMatrix(tdof_list, tdof_list), *A2s.GetSubMatrix(tdof_list, -// tdof_list), -// *B3s.GetSubMatrix(tdof_list, tdof_list), *B4s.GetSubMatrix(tdof_list, -// tdof_list)}; -// } +void GetInitialSpace(mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &h1_fespace, + const mfem::Array &nd_dbc_tdof_list, + const mfem::Array &h1_dbc_tdof_list, ComplexVector &v) +{ + // Initial space chosen as such that B v₀ = y₀, with y₀ = [y₀ₜ, 0, ... 0]ᵀ ⟂ null(A) + // (with Aₜₜ nonsingular). See Lee, Sun, and Cendes, 1991 for reference. + // Note: When the eigenvalue solver uses a standard ℓ²-inner product instead of B-inner + // product (since we use a general non-Hermitian solver due to complex symmetric B), then + // we just use v0 = y0 directly. 
+ v.SetSize(2 * (nd_fespace.GetTrueVSize() + h1_fespace.GetTrueVSize())); + linalg::SetRandom(nd_fespace.GetComm(), v); + // v = std::complex(1.0, 0.0); + v.Real().SetSubVector(nd_dbc_tdof_list, 0.0); + v.Imag().SetSubVector(nd_dbc_tdof_list, 0.0); + for (int i = nd_fespace.GetTrueVSize(); + i < nd_fespace.GetTrueVSize() + h1_fespace.GetTrueVSize(); i++) + { + v.Real()[i] = v.Imag()[i] = 0.0; + } + v.SyncAlias(); +} } // namespace @@ -337,87 +404,143 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera attr_marker); // Construct operators for the generalized eigenvalue problem: - // [Aₜₜ 0] [eₜ] = -kₙ² [Bₜₜ Bₜₙ] [eₜ] + // [Aₜₜ 0] [eₜ] = -kₙ² [Bₜₜ Bₜₙ] [eₜ] // [0 0] [eₙ] [Bₜₙᵀ Bₙₙ] [eₙ] // for the wave port of the given index. The transformed variables are related to the true // field by Eₜ = eₜ/kₙ and Eₙ = ieₙ. This is solved on the global mesh so the result is a // grid function over the entire space, not just the port boundary (so that it can be // queried from functions which use the global mesh). - GetTrueDofs(nd_fespace, h1_fespace, dbc_marker, nd_attr_tdof_list, h1_attr_tdof_list); - - // Construct the system matrices. We will actually solve the shifted problem: + // + // We will actually solve the shifted problem A e = λ B e, where (see Lee, Sun, and + // Cendes, 1991): // [Bₜₜ Bₜₙ] [eₜ] = λ [Bₜₜ + 1/Θ² Aₜₜ Bₜₙ] [eₜ] - // [Bₜₙᵀ Bₙₙ] [eₙ] [Bₜₙᵀ Bₙₙ] [eₙ] - // (see Lee, Sun, and Cendes, 1991). Here we have λ = Θ²/(Θ²-kₙ²), where Θ² bounds the - // maximum kₙ² and is taken as ω² μₘₐₓ εₘₐₓ over the entire simulation domain. - double cmin = mfem::infinity(); + // [Bₜₙᵀ Bₙₙ] [eₙ] [Bₜₙᵀ Bₙₙ] [eₙ] . + // Here we have λ = Θ²/(Θ²-kₙ²), where Θ² bounds the maximum kₙ² and is taken as Θ² = + // ω² μₘₐₓ εₘₐₓ over the entire simulation domain. 
+ double c_min = mfem::infinity(); for (auto attr : nd_fespace.GetParMesh()->attributes) { double s = mat_op.GetLightSpeedMin(attr); - if (s < cmin) + if (s < c_min) { - cmin = s; + c_min = s; } } - MFEM_VERIFY(cmin > 0.0, "Invalid material speed of light detected in WavePortOperator!"); - muepsmax = 1.0 / (cmin * cmin); + MFEM_VERIFY(c_min > 0.0, "Invalid material speed of light detected in WavePortOperator!"); + mu_eps_max = 1.0 / (c_min * c_min); // Pre-compute problem matrices such that: // A = A₁ - ω² A₂, B = A + 1/Θ² B₃ - ω²/Θ² B₄. - // First, create parallel objects and then gather to matrices and vectors to root. + mfem::Array nd_dbc_tdof_list, h1_dbc_tdof_list; + GetEssentialTrueDofs(nd_fespace, h1_fespace, attr_marker, dbc_marker, nd_dbc_tdof_list, + h1_dbc_tdof_list); + attr_tdof_sizes[0] = nd_fespace.GetTrueVSize() - nd_dbc_tdof_list.Size(); + attr_tdof_sizes[1] = h1_fespace.GetTrueVSize() - h1_dbc_tdof_list.Size(); + Mpi::GlobalSum(2, attr_tdof_sizes, nd_fespace.GetComm()); + { + auto Btt = GetBtt(mat_op, nd_fespace, attr_marker); + auto Btn = GetBtn(mat_op, nd_fespace, h1_fespace, attr_marker); + auto [Bnn1, Bnn2r, Bnn2i] = GetBnn(mat_op, h1_fespace, attr_marker); + auto [Att1, Att2r, Att2i] = GetAtt(mat_op, nd_fespace, attr_marker); + + std::tie(A1, A2r, A2i, B3, B4r, B4i) = + GetSystemMatrices(std::move(Btt), std::move(Btn), std::move(Bnn1), std::move(Bnn2r), + std::move(Bnn2i), std::move(Att1), std::move(Att2r), + std::move(Att2i), nd_dbc_tdof_list, h1_dbc_tdof_list); + } + + // Allocate storage for the eigenvalue problem operators. We have sparsity(A2) ⊆ + // sparsity(A1), sparsity(B3) = sparsity(B4) ⊆ sparsity(A1) + { + P = std::make_unique(*A1); + *P *= 0.0; + A = std::make_unique( + std::make_unique(*P), + std::make_unique(*P)); + B = std::make_unique( + std::make_unique(*P), + std::make_unique(*P)); + } + + // Create vector for initial space for eigenvalue solves (for nullspace of [Aₜₜ 0] + // [0 0] ). 
+ GetInitialSpace(nd_fespace, h1_fespace, nd_dbc_tdof_list, h1_dbc_tdof_list, v0); + e0.SetSize(v0.Size()); + e0t.SetSize(2 * nd_fespace.GetTrueVSize()); + e0n.SetSize(2 * h1_fespace.GetTrueVSize()); + + // Configure the eigenvalue problem solver. As for the full 3D case, the system matrices + // are in general complex and symmetric. We supply the operators to the solver in + // shift-inverted form and handle the back-transformation externally. { - const auto &Btt = GetBtt(mat_op, nd_fespace, attr_marker); - const auto &Btn = GetBtn(mat_op, nd_fespace, h1_fespace, attr_marker); - const auto &[Bnn1, Bnn2r, Bnn2i] = GetBnn(mat_op, h1_fespace, attr_marker); - const auto &[Att1, Att2r, Att2i] = GetAtt(mat_op, nd_fespace, attr_marker); - const auto &Ztt = GetZ(nd_fespace); - const auto &Znn = GetZ(h1_fespace); - // auto system_mat = - // GetSystemMatrices(Att1, Att2r, Att2i, Btt, Btn, Bnn1, Bnn2r, Bnn2i, Ztt, Znn, - // nd_attr_tdof_list, h1_attr_tdof_list, - // nd_fespace.GetTrueVSize()); - // A1 = std::make_unique(std::move(system_mat.A1)); - // A2 = std::make_unique(std::move(system_mat.A2)); - // B3 = std::make_unique(std::move(system_mat.B3)); - // B4 = std::make_unique(std::move(system_mat.B4)); + // Define the linear solver to be used for solving systems associated with the + // generalized eigenvalue problem. + constexpr int print = 0; + config::LinearSolverData::Type pc_type = config::LinearSolverData::Type::DEFAULT; +#if defined(MFEM_USE_SUPERLU) + pc_type = config::LinearSolverData::Type::SUPERLU; +#elif defined(MFEM_USE_STRUMPACK) + pc_type = config::LinearSolverData::Type::STRUMPACK; +#elif defined(MFEM_USE_MUMPS) + pc_type = config::LinearSolverData::Type::MUMPS; +#else +#error "Wave port solver requires building with SuperLU_DIST, STRUMPACK, or MUMPS!" 
+#endif + std::unique_ptr pc; + if (pc_type == config::LinearSolverData::Type::SUPERLU) + { +#if defined(MFEM_USE_SUPERLU) + pc = std::make_unique(nd_fespace.GetComm(), 0, false, print); +#endif + } + else if (pc_type == config::LinearSolverData::Type::STRUMPACK) + { +#if defined(MFEM_USE_STRUMPACK) + pc = std::make_unique( + nd_fespace.GetComm(), 0, strumpack::CompressionType::NONE, 0.0, 0, 0, print); +#endif + } + else // config::LinearSolverData::Type::MUMPS + { +#if defined(MFEM_USE_MUMPS) + pc = std::make_unique( + nd_fespace.GetComm(), mfem::MUMPSSolver::SYMMETRIC_INDEFINITE, 0, 0.0, print); +#endif + } + ksp = std::make_unique( + std::make_unique(nd_fespace.GetComm()), std::move(pc)); + + // Define the eigenvalue solver. + config::EigenSolverData::Type type = config::EigenSolverData::Type::DEFAULT; +#if defined(PALACE_WITH_SLEPC) + type = config::EigenSolverData::Type::SLEPC; +#elif defined(PALACE_WITH_ARPACK) + type = config::EigenSolverData::Type::ARPACK; +#else +#error "Wave port solver requires building with ARPACK or SLEPc!" +#endif + if (type == config::EigenSolverData::Type::ARPACK) + { +#if defined(PALACE_WITH_ARPACK) + eigen = std::make_unique(nd_fespace.GetComm(), print); +#endif + } + else // config::EigenSolverData::Type::SLEPC + { +#if defined(PALACE_WITH_SLEPC) + auto slepc = std::make_unique(nd_fespace.GetComm(), print); + slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); + slepc->SetProblemType(slepc::SlepcEigenSolver::ProblemType::GEN_NON_HERMITIAN); + eigen = std::move(slepc); +#endif + } + constexpr double tol = 1.0e-6; + eigen->SetNumModes(mode_idx, std::max(2 * mode_idx + 1, 5)); + eigen->SetTol(tol); + eigen->SetWhichEigenpairs(EigenvalueSolver::WhichType::LARGEST_MAGNITUDE); + eigen->SetLinearSolver(*ksp); } - // Configure sequential vector and scatter from parallel. The original vector is created - // to be compatible with the parallel matrix, and the scatter creates a sequential vector - // compatible with the sequential matrix.
Then, gather matrices so eigenvalue problem can - // be solved sequentially without communication. A1/A2/B3/B4 = nullptr if !root. - // { - // bool root = Mpi::Root(A1->GetComm()); - // e = std::make_unique(*A1); - // scatter = - // std::make_unique(petsc::PetscScatter::Type::TO_ZERO, *e, - // e0); - // A1 = A1->GetSequentialMatrix(root); - // A2 = A2->GetSequentialMatrix(root); - // B3 = B3->GetSequentialMatrix(root); - // B4 = B4->GetSequentialMatrix(root); - // } - // if (A1) - // { - // // sparsity(A2) ⊆ sparsity(A1), sparsity(B4) ⊆ sparsity(B3) ⊆ sparsity(A) - // A = std::make_unique(*A1); - // B = std::make_unique(*A1); - // A->SetSymmetric(); - // B->SetSymmetric(); - // A1->SetSymmetric(); - // A2->SetSymmetric(); - // B3->SetSymmetric(); - // B4->SetSymmetric(); - // } - - // Create vector for initial space (initially parallel, then scattered to root). - // { - // petsc::PetscParVector y(*e); - // GetInitialSpace(nd_attr_tdof_list.Size(), h1_attr_tdof_list.Size(), y); - // y0 = std::make_unique(*e0); - // scatter->Forward(y, *y0); - // } - // Coefficients store references to kₙ, ω so they are updated implicitly at each new // solve. Also, μ⁻¹ is persistent, so no copy is OK. kn0 = 0.0; @@ -426,232 +549,62 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera E0n = std::make_unique(&h1_fespace); nxH0r_func = std::make_unique(*E0t, *E0n, mat_op, false); nxH0i_func = std::make_unique(*E0t, *E0n, mat_op, true); +} - // Configure the eigenvalue problem solver. As for the full 3D case, the system matrices - // are in general complex and symmetric. We supply the operators to the solver in - // shift-inverted form and handle the back- transformation externally. - // if (A) //XX - { - // Define the linear solver to be used for solving systems associated with the - // generalized eigenvalue problem. We use PETSc's sequential sparse solvers.
- int print = 0; - - // XX TODO REVISIT - - // ksp = std::make_unique(A->GetComm(), print, "port_"); - // ksp->SetType(KspSolver::Type::CHOLESKY); // Symmetric indefinite factorization - // ksp->SetOperator(*B); - - // // Define the eigenvalue solver. - // config::EigenSolverData::Type type = config::EigenSolverData::Type::DEFAULT; - // #if defined(PALACE_WITH_ARPACK) && defined(PALACE_WITH_SLEPC) - // if (type == config::EigenSolverData::Type::DEFAULT) - // { - // type = config::EigenSolverData::Type::SLEPC; - // } - // #elif defined(PALACE_WITH_ARPACK) - // if (type == config::EigenSolverData::Type::SLEPC) - // { - // Mpi::Warning("SLEPc eigensolver not available, using ARPACK!\n"); - // } - // type = config::EigenSolverData::Type::ARPACK; - // #elif defined(PALACE_WITH_SLEPC) - // if (type == config::EigenSolverData::Type::ARPACK) - // { - // Mpi::Warning("ARPACK eigensolver not available, using SLEPc!\n"); - // } - // type = config::EigenSolverData::Type::SLEPC; - // #else - // #error "Wave port solver requires building with ARPACK or SLEPc!" 
- // #endif - // if (type == config::EigenSolverData::Type::ARPACK) - // { - // #if defined(PALACE_WITH_ARPACK) - // eigen = std::unique_ptr(new arpack::ArpackEPSSolver(print)); - // #endif - // } - // else // config::EigenSolverData::Type::SLEPC - // { - // #if defined(PALACE_WITH_SLEPC) - // eigen = - // std::unique_ptr(new slepc::SlepcEPSSolver(A->GetComm(), - // print)); - // auto *slepc = dynamic_cast(eigen.get()); - // slepc->SetProblemType(slepc::SlepcEigenSolver::ProblemType::GEN_NON_HERMITIAN); - // slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); - // #endif - // } - // constexpr double tol = 1.0e-6; - // eigen->SetLinearSolver(*ksp); - // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::LARGEST_MAGNITUDE); - // eigen->SetNumModes(mode_idx, std::max(2 * mode_idx + 1, 5)); - // eigen->SetTol(tol); - } -} - -void WavePortData::GetTrueDofs(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace, - const mfem::Array &dbc_marker, - mfem::Array &nd_tdof_list, - mfem::Array &h1_tdof_list) +void WavePortData::Initialize(double omega) { - // Ensures no duplicates in the attribute list for this port index (this would imply a - // mistake in the configuration file). We can, however, have multiple unique ports with - // shared boundary attributes. - nd_fespace.GetEssentialTrueDofs(attr_marker, nd_tdof_list); - h1_fespace.GetEssentialTrueDofs(attr_marker, h1_tdof_list); - int nd_tdofs = nd_tdof_list.Size(); - int h1_tdofs = h1_tdof_list.Size(); - - // Mark all ND and H1 dofs on the port, then unmark PEC boundaries. 
- mfem::Array nd_tdof_marker(nd_fespace.GetTrueVSize()), - h1_tdof_marker(h1_fespace.GetTrueVSize()), nd_dbc_tdof_list, h1_dbc_tdof_list; - nd_tdof_marker = 0; - h1_tdof_marker = 0; - nd_fespace.GetEssentialTrueDofs(dbc_marker, nd_dbc_tdof_list); - h1_fespace.GetEssentialTrueDofs(dbc_marker, h1_dbc_tdof_list); - for (auto tdof : nd_tdof_list) - { - nd_tdof_marker[tdof] = 1; - } - for (auto tdof : nd_dbc_tdof_list) - { - nd_tdof_marker[tdof] = 0; - } - for (auto tdof : h1_tdof_list) - { - h1_tdof_marker[tdof] = 1; - } - for (auto tdof : h1_dbc_tdof_list) + if (omega == omega0) { - h1_tdof_marker[tdof] = 0; + return; } - // Convert back to a list. - nd_tdof_list.DeleteAll(); - nd_tdof_list.Reserve(nd_tdofs); - for (int i = 0; i < nd_tdof_marker.Size(); i++) + // Use pre-computed matrices to construct and solve the generalized eigenvalue problem for + // the desired wave port mode. + double theta2 = mu_eps_max * omega * omega; { - if (nd_tdof_marker[i]) + auto &Ar = dynamic_cast(A->Real()); + auto &Ai = dynamic_cast(A->Imag()); + auto &Br = dynamic_cast(B->Real()); + auto &Bi = dynamic_cast(B->Imag()); + + Ar *= 0.0; + Ar.Add(1.0, *A1); + Ar.Add(-omega * omega, *A2r); + + if (A2i) { - nd_tdof_list.Append(i); + Ai *= 0.0; + Ai.Add(-omega * omega, *A2i); } - } - h1_tdof_list.DeleteAll(); - h1_tdof_list.Reserve(h1_tdofs); - for (int i = 0; i < h1_tdof_marker.Size(); i++) - { - if (h1_tdof_marker[i]) + + Br *= 0.0; + Br.Add(1.0, Ar); + Br.Add(1.0 / theta2, *B3); + Br.Add(-omega * omega / theta2, *B4r); + + if (B4i) { - h1_tdof_list.Append(i); + // When B4i is nonzero, so is A2i. + Bi *= 0.0; + Bi.Add(1.0, Ai); + Bi.Add(-omega * omega / theta2, *B4i); } - } -} -// void WavePortData::GetInitialSpace(int nt, int nn, petsc::PetscParVector &y0) -// { -// // Initial space chosen as such that B v₀ = y₀, with y₀ = [y₀ₜ, 0, ... 0]ᵀ ⟂ null(A) -// // (with Aₜₜ nonsingular). See Lee, Sun, and Cendes, 1991 for reference. 
-// // Note: When the eigenvalue solver uses a standard ℓ²-inner product instead of B-inner -// // product(since we use a general non-Hermitian solver due to complex symmetric B), -// then -// // we just use v0 = y0 directly. -// MFEM_VERIFY(y0.GetSize() == nt + nn, "Invalid vector size!"); -// y0.SetRandomReal(); -// PetscScalar *py0 = y0.GetArray(); -// // for (int i = 0; i < nt; i++) { py0[i] = 1.0; } -// for (int i = nt; i < nt + nn; i++) -// { -// py0[i] = 0.0; -// } -// y0.RestoreArray(py0); -// } - -// std::complex WavePortData::Solve(petsc::PetscParVector &y0, -// petsc::PetscParVector &e0, -// petsc::PetscParVector &e, -// petsc::PetscScatter &scatter) -// { -// double eig[2]; - -// // XX TODO REVISIT... - -// // if (A) // Only on root -// // { -// // // The y0 and e0 vectors are still parallel vectors, but with all data on root. -// We -// // want -// // // true sequential vectors. -// // PetscScalar *pe0 = e0.GetArray(); -// // petsc::PetscParVector e0s(e0.GetSize(), pe0); - -// // // Set starting vector. -// // { -// // PetscScalar *py0 = y0.GetArray(); -// // petsc::PetscParVector y0s(y0.GetSize(), py0); -// // eigen->SetInitialSpace(y0s); -// // y0.RestoreArray(py0); -// // } - -// // #if 0 -// // // Alternatively, use B-orthogonal initial space. Probably want to call SetBMat -// for -// // // the eigensolver in this case. -// // { -// // PetscScalar *py0 = y0.GetArray(); -// // petsc::PetscParVector y0s(y0.GetSize(), py0); -// // petsc::PetscParVector v0s(y0s); -// // ksp->Mult(y0s, v0s); -// // eigen->SetInitialSpace(v0s); -// // y0.RestoreArray(py0); -// // } -// // #endif - -// // // Solve (operators have been set in constructor). 
-// // int num_conv = 0; -// // eigen->SetOperators(*A, *B, EigenSolverBase::ScaleType::NONE); -// // num_conv = eigen->Solve(); -// // MFEM_VERIFY(num_conv >= mode_idx, "Wave port eigensolver did not converge!"); -// // eigen->GetEigenvalue(mode_idx - 1, eig[0], eig[1]); -// // eigen->GetEigenvector(mode_idx - 1, e0s); -// // e0.RestoreArray(pe0); -// // } - -// // Scatter the result to all processors. -// scatter.Reverse(e0, e); -// Mpi::Broadcast(2, eig, 0, e.GetComm()); -// return {eig[0], eig[1]}; -// } - -void WavePortData::Initialize(double omega) -{ - if (omega == omega0) - { - return; + *P *= 0.0; + P->Add(1.0, Br); + P->Add(1.0, Bi); } - // Use pre-computed matrices to construct and solve the generalized eigenvalue problem for - // the desired wave port mode. - double theta2 = muepsmax * omega * omega; - // if (A) - // { - // MFEM_VERIFY(A1 && A2 && B3 && B4 && A && B, - // "Boundary mode eigenvalue problem operators uninitialized for solve!"); - // A->Scale(0.0); - // A->AXPY(1.0, *A1, petsc::PetscParMatrix::NNZStructure::SAME); - // A->AXPY(-omega * omega, *A2, petsc::PetscParMatrix::NNZStructure::SUBSET); - // B->Scale(0.0); - // B->AXPY(1.0, *A, petsc::PetscParMatrix::NNZStructure::SAME); - // B->AXPY(1.0 / theta2, *B3, petsc::PetscParMatrix::NNZStructure::SUBSET); - // B->AXPY(-omega * omega / theta2, *B4, petsc::PetscParMatrix::NNZStructure::SUBSET); - // } - // Configure and solve the eigenvalue problem for the desired boundary mode. - std::complex lambda; - // lambda = Solve(*y0, *e0, *e, *scatter); + ksp->SetOperator(*B, *P); + eigen->SetOperators(*A, *B, EigenvalueSolver::ScaleType::NONE); + eigen->SetInitialSpace(v0); + int num_conv = eigen->Solve(); + MFEM_VERIFY(num_conv >= mode_idx, "Wave port eigensolver did not converge!"); + std::complex lambda = eigen->GetEigenvalue(mode_idx - 1); // Extract the eigenmode solution and postprocess. The extracted eigenvalue is λ = - // Θ²/(Θ²-kₙ²). + // Θ² / (Θ² - kₙ²). 
MFEM_VERIFY(lambda.real() > 1.0 / (1.0 - 1.0e-2), "Computed wave port mode is or is very close to being evanescent " << "(λ = " << lambda << ")!"); @@ -660,71 +613,36 @@ void WavePortData::Initialize(double omega) dynamic_cast(*nxH0r_func).SetFrequency(omega0, kn0); dynamic_cast(*nxH0i_func).SetFrequency(omega0, kn0); - mfem::Vector etr(nd_attr_tdof_list.Size()), eti(nd_attr_tdof_list.Size()), - enr(h1_attr_tdof_list.Size()), eni(h1_attr_tdof_list.Size()); - // MFEM_VERIFY(e->GetSize() == etr.Size() + enr.Size(), - // "Unexpected vector size in wave port eigenmode solver!"); - // e->GetToVectors(etr, eti, 0, nd_attr_tdof_list.Size()); - // e->GetToVectors(enr, eni, nd_attr_tdof_list.Size(), - // nd_attr_tdof_list.Size() + h1_attr_tdof_list.Size()); - - // Re-expand from restricted boundary dofs to true dofs and transform back to true - // electric field variables: Eₜ = eₜ/kₙ and Eₙ = ieₙ. - auto &nd_fespace = *E0t->ParFESpace(); - auto &h1_fespace = *E0n->ParFESpace(); - mfem::Vector E0tr(nd_fespace.GetTrueVSize()), E0ti(nd_fespace.GetTrueVSize()), - E0nr(h1_fespace.GetTrueVSize()), E0ni(h1_fespace.GetTrueVSize()); - E0tr = 0.0; - E0ti = 0.0; - E0nr = 0.0; - E0ni = 0.0; - std::complex ookn = 1.0 / kn0; - for (int i = 0; i < nd_attr_tdof_list.Size(); i++) - { - E0tr(nd_attr_tdof_list[i]) = ookn.real() * etr(i) - ookn.imag() * eti(i); - E0ti(nd_attr_tdof_list[i]) = ookn.imag() * etr(i) + ookn.real() * eti(i); + // Separate the computed field out into eₜ and eₙ and transform back to true electric + // field variables: Eₜ = eₜ/kₙ and Eₙ = ieₙ.
+ eigen->GetEigenvector(mode_idx - 1, e0); + { + Vector e0tr, e0ti, e0nr, e0ni; + e0tr.MakeRef(e0, 0, e0t.Size() / 2); + e0nr.MakeRef(e0, e0t.Size() / 2, e0n.Size() / 2); + e0ti.MakeRef(e0, e0.Size() / 2, e0t.Size() / 2); + e0ni.MakeRef(e0, (e0.Size() + e0t.Size()) / 2, e0n.Size() / 2); + e0t.Real() = e0tr; + e0t.Imag() = e0ti; + e0n.Real() = e0nr; + e0n.Imag() = e0ni; + e0t *= 1.0 / kn0; + e0n *= 1i; + E0t->real().SetFromTrueDofs(e0t.Real()); // Parallel distribute + E0t->imag().SetFromTrueDofs(e0t.Imag()); + E0n->real().SetFromTrueDofs(e0n.Real()); + E0n->imag().SetFromTrueDofs(e0n.Imag()); } - for (int i = 0; i < h1_attr_tdof_list.Size(); i++) - { - E0nr(h1_attr_tdof_list[i]) = -eni(i); - E0ni(h1_attr_tdof_list[i]) = enr(i); - } - E0t->real().SetFromTrueDofs(E0tr); // Parallel distribute - E0t->imag().SetFromTrueDofs(E0ti); - E0n->real().SetFromTrueDofs(E0nr); - E0n->imag().SetFromTrueDofs(E0ni); // Normalize grid functions to a chosen polarization direction and unit power, |E x H⋆| ⋅ // n, integrated over the port surface (+n is the direction of propagation). The n x H // coefficients are updated implicitly as the only store references to the Et, En grid - // functions as well as kₙ, ω. - { - // Choose a (rather arbitrary) sign constraint: @ t = 0, 1ᵀ E > 0 when integrated over - // the port surface. This at least makes results for the same port consistent between - // frequencies/meshes. 
- mfem::Vector ones(nd_fespace.GetParMesh()->SpaceDimension()); - ones = 1.0; - mfem::VectorConstantCoefficient tdir(ones); - mfem::ConstantCoefficient ndir(1.0); - mfem::ParLinearForm sut(&nd_fespace), sun(&h1_fespace); - sut.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(tdir), attr_marker); - sun.AddBoundaryIntegrator(new BoundaryLFIntegrator(ndir), attr_marker); - sut.UseFastAssembly(false); - sun.UseFastAssembly(false); - sut.Assemble(); - sun.Assemble(); - if (sut(E0t->real()) + sun(E0n->real()) < 0.0) - { - E0t->real().Neg(); // This updates the n x H coefficients depending on Et, En - E0t->imag().Neg(); - E0n->real().Neg(); - E0n->imag().Neg(); - } - } + // functions as well as kₙ, ω. We choose a (rather arbitrary) sign constraint to at least + // make results for the same port consistent between frequencies/meshes. { // |E x H⋆| ⋅ n = |E ⋅ (-n x H⋆)| - sr = std::make_unique(&nd_fespace); - si = std::make_unique(&nd_fespace); + sr = std::make_unique(E0t->ParFESpace()); + si = std::make_unique(E0t->ParFESpace()); sr->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(*nxH0r_func), attr_marker); si->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(*nxH0i_func), attr_marker); sr->UseFastAssembly(false); @@ -733,7 +651,7 @@ void WavePortData::Initialize(double omega) si->Assemble(); std::complex s0(-(*sr)(E0t->real()) - (*si)(E0t->imag()), -(*sr)(E0t->imag()) + (*si)(E0t->real())); - double scale = 1.0 / std::sqrt(std::abs(s0)); + double scale = std::copysign(1.0 / std::sqrt(std::abs(s0)), s0.real()); E0t->real() *= scale; // This updates the n x H coefficients depending on Et, En too E0t->imag() *= scale; E0n->real() *= scale; @@ -966,19 +884,12 @@ void WavePortOperator::Initialize(double omega) data.Initialize(omega); if (!suppress_output) { - // if (first) - // { - // // Print header at first solve. 
- // if (data.GetA() && data.GetB()) - // { - // Mpi::Print(" Number of global unknowns for port {:d}: {}\n", idx, - // data.GetA()->GetGlobalNumRows()); - // Mpi::Print(" A: NNZ = {:d}, norm = {:e}\n", data.GetA()->NNZ(), - // data.GetA()->NormF()); - // Mpi::Print(" B: NNZ = {:d}, norm = {:e}\n", data.GetB()->NNZ(), - // data.GetB()->NormF()); - // } - // } + if (first) + { + Mpi::Print(" Number of global unknowns for port {:d}:\n" + " ND: {:d}\n H1: {:d}\n", + idx, data.GlobalTrueNDSize(), data.GlobalTrueH1Size()); + } double k0 = 1.0 / iodata.DimensionalizeValue(IoData::ValueType::LENGTH, 1.0); Mpi::Print(" Port {:d}, mode {:d}: kₙ = {:.3e}{:+.3e}i m⁻¹\n", idx, data.GetModeIndex(), k0 * data.GetPropagationConstant().real(), diff --git a/palace/models/waveportoperator.hpp b/palace/models/waveportoperator.hpp index 9cdfcb20e..3c8247f2d 100644 --- a/palace/models/waveportoperator.hpp +++ b/palace/models/waveportoperator.hpp @@ -8,9 +8,9 @@ #include #include #include -// #include "linalg/eps.hpp" -// #include "linalg/ksp.hpp" -// #include "linalg/petsc.hpp" +#include "linalg/complex.hpp" +#include "linalg/eps.hpp" +#include "linalg/ksp.hpp" namespace palace { @@ -41,14 +41,16 @@ class WavePortData // some MFEM API calls are not const correct. mutable mfem::Array attr_marker; - // Lists of non-essential true degrees of freedom associated with the port boundary. - mfem::Array nd_attr_tdof_list, h1_attr_tdof_list; - // Operator storage for repeated boundary mode eigenvalue problem solves. - // std::unique_ptr A, B, A1, A2, B3, B4; - // std::unique_ptr e, e0, y0; - // std::unique_ptr scatter; - double muepsmax; + double mu_eps_max; + HYPRE_BigInt attr_tdof_sizes[2]; + std::unique_ptr A1, A2r, A2i, B3, B4r, B4i, P; + std::unique_ptr A, B; + ComplexVector v0, e0, e0t, e0n; + + // Eigenvalue solver for boundary modes.
+ std::unique_ptr eigen; + std::unique_ptr ksp; // Grid functions storing the last computed electric field mode on the port and the // associated propagation constant. @@ -61,21 +63,6 @@ class WavePortData std::unique_ptr nxH0r_func, nxH0i_func; std::unique_ptr sr, si; - // // Eigenvalue solver for boundary modes. - // std::unique_ptr eigen; //XX TODO - // std::unique_ptr ksp; - - // Helper function to get true degrees of freedom on the port. - void GetTrueDofs(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace, - const mfem::Array &dbc_marker, mfem::Array &nd_tdof_list, - mfem::Array &h1_tdof_list); - - // // Configure and solve the linear eigenvalue problem for the boundary mode. - // void GetInitialSpace(int nt, int nn, petsc::PetscParVector &y0); - // std::complex Solve(petsc::PetscParVector &y0, petsc::PetscParVector &e0, - // petsc::PetscParVector &e, petsc::PetscScatter &scatter); - public: WavePortData(const config::WavePortData &data, const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &nd_fespace, @@ -86,8 +73,8 @@ class WavePortData void Initialize(double omega); - // const petsc::PetscParMatrix *GetA() const { return A.get(); } - // const petsc::PetscParMatrix *GetB() const { return B.get(); } + HYPRE_BigInt GlobalTrueNDSize() const { return attr_tdof_sizes[0]; } + HYPRE_BigInt GlobalTrueH1Size() const { return attr_tdof_sizes[1]; } std::complex GetPropagationConstant() const { return kn0; } double GetOperatingFrequency() const { return omega0; } From f9df157963fdf616816a5d0cdbc2667f633f8049 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Wed, 17 May 2023 18:40:09 -0700 Subject: [PATCH 07/41] Debugging: Electrostatics example --- palace/drivers/eigensolver.cpp | 2 - palace/fem/multigrid.hpp | 4 +- palace/linalg/amg.cpp | 12 +- palace/linalg/amg.hpp | 5 +- palace/linalg/ams.cpp | 15 +- palace/linalg/ams.hpp | 3 +- palace/linalg/chebyshev.cpp | 92 +-- palace/linalg/chebyshev.hpp | 25 +- 
palace/linalg/complex.cpp | 13 +- palace/linalg/complex.hpp | 16 +- palace/linalg/distrelaxation.cpp | 13 +- palace/linalg/distrelaxation.hpp | 2 +- palace/linalg/gmg.cpp | 4 +- palace/linalg/gmg.hpp | 2 +- palace/linalg/jacobi.cpp | 2 +- palace/linalg/ksp.cpp | 78 ++- palace/linalg/ksp.hpp | 3 +- palace/linalg/mumps.cpp | 12 +- palace/linalg/mumps.hpp | 5 +- palace/linalg/operator.cpp | 267 ++++--- palace/linalg/operator.hpp | 36 +- palace/linalg/slepc.cpp | 1050 +++++++++++++++------------- palace/linalg/slepc.hpp | 54 +- palace/linalg/strumpack.cpp | 21 +- palace/linalg/strumpack.hpp | 1 - palace/linalg/superlu.cpp | 18 +- palace/linalg/superlu.hpp | 1 - palace/main.cpp | 6 +- palace/models/curlcurloperator.cpp | 26 +- palace/models/curlcurloperator.hpp | 2 +- palace/models/laplaceoperator.cpp | 18 +- palace/models/laplaceoperator.hpp | 2 +- palace/models/spaceoperator.cpp | 64 +- palace/models/spaceoperator.hpp | 2 +- palace/models/timeoperator.cpp | 2 +- palace/models/waveportoperator.cpp | 2 +- palace/utils/configfile.cpp | 62 +- palace/utils/configfile.hpp | 60 +- palace/utils/geodata.cpp | 2 +- palace/utils/iodata.cpp | 34 +- 40 files changed, 1087 insertions(+), 951 deletions(-) diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp index 19fe54079..72f5c33af 100644 --- a/palace/drivers/eigensolver.cpp +++ b/palace/drivers/eigensolver.cpp @@ -264,8 +264,6 @@ void EigenSolver::Solve(std::vector> &mesh, &spaceop.GetH1Spaces()); ksp->SetOperator(*A, P, &AuxP); eigen->SetLinearSolver(*ksp); - - // XX TODO REVISIT FOR LOG PRINT FORMATTING... } // If desired, use an M-inner product for orthogonalizing the eigenvalue subspace. 
The diff --git a/palace/fem/multigrid.hpp b/palace/fem/multigrid.hpp index 6387b9aae..f9a70dfaf 100644 --- a/palace/fem/multigrid.hpp +++ b/palace/fem/multigrid.hpp @@ -98,7 +98,7 @@ mfem::ParFiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy( } auto *P = new ParOperator( std::make_unique(fespaces.GetFinestFESpace(), *fespace), - fespaces.GetFinestFESpace(), *fespace); + fespaces.GetFinestFESpace(), *fespace, true); fespaces.AddLevel(mesh[l].get(), fespace, P, false, true, true); } @@ -112,7 +112,7 @@ mfem::ParFiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy( } auto *P = new ParOperator( std::make_unique(fespaces.GetFinestFESpace(), *fespace), - fespaces.GetFinestFESpace(), *fespace); + fespaces.GetFinestFESpace(), *fespace, true); fespaces.AddLevel(mesh.back().get(), fespace, P, false, true, true); } return fespaces; diff --git a/palace/linalg/amg.cpp b/palace/linalg/amg.cpp index c8a1ea2ae..e303ad0b5 100644 --- a/palace/linalg/amg.cpp +++ b/palace/linalg/amg.cpp @@ -25,9 +25,17 @@ BoomerAmgSolver::BoomerAmgSolver(int cycle_it, int smooth_it, int print) // HYPRE_BoomerAMGSetCycleRelaxType(*this, coarse_relax_type, 3); } -void BoomerAmgSolver::SetOperator(const ParOperator &op) +void BoomerAmgSolver::SetOperator(const Operator &op) { - mfem::HypreBoomerAMG::SetOperator(const_cast(&op)->ParallelAssemble()); + const auto *PtAP = dynamic_cast(&op); + if (PtAP) + { + mfem::HypreBoomerAMG::SetOperator(const_cast(PtAP)->ParallelAssemble()); + } + else + { + mfem::HypreBoomerAMG::SetOperator(op); + } } } // namespace palace diff --git a/palace/linalg/amg.hpp b/palace/linalg/amg.hpp index 7c3a5c5a2..975930fb4 100644 --- a/palace/linalg/amg.hpp +++ b/palace/linalg/amg.hpp @@ -19,13 +19,12 @@ class BoomerAmgSolver : public mfem::HypreBoomerAMG public: BoomerAmgSolver(int cycle_it = 1, int smooth_it = 1, int print = 0); BoomerAmgSolver(const IoData &iodata, int print) - : BoomerAmgSolver((iodata.solver.linear.mat_gmg) ? 
1 : iodata.solver.linear.mg_cycle_it, + : BoomerAmgSolver(iodata.solver.linear.pc_mg ? 1 : iodata.solver.linear.mg_cycle_it, iodata.solver.linear.mg_smooth_it, print) { } - using mfem::HypreBoomerAMG::SetOperator; - void SetOperator(const ParOperator &op); + void SetOperator(const Operator &op) override; }; } // namespace palace diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp index 757442e63..1bc31ec23 100644 --- a/palace/linalg/ams.cpp +++ b/palace/linalg/ams.cpp @@ -209,7 +209,15 @@ void HypreAmsSolver::SetOperator(const Operator &op) InitializeSolver(); } - A = dynamic_cast(const_cast(&op)); + const auto *PtAP = dynamic_cast(&op); + if (PtAP) + { + A = &const_cast(PtAP)->ParallelAssemble(); + } + else + { + A = dynamic_cast(const_cast(&op)); + } MFEM_VERIFY(A, "HypreAmsSolver requires a HypreParMatrix operator!"); height = A->Height(); width = A->Width(); @@ -225,9 +233,4 @@ void HypreAmsSolver::SetOperator(const Operator &op) auxX.Reset(); } -void HypreAmsSolver::SetOperator(const ParOperator &op) -{ - SetOperator(const_cast(&op)->ParallelAssemble()); -} - } // namespace palace diff --git a/palace/linalg/ams.hpp b/palace/linalg/ams.hpp index 454d259f9..fe8ab33c2 100644 --- a/palace/linalg/ams.hpp +++ b/palace/linalg/ams.hpp @@ -54,7 +54,7 @@ class HypreAmsSolver : public mfem::HypreSolver HypreAmsSolver(const IoData &iodata, mfem::ParFiniteElementSpace &nd_fespace, mfem::ParFiniteElementSpace &h1_fespace, int print) : HypreAmsSolver(nd_fespace, h1_fespace, - iodata.solver.linear.mat_gmg ? 1 : iodata.solver.linear.mg_cycle_it, + iodata.solver.linear.pc_mg ? 
1 : iodata.solver.linear.mg_cycle_it, iodata.solver.linear.mg_smooth_it, (iodata.problem.type == config::ProblemData::Type::TRANSIENT || iodata.problem.type == config::ProblemData::Type::MAGNETOSTATIC) @@ -68,7 +68,6 @@ class HypreAmsSolver : public mfem::HypreSolver ~HypreAmsSolver() override; void SetOperator(const Operator &op) override; - void SetOperator(const ParOperator &op); operator HYPRE_Solver() const override { return ams; } diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp index 93f6fda8b..0cdc83027 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ -14,9 +14,11 @@ ChebyshevSmoother::ChebyshevSmoother(int smooth_it, int poly_order) { } -void ChebyshevSmoother::SetOperator(const ParOperator &op) +void ChebyshevSmoother::SetOperator(const Operator &op) { - A = &op; + const auto *PtAP = dynamic_cast(&op); + MFEM_VERIFY(PtAP, "ChebyshevSmoother requires a ParOperator operator!"); + A = PtAP; height = A->Height(); width = A->Width(); @@ -24,7 +26,7 @@ void ChebyshevSmoother::SetOperator(const ParOperator &op) d.SetSize(height); dinv.SetSize(height); A->AssembleDiagonal(dinv); - // dinv.Reciprocal(); //XX TODO NEED MFEM PATCH + dinv.Reciprocal(); // Set up Chebyshev coefficients using the computed maximum eigenvalue estimate. See // mfem::OperatorChebyshevSmoother or Adams et al., Parallel multigrid smoothing: @@ -83,88 +85,4 @@ void ChebyshevSmoother::Mult(const Vector &x, Vector &y) const } } -// XX TODO REMOVE -// void ChebyshevSmoother::ArrayMult(const mfem::Array &X, -// mfem::Array &Y) const -// { -// // Initialize. 
-// const int nrhs = X.Size(); -// const int N = height; -// mfem::Array R(nrhs), D(nrhs); -// std::vector rrefs(nrhs), drefs(nrhs); -// if (nrhs * N != r.Size()) -// { -// r.SetSize(nrhs * N); -// d.SetSize(nrhs * N); -// } -// for (int j = 0; j < nrhs; j++) -// { -// rrefs[j].MakeRef(r, j * N, N); -// drefs[j].MakeRef(d, j * N, N); -// R[j] = &rrefs[j]; -// D[j] = &drefs[j]; -// } - -// // Apply smoother: y = y + p(A) (x - A y) . -// for (int it = 0; it < pc_it; it++) -// { -// if (iterative_mode || it > 0) -// { -// A->ArrayMult(Y, R); -// for (int j = 0; j < nrhs; j++) -// { -// subtract(*X[j], *R[j], *R[j]); -// } -// } -// else -// { -// for (int j = 0; j < nrhs; j++) -// { -// *R[j] = *X[j]; -// *Y[j] = 0.0; -// } -// } - -// // 4th-kind Chebyshev smoother -// { -// const auto *DI = dinv.Read(); -// for (int j = 0; j < nrhs; j++) -// { -// const auto *RR = R[j]->Read(); -// auto *DD = D[j]->ReadWrite(); - -// mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) -// { DD[i] = 4.0 / (3.0 * lambda_max) * DI[i] * RR[i]; }); -// } -// } -// for (int k = 1; k < order; k++) -// { -// A->ArrayAddMult(D, R, -1.0); -// { -// // From Phillips and Fischer or Lottes (with k -> k + 1 shift due to 1-based -// // indexing) -// const double sd = (2.0 * k - 1.0) / (2.0 * k + 3.0); -// const double sr = (8.0 * k + 4.0) / ((2.0 * k + 3.0) * lambda_max); -// const auto *DI = dinv.Read(); -// for (int j = 0; j < nrhs; j++) -// { -// const auto *RR = R[j]->Read(); -// auto *YY = Y[j]->ReadWrite(); -// auto *DD = D[j]->ReadWrite(); -// mfem::forall(N, -// [=] MFEM_HOST_DEVICE(int i) -// { -// YY[i] += DD[i]; -// DD[i] = sd * DD[i] + sr * DI[i] * RR[i]; -// }); -// } -// } -// } -// for (int j = 0; j < nrhs; j++) -// { -// *Y[j] += *D[j]; -// } -// } -// } - } // namespace palace diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp index 96d5117ca..1302b113f 100644 --- a/palace/linalg/chebyshev.hpp +++ b/palace/linalg/chebyshev.hpp @@ -39,11 +39,7 @@ class ChebyshevSmoother 
: public mfem::Solver public: ChebyshevSmoother(int smooth_it, int poly_order); - void SetOperator(const Operator &op) override - { - MFEM_ABORT("ChebyshevSmoother requires a ParOperator operator!"); - } - void SetOperator(const ParOperator &op); + void SetOperator(const Operator &op) override; void Mult(const Vector &x, Vector &y) const override; @@ -51,25 +47,6 @@ class ChebyshevSmoother : public mfem::Solver { Mult(x, y); // Assumes operator symmetry } - - // XX TODO REMOVE... - // void Mult(const mfem::Vector &x, mfem::Vector &y) const override - // { - // mfem::Array X(1); - // mfem::Array Y(1); - // X[0] = &x; - // Y[0] = &y; - // ArrayMult(X, Y); - // } - - // void ArrayMult(const mfem::Array &X, - // mfem::Array &Y) const override; - - // void ArrayMultTranspose(const mfem::Array &X, - // mfem::Array &Y) const override - // { - // ArrayMult(X, Y); // Assumes operator symmetry - // } }; } // namespace palace diff --git a/palace/linalg/complex.cpp b/palace/linalg/complex.cpp index 376b81426..6c1400dcb 100644 --- a/palace/linalg/complex.cpp +++ b/palace/linalg/complex.cpp @@ -310,16 +310,16 @@ ComplexParOperator::ComplexParOperator(std::unique_ptr &&A, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict) - : ComplexOperator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), + : ComplexOperator(2 * test_fespace.GetTrueVSize(), 2 * trial_fespace.GetTrueVSize()), A_(std::move(A)), trial_fespace_(trial_fespace), test_fespace_(test_fespace), use_R_(test_restrict), trial_dbc_tdof_list_(nullptr), test_dbc_tdof_list_(nullptr), diag_policy_(DiagonalPolicy::DIAG_ONE) { MFEM_VERIFY(A_, "Cannot construct ComplexParOperator from an empty matrix!"); - lxr_.SetSize(A_->Width()); - lxi_.SetSize(A_->Width()); - lyr_.SetSize(A_->Height()); - lyi_.SetSize(A_->Height()); + lxr_.SetSize(A_->Width() / 2); + lxi_.SetSize(A_->Width() / 2); + lyr_.SetSize(A_->Height() / 2); + lyi_.SetSize(A_->Height() / 2); 
txr_.SetSize(width / 2); txi_.SetSize(width / 2); if (height != width) @@ -592,7 +592,8 @@ void ComplexParOperator::AddMultHermitianTranspose(const Vector &xr, const Vecto ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai) - : ComplexOperator(Ar ? Ar->Height() : Ai->Height(), Ar ? Ar->Width() : Ai->Width()), + : ComplexOperator(2 * (Ar ? Ar->Height() : Ai->Height()), + 2 * (Ar ? Ar->Width() : Ai->Width())), Ar_(std::move(Ar)), Ai_(std::move(Ai)) { MFEM_VERIFY(Ar_ || Ai_, "Cannot construct ComplexWrapperOperator from an empty matrix!"); diff --git a/palace/linalg/complex.hpp b/palace/linalg/complex.hpp index 3f5e69fc5..bfb881fb5 100644 --- a/palace/linalg/complex.hpp +++ b/palace/linalg/complex.hpp @@ -124,10 +124,13 @@ class ComplexVector : public Vector // height and width are twice the actual complex-valued size. class ComplexOperator : public Operator { -public: - ComplexOperator(int s) : Operator(2 * s) {} - ComplexOperator(int h, int w) : Operator(2 * h, 2 * w) {} +protected: + // The sizes provided by derived class constructors should already be twice the actual + // complex-valued size. + ComplexOperator(int s) : Operator(s) {} + ComplexOperator(int h, int w) : Operator(h, w) {} +public: // Test whether or not the operator is purely real or imaginary. virtual bool IsReal() const = 0; virtual bool IsImag() const = 0; @@ -252,7 +255,7 @@ class ComplexParOperator : public ComplexOperator // Get access to the underlying local (L-vector) operator. 
const ComplexOperator &LocalOperator() const { - MFEM_VERIFY(A_, "No local matrix available for ComplexParOperator::LocalOperator!"); + MFEM_ASSERT(A_, "No local matrix available for ComplexParOperator::LocalOperator!"); return *A_; } @@ -274,7 +277,7 @@ class ComplexParOperator : public ComplexOperator { MFEM_VERIFY(diag_policy == DiagonalPolicy::DIAG_ZERO, "Essential boundary condition true dof elimination for rectangular " - "ParOperator only supports DiagonalPolicy::DIAG_ZERO!"); + "ComplexParOperator only supports DiagonalPolicy::DIAG_ZERO!"); trial_dbc_tdof_list_ = trial_dbc_tdof_list; test_dbc_tdof_list_ = test_dbc_tdof_list; diag_policy_ = diag_policy; @@ -289,9 +292,6 @@ class ComplexParOperator : public ComplexOperator return trial_dbc_tdof_list_; } - // Set the diagonal policy for the operator. - void SetDiagonalPolicy(DiagonalPolicy diag_policy) { diag_policy_ = diag_policy; } - // Get the associated MPI communicator. MPI_Comm GetComm() const { return trial_fespace_.GetComm(); } diff --git a/palace/linalg/distrelaxation.cpp b/palace/linalg/distrelaxation.cpp index 87126ee52..db268719f 100644 --- a/palace/linalg/distrelaxation.cpp +++ b/palace/linalg/distrelaxation.cpp @@ -32,13 +32,16 @@ DistRelaxationSmoother::DistRelaxationSmoother(mfem::ParFiniteElementSpace &nd_f B_G->iterative_mode = false; } -void DistRelaxationSmoother::SetOperator(const ParOperator &op, const ParOperator &op_G) +void DistRelaxationSmoother::SetOperator(const Operator &op, const Operator &op_G) { - A = &op; - A_G = &op_G; - MFEM_VERIFY(A->Height() == G->Height() && A->Width() == G->Height() && - A_G->Height() == G->Width() && A_G->Width() == G->Width(), + MFEM_VERIFY(op.Height() == G->Height() && op.Width() == G->Height() && + op_G.Height() == G->Width() && op_G.Width() == G->Width(), "Invalid operator sizes for DistRelaxationSmoother!"); + const auto *PtAP = dynamic_cast(&op); + const auto *PtAP_G = dynamic_cast(&op_G); + MFEM_VERIFY(PtAP && PtAP_G, "DistRelaxationSmoother requires a
ParOperator operator!"); + A = PtAP; + A_G = PtAP_G; height = A->Height(); width = A->Width(); diff --git a/palace/linalg/distrelaxation.hpp b/palace/linalg/distrelaxation.hpp index a4e8a338c..78c43ee64 100644 --- a/palace/linalg/distrelaxation.hpp +++ b/palace/linalg/distrelaxation.hpp @@ -48,7 +48,7 @@ class DistRelaxationSmoother : public mfem::Solver MFEM_ABORT("SetOperator with a single operator is not implemented for " "DistRelaxationSmoother, use the two argument signature instead!"); } - void SetOperator(const ParOperator &op, const ParOperator &op_G); + void SetOperator(const Operator &op, const Operator &op_G); void Mult(const Vector &x, Vector &y) const override; diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index d0a6310d1..2db83860f 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -22,7 +22,7 @@ GeometricMultigridSolver::GeometricMultigridSolver( MFEM_VERIFY(n_levels > 0, "Empty finite element space hierarchy during multigrid solver setup!"); A_.resize(n_levels, nullptr); - P_.resize(n_levels, nullptr); + P_.resize(n_levels - 1, nullptr); x_.resize(n_levels, Vector()); y_.resize(n_levels, Vector()); r_.resize(n_levels, Vector()); @@ -34,7 +34,7 @@ GeometricMultigridSolver::GeometricMultigridSolver( R_.resize(n_levels, mfem::Array()); // Configure prolongation operators. 
- for (int l = 0; l < n_levels; l++) + for (int l = 0; l < n_levels - 1; l++) { const auto *PtAP_l = dynamic_cast(fespaces.GetProlongationAtLevel(l)); diff --git a/palace/linalg/gmg.hpp b/palace/linalg/gmg.hpp index a5d9c0667..2193d2fe3 100644 --- a/palace/linalg/gmg.hpp +++ b/palace/linalg/gmg.hpp @@ -59,7 +59,7 @@ class GeometricMultigridSolver : public mfem::Solver void SetOperator(const Operator &op) override { MFEM_ABORT("SetOperator with a single operator is not implemented for " - "GeometricMultigridSolver, use the other signature instead!"); + "GeometricMultigridSolver, use the overloaded SetOperator instead!"); } void SetOperator(const std::vector> &ops, const std::vector> *aux_ops = nullptr); diff --git a/palace/linalg/jacobi.cpp b/palace/linalg/jacobi.cpp index b4ccb86b1..417bdb6db 100644 --- a/palace/linalg/jacobi.cpp +++ b/palace/linalg/jacobi.cpp @@ -14,7 +14,7 @@ void JacobiSmoother::SetOperator(const Operator &op) width = op.Width(); dinv.SetSize(height); op.AssembleDiagonal(dinv); - // dinv.Reciprocal(); //XX TODO NEED MFEM PATCH + dinv.Reciprocal(); } void JacobiSmoother::Mult(const Vector &x, Vector &y) const diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp index 05a39ba38..ae36b6d30 100644 --- a/palace/linalg/ksp.cpp +++ b/palace/linalg/ksp.cpp @@ -89,7 +89,7 @@ std::unique_ptr ConfigureKrylovSolver(MPI_Comm comm, MFEM_ABORT("Unexpected solver type for Krylov solver configuration!"); break; } - ksp->iterative_mode = iodata.solver.linear.ksp_initial_guess; + ksp->iterative_mode = iodata.solver.linear.initial_guess; ksp->SetRelTol(iodata.solver.linear.tol); ksp->SetMaxIter(iodata.solver.linear.max_it); ksp->SetPrintLevel(print); @@ -158,8 +158,11 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, case config::LinearSolverData::Type::STRUMPACK: #if defined(MFEM_USE_STRUMPACK) pc = std::make_unique(comm, iodata, print); - break; +#else + MFEM_ABORT("Solver was not built with STRUMPACK support, please choose a " + 
"different solver!"); #endif + break; case config::LinearSolverData::Type::STRUMPACK_MP: #if defined(MFEM_USE_STRUMPACK) pc = std::make_unique(comm, iodata, print); @@ -180,16 +183,24 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, MFEM_ABORT("Unexpected solver type for preconditioner configuration!"); break; } - if (iodata.solver.linear.mat_gmg) + if (iodata.solver.linear.pc_mg) { // This will construct the multigrid hierarchy using pc as the coarse solver // (ownership of pc is transfered to the GeometricMultigridSolver). When a special // auxiliary space smoother for pre-/post-smoothing is not desired, the auxiliary // space is a nullptr here. - return std::make_unique( - iodata, std::move(pc), fespaces, - (iodata.problem.type != config::ProblemData::Type::MAGNETOSTATIC) ? aux_fespaces - : nullptr); + if (iodata.solver.linear.mg_smooth_aux) + { + MFEM_VERIFY(aux_fespaces, "Multigrid with auxiliary space smoothers requires both " + "primary space and auxiliary spaces for construction!"); + return std::make_unique(iodata, std::move(pc), fespaces, + aux_fespaces); + } + else + { + return std::make_unique(iodata, std::move(pc), fespaces, + nullptr); + } } else { @@ -208,6 +219,8 @@ class ComplexBlockDiagonalSolver : public mfem::Solver { } + mfem::Solver &GetSolver() { return *op_; } + void SetOperator(const Operator &op) override {} void Mult(const Vector &x, Vector &y) const override @@ -245,36 +258,29 @@ KspSolver::KspSolver(std::unique_ptr &&ksp, std::unique_ptr &&pc) : mfem::Solver(), ksp_(std::move(ksp)), pc_(std::move(pc)), ksp_mult(0), ksp_mult_it(0) { - ksp_->SetPreconditioner(*pc_); } -void KspSolver::SetOperator(const Operator &op, const Operator &pc_op) +void KspSolver::SetOperatorFinalize(const Operator &op) { // Unset the preconditioner before so that IterativeSolver::SetOperator does not set the // preconditioner operator again. 
- auto *gmg = dynamic_cast(pc_.get()); - if (gmg) - { - MFEM_ABORT("KspSolver with a GeometricMultigridSolver preconditioner must " - "use the other signature for SetOperator!"); - } - else - { - pc_->SetOperator(pc_op); - } - // ksp_->SetPreconditioner(nullptr); //XX TODO WAITING MFEM PATCH + ksp_->SetPreconditioner(nullptr); ksp_->SetOperator(op); ksp_->SetPreconditioner(*pc_); height = op.Height(); width = op.Width(); } +void KspSolver::SetOperator(const Operator &op, const Operator &pc_op) +{ + pc_->SetOperator(pc_op); + SetOperatorFinalize(op); +} + void KspSolver::SetOperator(const Operator &op, const std::vector> &pc_ops, const std::vector> *aux_pc_ops) { - // Unset the preconditioner before so that IterativeSolver::SetOperator does not set the - // preconditioner operator again. auto *gmg = dynamic_cast(pc_.get()); if (gmg) { @@ -282,17 +288,9 @@ void KspSolver::SetOperator(const Operator &op, } else { - MFEM_VERIFY( - !aux_pc_ops, - "Auxiliary space operators should not be specified for KspSolver::SetOperator " - "unless the preconditioner is a GeometricMultigridSolver!"); pc_->SetOperator(*pc_ops.back()); } - // ksp_->SetPreconditioner(nullptr); //XX TODO WAITING MFEM PATCH - ksp_->SetOperator(op); - ksp_->SetPreconditioner(*pc_); - height = op.Height(); - width = op.Width(); + SetOperatorFinalize(op); } void KspSolver::Mult(const Vector &x, Vector &y) const @@ -326,19 +324,31 @@ ComplexKspSolver::ComplexKspSolver(std::unique_ptr &&ksp, void ComplexKspSolver::SetOperator(const ComplexOperator &op, const Operator &pc_op) { - KspSolver::SetOperator(op, pc_op); // XX TODO TEST THIS AT RUNTIME... + auto &block = static_cast(pc_.get())->GetSolver(); + block.SetOperator(pc_op); + SetOperatorFinalize(op); } void ComplexKspSolver::SetOperator( const ComplexOperator &op, const std::vector> &pc_ops, const std::vector> *aux_pc_ops) { - KspSolver::SetOperator(op, pc_ops, aux_pc_ops); // XX TODO TEST THIS AT RUNTIME... 
+ auto &block = static_cast(pc_.get())->GetSolver(); + auto *gmg = dynamic_cast(&block); + if (gmg) + { + gmg->SetOperator(pc_ops, aux_pc_ops); + } + else + { + block.SetOperator(*pc_ops.back()); + } + SetOperatorFinalize(op); } void ComplexKspSolver::Mult(const ComplexVector &x, ComplexVector &y) const { - KspSolver::Mult(x, y); // XX TODO TEST THIS AT RUNTIME... + KspSolver::Mult(x, y); y.Sync(); } diff --git a/palace/linalg/ksp.hpp b/palace/linalg/ksp.hpp index 7b51fcf3d..214f0ca1e 100644 --- a/palace/linalg/ksp.hpp +++ b/palace/linalg/ksp.hpp @@ -24,7 +24,6 @@ class KspSolver : public mfem::Solver std::unique_ptr ksp_; std::unique_ptr pc_; -private: // Counters for number of calls to Mult method for linear solves, and cumulative number // of iterations. mutable int ksp_mult, ksp_mult_it; @@ -32,6 +31,8 @@ class KspSolver : public mfem::Solver protected: KspSolver() : ksp_(nullptr), pc_(nullptr), ksp_mult(0), ksp_mult_it(0) {} + void SetOperatorFinalize(const Operator &op); + public: KspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr); diff --git a/palace/linalg/mumps.cpp b/palace/linalg/mumps.cpp index a54ba6e87..a63422b40 100644 --- a/palace/linalg/mumps.cpp +++ b/palace/linalg/mumps.cpp @@ -43,9 +43,17 @@ MumpsSolver::MumpsSolver(MPI_Comm comm, mfem::MUMPSSolver::MatType sym, } } -void MumpsSolver::SetOperator(const ParOperator &op) +void MumpsSolver::SetOperator(const Operator &op) { - mfem::MUMPSSolver::SetOperator(const_cast(&op)->ParallelAssemble()); + const auto *PtAP = dynamic_cast(&op); + if (PtAP) + { + mfem::MUMPSSolver::SetOperator(const_cast(PtAP)->ParallelAssemble()); + } + else + { + mfem::MUMPSSolver::SetOperator(op); + } } } // namespace palace diff --git a/palace/linalg/mumps.hpp b/palace/linalg/mumps.hpp index a6620aaf4..f98bd0266 100644 --- a/palace/linalg/mumps.hpp +++ b/palace/linalg/mumps.hpp @@ -24,7 +24,7 @@ class MumpsSolver : public 
mfem::MUMPSSolver config::LinearSolverData::SymFactType reorder, double blr_tol, int print); MumpsSolver(MPI_Comm comm, const IoData &iodata, int print) : MumpsSolver(comm, - (iodata.solver.linear.mat_shifted || + (iodata.solver.linear.pc_mat_shifted || iodata.problem.type == config::ProblemData::Type::TRANSIENT || iodata.problem.type == config::ProblemData::Type::ELECTROSTATIC || iodata.problem.type == config::ProblemData::Type::MAGNETOSTATIC) @@ -39,8 +39,7 @@ class MumpsSolver : public mfem::MUMPSSolver { } - using mfem::MUMPSSolver::SetOperator; - void SetOperator(const ParOperator &op); + void SetOperator(const Operator &op) override; }; } // namespace palace diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp index ca14f69d5..6f7046703 100644 --- a/palace/linalg/operator.cpp +++ b/palace/linalg/operator.cpp @@ -19,7 +19,7 @@ ParOperator::ParOperator(std::unique_ptr &&A, : Operator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), A_(std::move(A)), trial_fespace_(trial_fespace), test_fespace_(test_fespace), use_R_(test_restrict), trial_dbc_tdof_list_(nullptr), test_dbc_tdof_list_(nullptr), - diag_policy_(DiagonalPolicy::DIAG_ONE), RAP_(nullptr) + diag_policy_(DiagonalPolicy::DIAG_ONE), RAP_(nullptr), save_A_(false) { MFEM_VERIFY(A_, "Cannot construct ParOperator from an empty matrix!"); lx_.SetSize(A_->Width()); @@ -58,10 +58,12 @@ void ParOperator::EliminateRHS(const Vector &x, Vector &b) const } // Apply the unconstrained operator. 
+ std::unique_ptr b_RAP_ = std::move(RAP_); const mfem::Array *b_trial_dbc_tdof_list_ = trial_dbc_tdof_list_; const mfem::Array *b_test_dbc_tdof_list_ = test_dbc_tdof_list_; trial_dbc_tdof_list_ = test_dbc_tdof_list_ = nullptr; AddMult(tx_, b, -1.0); + RAP_ = std::move(b_RAP_); trial_dbc_tdof_list_ = b_trial_dbc_tdof_list_; test_dbc_tdof_list_ = b_test_dbc_tdof_list_; @@ -102,7 +104,31 @@ void ParOperator::AssembleDiagonal(Vector &diag) const // entry-wise absolute values of the conforming prolongation operator. MFEM_VERIFY(&trial_fespace_ == &test_fespace_, "Diagonal assembly is only available for square ParOperator!"); - A_->AssembleDiagonal(ly_); + if (auto *bfA = dynamic_cast(A_.get())) + { + if (bfA->HasSpMat()) + { + bfA->SpMat().GetDiag(ly_); + } + else if (bfA->HasExt()) + { + bfA->Ext().AssembleDiagonal(ly_); + } + else + { + MFEM_ABORT("Unable to assemble the local operator diagonal of BilinearForm!"); + } + } + else if (auto *sA = dynamic_cast(A_.get())) + { + sA->GetDiag(ly_); + } + else + { + MFEM_ABORT("ParOperator::AssembleDiagonal requires A as a BilinearForm or " + "SparseMatrix!"); + } + const Operator *P = test_fespace_.GetProlongationMatrix(); if (const auto *hP = dynamic_cast(P)) { @@ -132,111 +158,191 @@ void ParOperator::AssembleDiagonal(Vector &diag) const mfem::HypreParMatrix &ParOperator::ParallelAssemble() { + if (RAP_) + { + return *RAP_; + } // XX TODO: For mfem::AssemblyLevel::PARTIAL, we cannot use CeedOperatorFullAssemble for // a ND space with p > 1. We should throw an error here that the user needs to // use AssemblyLevel::LEGACY in this case. - if (!RAP_) + // Build the square or rectangular RᵀAP HypreParMatrix. 
+ if (&trial_fespace_ == &test_fespace_) { - auto *bfA = dynamic_cast(A_.get()); - auto *mbfA = dynamic_cast(A_.get()); - auto *lA = dynamic_cast(A_.get()); - if (bfA || lA) - { - MFEM_VERIFY(&trial_fespace_ == &test_fespace_ && (!lA || lA->Height() == lA->Width()), - "Only square ParOperator should use a BilinearForm or SparseMatrix!"); - if (bfA) + mfem::SparseMatrix *lA; + bool own_lA = false; + if (auto *bfA = dynamic_cast(A_.get())) + { +#ifdef MFEM_USE_CEED + if (bfA->HasSpMat()) { - - // XX TODO MFEM PATCH - - // lA = bfA->HasSpMat() ? bfA->LoseMat() : - // mfem::ceed::CeedOperatorFullAssemble(*bfA); + lA = &bfA->SpMat(); } - mfem::HypreParMatrix *hA = - new mfem::HypreParMatrix(trial_fespace_.GetComm(), trial_fespace_.GlobalVSize(), - trial_fespace_.GetDofOffsets(), lA); - const mfem::HypreParMatrix *P = trial_fespace_.Dof_TrueDof_Matrix(); - RAP_ = - std::make_unique(hypre_ParCSRMatrixRAP(*P, *hA, *P), true); - delete hA; - if (bfA) + else if (bfA->HasExt()) { - delete lA; + lA = mfem::ceed::CeedOperatorFullAssemble(*bfA); + own_lA = true; } + else + { + MFEM_ABORT("Unable to assemble the local operator for parallel assembly of " + "BilinearForm!"); + } +#else + MFEM_VERIFY( + bfA->HasSpMat(), + "Missing assembled SparseMatrix for parallel assembly of BilinearForm!"); + lA = &bfA->SpMat(); +#endif } - else if (mbfA) + else if (auto *sA = dynamic_cast(A_.get())) { - - // XX TODO MFEM PATCH - - // lA = mbfA->HasSpMat() ? 
mbfA->LoseMat() - // : mfem::ceed::CeedOperatorFullAssemble(*mbfA, use_R_); - mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( - trial_fespace_.GetComm(), test_fespace_.GlobalVSize(), - trial_fespace_.GlobalVSize(), test_fespace_.GetDofOffsets(), - trial_fespace_.GetDofOffsets(), lA); - const mfem::HypreParMatrix *P = trial_fespace_.Dof_TrueDof_Matrix(); - if (!use_R_) + lA = sA; + } + else + { + MFEM_ABORT("ParOperator::ParallelAssemble requires A as a BilinearForm or " + "SparseMatrix!"); + lA = nullptr; + } + mfem::HypreParMatrix *hA = + new mfem::HypreParMatrix(trial_fespace_.GetComm(), trial_fespace_.GlobalVSize(), + trial_fespace_.GetDofOffsets(), lA); + const mfem::HypreParMatrix *P = trial_fespace_.Dof_TrueDof_Matrix(); + RAP_ = + std::make_unique(hypre_ParCSRMatrixRAP(*P, *hA, *P), true); + delete hA; + if (own_lA) + { + delete lA; + } + } + else + { + mfem::SparseMatrix *lA; + bool own_lA = false; + if (auto *mbfA = dynamic_cast(A_.get())) + { +#ifdef MFEM_USE_CEED + if (mbfA->HasSpMat()) + { + lA = &mbfA->SpMat(); + } + else if (mbfA->HasExt()) { - const mfem::HypreParMatrix *Rt = test_fespace_.Dof_TrueDof_Matrix(); - RAP_ = std::make_unique(hypre_ParCSRMatrixRAP(*Rt, *hA, *P), - true); + lA = mfem::ceed::CeedOperatorFullAssemble(*mbfA); + own_lA = true; } else { - mfem::SparseMatrix *sRt = mfem::Transpose(*test_fespace_.GetRestrictionMatrix()); - mfem::HypreParMatrix *hRt = new mfem::HypreParMatrix( - trial_fespace_.GetComm(), trial_fespace_.GlobalVSize(), - trial_fespace_.GlobalTrueVSize(), trial_fespace_.GetDofOffsets(), - trial_fespace_.GetTrueDofOffsets(), sRt); - RAP_ = std::make_unique(hypre_ParCSRMatrixRAP(*hRt, *hA, *P), - true); - delete sRt; - delete hRt; + MFEM_ABORT("Unable to assemble the local operator for parallel assembly of " + "MixedBilinearForm!"); } - delete hA; - delete lA; +#else + MFEM_VERIFY( + mbfA->HasSpMat(), + "Missing assembled SparseMatrix for parallel assembly of MixedBilinearForm!"); + lA = &mbfA->SpMat(); +#endif + } +
else if (auto *sA = dynamic_cast(A_.get())) + { + lA = sA; } else { - MFEM_ABORT("ParOperator::ParallelAssemble requires A as a BilinearForm or " - "MixedBilinearForm!"); + MFEM_ABORT("ParOperator::ParallelAssemble requires A as a MixedBilinearForm or " + "SparseMatrix!"); + lA = nullptr; + } + mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( + trial_fespace_.GetComm(), test_fespace_.GlobalVSize(), + trial_fespace_.GlobalVSize(), test_fespace_.GetDofOffsets(), + trial_fespace_.GetDofOffsets(), lA); + const mfem::HypreParMatrix *P = trial_fespace_.Dof_TrueDof_Matrix(); + if (!use_R_) + { + const mfem::HypreParMatrix *Rt = test_fespace_.Dof_TrueDof_Matrix(); + RAP_ = std::make_unique(hypre_ParCSRMatrixRAP(*Rt, *hA, *P), + true); + } + else + { + mfem::SparseMatrix *sRt = mfem::Transpose(*test_fespace_.GetRestrictionMatrix()); + mfem::HypreParMatrix *hRt = new mfem::HypreParMatrix( + trial_fespace_.GetComm(), trial_fespace_.GlobalVSize(), + trial_fespace_.GlobalTrueVSize(), trial_fespace_.GetDofOffsets(), + trial_fespace_.GetTrueDofOffsets(), sRt); + RAP_ = std::make_unique(hypre_ParCSRMatrixRAP(*hRt, *hA, *P), + true); + delete sRt; + delete hRt; } + delete hA; + if (own_lA) + { + delete lA; + } + } + hypre_ParCSRMatrixSetNumNonzeros(*RAP_); - // Delete the original local operator. + // Delete the original local operator. + if (!save_A_) + { A_.reset(); + } - // Eliminate boundary conditions on the assembled matrix. - if (test_dbc_tdof_list_ || trial_dbc_tdof_list_) + // Eliminate boundary conditions on the assembled matrix. + if (test_dbc_tdof_list_ || trial_dbc_tdof_list_) + { + if (test_dbc_tdof_list_ == trial_dbc_tdof_list_) + { + // Elimination for a square operator. 
+ MFEM_VERIFY( + &trial_fespace_ == &test_fespace_, + "Only square ParOperator should have same trial and test eliminated tdofs!"); + RAP_->EliminateBC(*trial_dbc_tdof_list_, diag_policy_); + } + else { - if (test_dbc_tdof_list_ == trial_dbc_tdof_list_) + // Rectangular elimination sets all eliminated rows/columns to zero. + if (test_dbc_tdof_list_) { - // Elimination for a square operator. - MFEM_VERIFY( - &trial_fespace_ == &test_fespace_, - "Only square ParOperator should have same trial and test eliminated tdofs!"); - RAP_->EliminateBC(*trial_dbc_tdof_list_, diag_policy_); + RAP_->EliminateRows(*test_dbc_tdof_list_); } - else + if (trial_dbc_tdof_list_) { - // Rectangular elimination sets all eliminated rows/columns to zero. - if (test_dbc_tdof_list_) - { - RAP_->EliminateRows(*test_dbc_tdof_list_); - } - if (trial_dbc_tdof_list_) - { - mfem::HypreParMatrix *RAPe = RAP_->EliminateCols(*trial_dbc_tdof_list_); - delete RAPe; - } + mfem::HypreParMatrix *RAPe = RAP_->EliminateCols(*trial_dbc_tdof_list_); + delete RAPe; } } } return *RAP_; } +void ParOperator::Mult(const Vector &x, Vector &y) const +{ + if (RAP_) + { + RAP_->Mult(x, y); + return; + } + y = 0.0; + AddMult(x, y); +} + +void ParOperator::MultTranspose(const Vector &x, Vector &y) const +{ + if (RAP_) + { + RAP_->MultTranspose(x, y); + return; + } + y = 0.0; + AddMultTranspose(x, y); +} + void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const { if (RAP_) @@ -384,11 +490,9 @@ double SpectralNorm(MPI_Comm comm, const ComplexOperator &A, bool herm, double t int max_it) { // XX TODO: Use ARPACK or SLEPc for this when configured. - // #if defined(PALACE_WITH_SLEPC) - - double slepc_l = slepc::GetMaxSingularValue(comm, A, herm, tol, max_it); - - // #else +#if defined(PALACE_WITH_SLEPC) + return slepc::GetMaxSingularValue(comm, A, herm, tol, max_it); +#else // Power iteration loop: ||A||₂² = λₙ(Aᴴ A). 
int it = 0; double res = 0.0; @@ -426,13 +530,8 @@ double SpectralNorm(MPI_Comm comm, const ComplexOperator &A, bool herm, double t "lambda = {:.3e}!\n", it, res, l); } - - // XX TODO DEBUG - Mpi::Print(comm, "\nSPECTRAL NORM...Power iteration: {}, SLEPc: {}\n\n", - herm ? l : std::sqrt(l), slepc_l); - return herm ? l : std::sqrt(l); - // #endif +#endif } } // namespace linalg diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp index 227a70cb8..8dec4ccc9 100644 --- a/palace/linalg/operator.hpp +++ b/palace/linalg/operator.hpp @@ -36,8 +36,11 @@ class ParOperator : public Operator // Diagonal policy for constrained true dofs. DiagonalPolicy diag_policy_; - // Assembled operator as a parallel Hypre matrix. - std::unique_ptr RAP_; + // Assembled operator as a parallel Hypre matrix. If the save flag is true, calls to + // ParallelAssemble will not delete the local operator. This is useful for later on calls + // to EliminateRHS, for example. + mutable std::unique_ptr RAP_; + bool save_A_; // Temporary storage for operator application. mutable Vector lx_, ly_, tx_, ty_; @@ -51,7 +54,7 @@ class ParOperator : public Operator // Get access to the underlying local (L-vector) operator. const Operator &LocalOperator() const { - MFEM_VERIFY(A_, "No local matrix available for ParOperator::LocalOperator!"); + MFEM_ASSERT(A_, "No local matrix available for ParOperator::LocalOperator!"); return *A_; } @@ -88,6 +91,11 @@ class ParOperator : public Operator return trial_dbc_tdof_list_; } + // A call to ParallelAssemble will typically free the memory associated with the local + // operator as it is no longer required. When the save flag is set, the local operator + // will not be deleted during parallel assembly. + void SaveLocalOperator() { save_A_ = true; } + // Eliminate essential true dofs from the RHS vector b, using the essential boundary // condition values in x. 
void EliminateRHS(const Vector &x, Vector &b) const; @@ -102,27 +110,9 @@ class ParOperator : public Operator // Get the associated MPI communicator. MPI_Comm GetComm() const { return trial_fespace_.GetComm(); } - void Mult(const Vector &x, Vector &y) const override - { - if (RAP_) - { - RAP_->Mult(x, y); - return; - } - y = 0.0; - AddMult(x, y); - } + void Mult(const Vector &x, Vector &y) const override; - void MultTranspose(const Vector &x, Vector &y) const override - { - if (RAP_) - { - RAP_->MultTranspose(x, y); - return; - } - y = 0.0; - AddMultTranspose(x, y); - } + void MultTranspose(const Vector &x, Vector &y) const override; void AddMult(const Vector &x, Vector &y, const double a = 1.0) const override; diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp index 4ec2bb45b..31c9a72c5 100644 --- a/palace/linalg/slepc.cpp +++ b/palace/linalg/slepc.cpp @@ -14,9 +14,120 @@ #include "linalg/vector.hpp" #include "utils/communication.hpp" +static PetscErrorCode __mat_apply_EPS_A0(Mat, Vec, Vec); +static PetscErrorCode __mat_apply_EPS_A1(Mat, Vec, Vec); +static PetscErrorCode __mat_apply_EPS_B(Mat, Vec, Vec); +static PetscErrorCode __pc_apply_EPS(PC, Vec, Vec); +static PetscErrorCode __mat_apply_PEPLinear_L0(Mat, Vec, Vec); +static PetscErrorCode __mat_apply_PEPLinear_L1(Mat, Vec, Vec); +static PetscErrorCode __mat_apply_PEPLinear_B(Mat, Vec, Vec); +static PetscErrorCode __pc_apply_PEPLinear(PC, Vec, Vec); +static PetscErrorCode __mat_apply_PEP_A0(Mat, Vec, Vec); +static PetscErrorCode __mat_apply_PEP_A1(Mat, Vec, Vec); +static PetscErrorCode __mat_apply_PEP_A2(Mat, Vec, Vec); +static PetscErrorCode __mat_apply_PEP_B(Mat, Vec, Vec); +static PetscErrorCode __pc_apply_PEP(PC, Vec, Vec); + namespace palace::slepc { +namespace +{ + +struct MatShellContext +{ + const ComplexOperator &A; + ComplexVector &x, &y; +}; + +PetscErrorCode __mat_apply_shell(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + MatShellContext *ctx; + PetscCall(MatShellGetContext(A, (void 
**)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->A.Mult(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_transpose_shell(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + MatShellContext *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->A.MultTranspose(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_hermitian_transpose_shell(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + MatShellContext *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->A.MultHermitianTranspose(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +}; + +void ConfigurePCShell(ST st, void *ctx, PetscErrorCode (*__pc_apply)(PC, Vec, Vec)) +{ + KSP ksp; + PC pc; + PalacePetscCall(STGetKSP(st, &ksp)); + PalacePetscCall(KSPGetPC(ksp, &pc)); + PalacePetscCall(PCSetType(pc, PCSHELL)); + PalacePetscCall(PCShellSetContext(pc, (void *)ctx)); + 
PalacePetscCall(PCShellSetApply(pc, (PetscErrorCode(*)(PC, Vec, Vec)) & __pc_apply)); +} + +} // namespace + void Initialize(int &argc, char **&argv, const char rc_file[], const char help[]) { PalacePetscCall(SlepcInitialize(&argc, &argv, rc_file, help)); @@ -39,72 +150,13 @@ PetscReal GetMaxSingularValue(MPI_Comm comm, const ComplexOperator &A, bool herm // or SVD solvers, namely MATOP_MULT and MATOP_MULT_HERMITIAN_TRANSPOSE (if the matrix // is not Hermitian). Mat A0; - PetscInt n = A.Height() / 2; - PalacePetscCall(MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, nullptr, &A0)); ComplexVector x(A.Height()), y(A.Height()); - auto __mat_apply_shell = [&A, &x, &y](Mat, Vec x0, Vec y0) -> PetscErrorCode - { - PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x0, &n)); - - const PetscScalar *px0; - PetscCall(VecGetArrayRead(x0, &px0)); - x.Set(px0, n); - PetscCall(VecRestoreArrayRead(x0, &px0)); - - A.Mult(x, y); - - PetscScalar *py0; - PetscCall(VecGetArrayWrite(y0, &py0)); - y.Get(py0, n); - PetscCall(VecRestoreArrayWrite(y0, &py0)); - - PetscFunctionReturn(0); - }; - auto __mat_apply_transpose_shell = [&A, &x, &y](Mat, Vec x0, Vec y0) -> PetscErrorCode - { - PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x0, &n)); - - const PetscScalar *px0; - PetscCall(VecGetArrayRead(x0, &px0)); - x.Set(px0, n); - PetscCall(VecRestoreArrayRead(x0, &px0)); - - A.MultTranspose(x, y); - - PetscScalar *py0; - PetscCall(VecGetArrayWrite(y0, &py0)); - y.Get(py0, n); - PetscCall(VecRestoreArrayWrite(y0, &py0)); - - PetscFunctionReturn(0); - }; - auto __mat_apply_hermitian_transpose_shell = [&A, &x, &y](Mat, Vec x0, - Vec y0) -> PetscErrorCode - { - PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x0, &n)); - - const PetscScalar *px0; - PetscCall(VecGetArrayRead(x0, &px0)); - x.Set(px0, n); - PetscCall(VecRestoreArrayRead(x0, &px0)); - - A.MultHermitianTranspose(x, y); - - PetscScalar *py0; - PetscCall(VecGetArrayWrite(y0, 
&py0)); - y.Get(py0, n); - PetscCall(VecRestoreArrayWrite(y0, &py0)); - - PetscFunctionReturn(0); - }; - PalacePetscCall(MatShellSetOperation(A0, MATOP_MULT, (void (*)()) & __mat_apply_shell)); - + MatShellContext ctx = {A, x, y}; + PetscInt n = A.Height() / 2; + PalacePetscCall( + MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)&ctx, &A0)); + PalacePetscCall( + MatShellSetOperation(A0, MATOP_MULT, (void (*)(void)) & __mat_apply_shell)); if (herm) { EPS eps; @@ -137,10 +189,10 @@ PetscReal GetMaxSingularValue(MPI_Comm comm, const ComplexOperator &A, bool herm else { PalacePetscCall(MatShellSetOperation(A0, MATOP_MULT_TRANSPOSE, - (void (*)()) & __mat_apply_transpose_shell)); + (void (*)(void)) & __mat_apply_transpose_shell)); PalacePetscCall( MatShellSetOperation(A0, MATOP_MULT_HERMITIAN_TRANSPOSE, - (void (*)()) & __mat_apply_hermitian_transpose_shell)); + (void (*)(void)) & __mat_apply_hermitian_transpose_shell)); SVD svd; PetscInt num_conv; @@ -624,50 +676,10 @@ void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperato MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0)); PalacePetscCall( MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); - auto __mat_apply_shell_A0 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode - { - PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x_, &n)); - - const PetscScalar *px_; - PetscCall(VecGetArrayRead(x_, &px_)); - x.Set(px_, n); - PetscCall(VecRestoreArrayRead(x_, &px_)); - - opK->Mult(x, y); - - PetscScalar *py_; - PetscCall(VecGetArrayWrite(y_, &py_)); - y.Get(py_, n); - PetscCall(VecRestoreArrayWrite(y_, &py_)); - - PetscFunctionReturn(0); - }; - auto __mat_apply_shell_A1 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode - { - PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x_, &n)); - - const PetscScalar *px_; - PetscCall(VecGetArrayRead(x_, &px_)); - x.Set(px_, n); - PetscCall(VecRestoreArrayRead(x_, 
&px_)); - - opM->Mult(x, y); - - PetscScalar *py_; - PetscCall(VecGetArrayWrite(y_, &py_)); - y.Get(py_, n); - PetscCall(VecRestoreArrayWrite(y_, &py_)); - - PetscFunctionReturn(0); - }; PalacePetscCall( - MatShellSetOperation(A0, MATOP_MULT, (void (*)()) & __mat_apply_shell_A0)); + MatShellSetOperation(A0, MATOP_MULT, (void (*)(void)) & __mat_apply_EPS_A0)); PalacePetscCall( - MatShellSetOperation(A1, MATOP_MULT, (void (*)()) & __mat_apply_shell_A1)); + MatShellSetOperation(A1, MATOP_MULT, (void (*)(void)) & __mat_apply_EPS_A1)); PalacePetscCall(EPSSetOperators(eps, A0, A1)); if (first && type != ScaleType::NONE) @@ -694,7 +706,7 @@ void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperato // allows use of the divergence-free projector as a linear solve side-effect. if (first) { - ConfigurePCShell(); + ConfigurePCShell(GetST(), (void *)this, __pc_apply_EPS); } } @@ -705,83 +717,13 @@ void SlepcEPSSolver::SetBMat(const Operator &B) PetscInt n = B.Height(); PalacePetscCall( MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B0)); - auto __mat_apply_shell = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode - { - PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x_, &n)); - - const PetscScalar *px_; - PetscCall(VecGetArrayRead(x_, &px_)); - x.Set(px_, n); - PetscCall(VecRestoreArrayRead(x_, &px_)); - - opB->Mult(x.Real(), y.Real()); - opB->Mult(x.Imag(), y.Imag()); - y *= delta * gamma; - - PetscScalar *py_; - PetscCall(VecGetArrayWrite(y_, &py_)); - y.Get(py_, n); - PetscCall(VecRestoreArrayWrite(y_, &py_)); - - PetscFunctionReturn(0); - }; - PalacePetscCall(MatShellSetOperation(B0, MATOP_MULT, (void (*)()) & __mat_apply_shell)); + PalacePetscCall( + MatShellSetOperation(B0, MATOP_MULT, (void (*)(void)) & __mat_apply_EPS_B)); BV bv = GetBV(); PalacePetscCall(BVSetMatrix(bv, B0, PETSC_FALSE)); } -void SlepcEPSSolver::ConfigurePCShell() -{ - auto __pc_apply = [this](PC, Vec x_, Vec y_) -> PetscErrorCode - { - 
// Solve the linear system associated with the generalized eigenvalue problem: y = - // M⁻¹ x, or shift-and-invert spectral transformation: y = (K - σ M)⁻¹ x . Enforces the - // divergence-free constraint using the supplied projector. - PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x_, &n)); - - const PetscScalar *px_; - PetscCall(VecGetArrayRead(x_, &px_)); - x.Set(px_, n); - PetscCall(VecRestoreArrayRead(x_, &px_)); - - opInv->Mult(x, y); - if (!sinvert) - { - y *= 1.0 / (delta * gamma); - } - else - { - y *= 1.0 / delta; - } - if (opProj) - { - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y)); - opProj->Mult(y); - // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(GetComm(), y)); - } - - PetscScalar *py_; - PetscCall(VecGetArrayWrite(y_, &py_)); - y.Get(py_, n); - PetscCall(VecRestoreArrayWrite(y_, &py_)); - - PetscFunctionReturn(0); - }; - - KSP ksp; - PC pc; - ST st = GetST(); - PalacePetscCall(STGetKSP(st, &ksp)); - PalacePetscCall(KSPGetPC(ksp, &pc)); - PalacePetscCall(PCSetType(pc, PCSHELL)); - PalacePetscCall(PCShellSetApply(pc, (PetscErrorCode(*)(PC, Vec, Vec)) & __pc_apply)); -} - PetscReal SlepcEPSSolver::GetResidualNorm(int i) const { // Compute the i-th eigenpair residual: || (K - λ M) x ||₂ for eigenvalue λ. @@ -831,64 +773,10 @@ void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexO (void *)this, &A0)); PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); - auto __mat_apply_shell_A0 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode - { - // Apply the linearized operator L₀ = [ 0 I ] - // [ -K -C ] . 
- PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x_, &n)); - - const PetscScalar *px_; - PetscCall(VecGetArrayRead(x_, &px_)); - x1.Set(px_, n / 2); - x2.Set(px_ + n / 2, n / 2); - PetscCall(VecRestoreArrayRead(x_, &px_)); - - y1 = x2; - opC->Mult(x2, y2); - y2 *= gamma; - opK->AddMult(x1, y2, std::complex(1.0, 0.0)); - y2 *= -delta; - - PetscScalar *py_; - PetscCall(VecGetArrayWrite(y_, &py_)); - y1.Get(py_, n / 2); - y2.Get(py_ + n / 2, n / 2); - PetscCall(VecRestoreArrayWrite(y_, &py_)); - - PetscFunctionReturn(0); - }; - auto __mat_apply_shell_A1 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode - { - // Apply the linearized operator L₁ = [ I 0 ] - // [ 0 M ] . - PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x_, &n)); - - const PetscScalar *px_; - PetscCall(VecGetArrayRead(x_, &px_)); - x1.Set(px_, n / 2); - x2.Set(px_ + n / 2, n / 2); - PetscCall(VecRestoreArrayRead(x_, &px_)); - - y1 = x1; - opM->Mult(x2, y2); - y2 *= delta * gamma * gamma; - - PetscScalar *py_; - PetscCall(VecGetArrayWrite(y_, &py_)); - y1.Get(py_, n / 2); - y2.Get(py_ + n / 2, n / 2); - PetscCall(VecRestoreArrayWrite(y_, &py_)); - - PetscFunctionReturn(0); - }; PalacePetscCall( - MatShellSetOperation(A0, MATOP_MULT, (void (*)()) & __mat_apply_shell_A0)); + MatShellSetOperation(A0, MATOP_MULT, (void (*)(void)) & __mat_apply_PEPLinear_L0)); PalacePetscCall( - MatShellSetOperation(A1, MATOP_MULT, (void (*)()) & __mat_apply_shell_A1)); + MatShellSetOperation(A1, MATOP_MULT, (void (*)(void)) & __mat_apply_PEPLinear_L1)); PalacePetscCall(EPSSetOperators(eps, A0, A1)); if (first && type != ScaleType::NONE) @@ -918,7 +806,7 @@ void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexO // Configure linear solver. 
if (first) { - ConfigurePCShell(); + ConfigurePCShell(GetST(), (void *)this, __pc_apply_PEPLinear); } } @@ -929,34 +817,8 @@ void SlepcPEPLinearSolver::SetBMat(const Operator &B) PetscInt n = B.Height(); PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B0)); - auto __mat_apply_shell = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode - { - PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x_, &n)); - - const PetscScalar *px_; - PetscCall(VecGetArrayRead(x_, &px_)); - x1.Set(px_, n / 2); - x2.Set(px_ + n / 2, n / 2); - PetscCall(VecRestoreArrayRead(x_, &px_)); - - opB->Mult(x1.Real(), y1.Real()); - opB->Mult(x1.Imag(), y1.Imag()); - opB->Mult(x2.Real(), y2.Real()); - opB->Mult(x2.Imag(), y2.Imag()); - y1 *= delta * gamma * gamma; - y2 *= delta * gamma * gamma; - - PetscScalar *py_; - PetscCall(VecGetArrayWrite(y_, &py_)); - y1.Get(py_, n / 2); - y2.Get(py_ + n / 2, n / 2); - PetscCall(VecRestoreArrayWrite(y_, &py_)); - - PetscFunctionReturn(0); - }; - PalacePetscCall(MatShellSetOperation(B0, MATOP_MULT, (void (*)()) & __mat_apply_shell)); + PalacePetscCall( + MatShellSetOperation(B0, MATOP_MULT, (void (*)(void)) & __mat_apply_PEPLinear_B)); BV bv = GetBV(); PalacePetscCall(BVSetMatrix(bv, B0, PETSC_FALSE)); @@ -1014,84 +876,6 @@ void SlepcPEPLinearSolver::GetEigenvector(int i, ComplexVector &x) const } } -void SlepcPEPLinearSolver::ConfigurePCShell() -{ - auto __pc_apply = [this](PC, Vec x_, Vec y_) -> PetscErrorCode - { - // Solve the linear system associated with the generalized eigenvalue problem after - // linearization: y = L₁⁻¹ x, or with the shift-and-invert spectral transformation: - // y = (L₀ - σ L₁)⁻¹ x, with: - // L₀ = [ 0 I ] L₁ = [ I 0 ] - // [ -K -C ] , [ 0 M ] . - // Enforces the divergence-free constraint using the supplied projector. 
- PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x_, &n)); - - const PetscScalar *px_; - PetscCall(VecGetArrayRead(x_, &px_)); - x1.Set(px_, n / 2); - x2.Set(px_ + n / 2, n / 2); - PetscCall(VecRestoreArrayRead(x_, &px_)); - - if (!sinvert) - { - y1 = x1; - if (opProj) - { - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y1)); - opProj->Mult(y1); - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y1)); - } - - opInv->Mult(x2, y2); - y2 *= 1.0 / (delta * gamma * gamma); - if (opProj) - { - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y2)); - opProj->Mult(y2); - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y2)); - } - } - else - { - y1.AXPBY(-sigma / (delta * gamma), x2, 0.0); // Temporarily - opK->AddMult(x1, y1, std::complex(1.0, 0.0)); - opInv->Mult(y1, y2); - if (opProj) - { - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y2)); - opProj->Mult(y2); - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y2)); - } - - y1.AXPBYPCZ(gamma / sigma, y2, -gamma / sigma, x1, 0.0); - if (opProj) - { - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y1)); - opProj->Mult(y1); - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y1)); - } - } - - PetscScalar *py_; - PetscCall(VecGetArrayWrite(y_, &py_)); - y1.Get(py_, n / 2); - y2.Get(py_ + n / 2, n / 2); - PetscCall(VecRestoreArrayWrite(y_, &py_)); - - PetscFunctionReturn(0); - }; - - KSP ksp; - PC pc; - ST st = GetST(); - PalacePetscCall(STGetKSP(st, &ksp)); - PalacePetscCall(KSPGetPC(ksp, &pc)); - PalacePetscCall(PCSetType(pc, PCSHELL)); - PalacePetscCall(PCShellSetApply(pc, (PetscErrorCode(*)(PC, Vec, Vec)) & __pc_apply)); -} - PetscReal SlepcPEPLinearSolver::GetResidualNorm(int i) const { // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for @@ -1400,72 +1184,12 @@ void 
SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperato MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); PalacePetscCall( MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A2)); - auto __mat_apply_shell_A0 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode - { - PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x_, &n)); - - const PetscScalar *px_; - PetscCall(VecGetArrayRead(x_, &px_)); - x.Set(px_, n); - PetscCall(VecRestoreArrayRead(x_, &px_)); - - opK->Mult(x, y); - - PetscScalar *py_; - PetscCall(VecGetArrayWrite(y_, &py_)); - y.Get(py_, n); - PetscCall(VecRestoreArrayWrite(y_, &py_)); - - PetscFunctionReturn(0); - }; - auto __mat_apply_shell_A1 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode - { - PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x_, &n)); - - const PetscScalar *px_; - PetscCall(VecGetArrayRead(x_, &px_)); - x.Set(px_, n); - PetscCall(VecRestoreArrayRead(x_, &px_)); - - opC->Mult(x, y); - - PetscScalar *py_; - PetscCall(VecGetArrayWrite(y_, &py_)); - y.Get(py_, n); - PetscCall(VecRestoreArrayWrite(y_, &py_)); - - PetscFunctionReturn(0); - }; - auto __mat_apply_shell_A2 = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode - { - PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x_, &n)); - - const PetscScalar *px_; - PetscCall(VecGetArrayRead(x_, &px_)); - x.Set(px_, n); - PetscCall(VecRestoreArrayRead(x_, &px_)); - - opM->Mult(x, y); - - PetscScalar *py_; - PetscCall(VecGetArrayWrite(y_, &py_)); - y.Get(py_, n); - PetscCall(VecRestoreArrayWrite(y_, &py_)); - - PetscFunctionReturn(0); - }; PalacePetscCall( - MatShellSetOperation(A0, MATOP_MULT, (void (*)()) & __mat_apply_shell_A0)); + MatShellSetOperation(A0, MATOP_MULT, (void (*)(void)) & __mat_apply_PEP_A0)); PalacePetscCall( - MatShellSetOperation(A1, MATOP_MULT, (void (*)()) & __mat_apply_shell_A1)); + MatShellSetOperation(A1, MATOP_MULT, (void (*)(void)) & 
__mat_apply_PEP_A1)); PalacePetscCall( - MatShellSetOperation(A2, MATOP_MULT, (void (*)()) & __mat_apply_shell_A2)); + MatShellSetOperation(A2, MATOP_MULT, (void (*)(void)) & __mat_apply_PEP_A2)); Mat A[3] = {A0, A1, A2}; PalacePetscCall(PEPSetOperators(pep, 3, A)); @@ -1494,7 +1218,7 @@ void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperato // Configure linear solver. if (first) { - ConfigurePCShell(); + ConfigurePCShell(GetST(), (void *)this, __pc_apply_PEP); } } @@ -1505,83 +1229,13 @@ void SlepcPEPSolver::SetBMat(const Operator &B) PetscInt n = B.Height(); PalacePetscCall( MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B0)); - auto __mat_apply_shell = [this](Mat, Vec x_, Vec y_) -> PetscErrorCode - { - PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x_, &n)); - - const PetscScalar *px_; - PetscCall(VecGetArrayRead(x_, &px_)); - x.Set(px_, n); - PetscCall(VecRestoreArrayRead(x_, &px_)); - - opB->Mult(x.Real(), y.Real()); - opB->Mult(x.Imag(), y.Imag()); - y *= delta * gamma; - - PetscScalar *py_; - PetscCall(VecGetArrayWrite(y_, &py_)); - y.Get(py_, n); - PetscCall(VecRestoreArrayWrite(y_, &py_)); - - PetscFunctionReturn(0); - }; - PalacePetscCall(MatShellSetOperation(B0, MATOP_MULT, (void (*)()) & __mat_apply_shell)); + PalacePetscCall( + MatShellSetOperation(B0, MATOP_MULT, (void (*)(void)) & __mat_apply_PEP_B)); BV bv = GetBV(); PalacePetscCall(BVSetMatrix(bv, B0, PETSC_FALSE)); } -void SlepcPEPSolver::ConfigurePCShell() -{ - auto __pc_apply = [this](PC, Vec x_, Vec y_) -> PetscErrorCode - { - // Solve the linear system associated with the generalized eigenvalue problem: y = - // M⁻¹ x, or shift-and-invert spectral transformation: y = P(σ)⁻¹ x . Enforces the - // divergence-free constraint using the supplied projector. 
- PetscFunctionBeginUser; - PetscInt n; - PetscCall(VecGetLocalSize(x_, &n)); - - const PetscScalar *px_; - PetscCall(VecGetArrayRead(x_, &px_)); - x.Set(px_, n); - PetscCall(VecRestoreArrayRead(x_, &px_)); - - opInv->Mult(x, y); - if (!sinvert) - { - y *= 1.0 / (delta * gamma * gamma); - } - else - { - y *= 1.0 / delta; - } - if (opProj) - { - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(GetComm(), y)); - opProj->Mult(y); - // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(GetComm(), y)); - } - - PetscScalar *py_; - PetscCall(VecGetArrayWrite(y_, &py_)); - y.Get(py_, n); - PetscCall(VecRestoreArrayWrite(y_, &py_)); - - PetscFunctionReturn(0); - }; - - KSP ksp; - PC pc; - ST st = GetST(); - PalacePetscCall(STGetKSP(st, &ksp)); - PalacePetscCall(KSPGetPC(ksp, &pc)); - PalacePetscCall(PCSetType(pc, PCSHELL)); - PalacePetscCall(PCShellSetApply(pc, (PetscErrorCode(*)(PC, Vec, Vec)) & __pc_apply)); -} - PetscReal SlepcPEPSolver::GetResidualNorm(int i) const { // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for @@ -1616,4 +1270,438 @@ PetscReal SlepcPEPSolver::GetBackwardScaling(PetscScalar l) const } // namespace palace::slepc +PetscErrorCode __mat_apply_EPS_A0(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + palace::slepc::SlepcEPSSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opK->Mult(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_EPS_A1(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + palace::slepc::SlepcEPSSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + 
MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opM->Mult(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_EPS_B(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + palace::slepc::SlepcEPSSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opB->Mult(ctx->x.Real(), ctx->y.Real()); + ctx->opB->Mult(ctx->x.Imag(), ctx->y.Imag()); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __pc_apply_EPS(PC pc, Vec x, Vec y) +{ + // Solve the linear system associated with the generalized eigenvalue problem: y = + // M⁻¹ x, or shift-and-invert spectral transformation: y = (K - σ M)⁻¹ x . Enforces the + // divergence-free constraint using the supplied projector. 
+ PetscFunctionBeginUser; + palace::slepc::SlepcEPSSolver *ctx; + PetscCall(PCShellGetContext(pc, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opInv->Mult(ctx->x, ctx->y); + if (!ctx->sinvert) + { + ctx->y *= 1.0 / (ctx->delta * ctx->gamma); + } + else + { + ctx->y *= 1.0 / ctx->delta; + } + if (ctx->opProj) + { + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y)); + ctx->opProj->Mult(ctx->y); + // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y)); + } + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_PEPLinear_L0(Mat A, Vec x, Vec y) +{ + // Apply the linearized operator L₀ = [ 0 I ] + // [ -K -C ] . + PetscFunctionBeginUser; + palace::slepc::SlepcPEPLinearSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x1.Set(px, n / 2); + ctx->x2.Set(px + n / 2, n / 2); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->y1 = ctx->x2; + ctx->opC->Mult(ctx->x2, ctx->y2); + ctx->y2 *= ctx->gamma; + ctx->opK->AddMult(ctx->x1, ctx->y2, std::complex(1.0, 0.0)); + ctx->y2 *= -ctx->delta; + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y1.Get(py, n / 2); + ctx->y2.Get(py + n / 2, n / 2); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_PEPLinear_L1(Mat A, Vec x, Vec y) +{ + // Apply the linearized operator L₁ = [ I 0 ] + // [ 0 M ] . 
+ PetscFunctionBeginUser; + palace::slepc::SlepcPEPLinearSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x1.Set(px, n / 2); + ctx->x2.Set(px + n / 2, n / 2); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->y1 = ctx->x1; + ctx->opM->Mult(ctx->x2, ctx->y2); + ctx->y2 *= ctx->delta * ctx->gamma * ctx->gamma; + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y1.Get(py, n / 2); + ctx->y2.Get(py + n / 2, n / 2); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_PEPLinear_B(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + palace::slepc::SlepcPEPLinearSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x1.Set(px, n / 2); + ctx->x2.Set(px + n / 2, n / 2); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opB->Mult(ctx->x1.Real(), ctx->y1.Real()); + ctx->opB->Mult(ctx->x1.Imag(), ctx->y1.Imag()); + ctx->opB->Mult(ctx->x2.Real(), ctx->y2.Real()); + ctx->opB->Mult(ctx->x2.Imag(), ctx->y2.Imag()); + ctx->y1 *= ctx->delta * ctx->gamma * ctx->gamma; + ctx->y2 *= ctx->delta * ctx->gamma * ctx->gamma; + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y1.Get(py, n / 2); + ctx->y2.Get(py + n / 2, n / 2); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __pc_apply_PEPLinear(PC pc, Vec x, Vec y) +{ + // Solve the linear system associated with the generalized eigenvalue problem after + // linearization: y = L₁⁻¹ x, or with the shift-and-invert spectral transformation: + // y = (L₀ - σ L₁)⁻¹ x, with: + // L₀ = [ 0 I ] L₁ = [ I 0 ] + // [ -K -C 
] , [ 0 M ] . + // Enforces the divergence-free constraint using the supplied projector. + PetscFunctionBeginUser; + palace::slepc::SlepcPEPLinearSolver *ctx; + PetscCall(PCShellGetContext(pc, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x1.Set(px, n / 2); + ctx->x2.Set(px + n / 2, n / 2); + PetscCall(VecRestoreArrayRead(x, &px)); + + if (!ctx->sinvert) + { + ctx->y1 = ctx->x1; + if (ctx->opProj) + { + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); + ctx->opProj->Mult(ctx->y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); + } + + ctx->opInv->Mult(ctx->x2, ctx->y2); + ctx->y2 *= 1.0 / (ctx->delta * ctx->gamma * ctx->gamma); + if (ctx->opProj) + { + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y2)); + ctx->opProj->Mult(ctx->y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y2)); + } + } + else + { + ctx->y1.AXPBY(-ctx->sigma / (ctx->delta * ctx->gamma), ctx->x2, 0.0); // Temporarily + ctx->opK->AddMult(ctx->x1, ctx->y1, std::complex(1.0, 0.0)); + ctx->opInv->Mult(ctx->y1, ctx->y2); + if (ctx->opProj) + { + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y2)); + ctx->opProj->Mult(ctx->y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y2)); + } + + ctx->y1.AXPBYPCZ(ctx->gamma / ctx->sigma, ctx->y2, -ctx->gamma / ctx->sigma, ctx->x1, + 0.0); + if (ctx->opProj) + { + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); + ctx->opProj->Mult(ctx->y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); + } + } + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y1.Get(py, n / 2); + ctx->y2.Get(py + n / 2, n / 2); 
+ PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_PEP_A0(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + palace::slepc::SlepcPEPSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opK->Mult(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_PEP_A1(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + palace::slepc::SlepcPEPSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opC->Mult(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_PEP_A2(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + palace::slepc::SlepcPEPSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opM->Mult(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); 
+} + +PetscErrorCode __mat_apply_PEP_B(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + palace::slepc::SlepcPEPSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opB->Mult(ctx->x.Real(), ctx->y.Real()); + ctx->opB->Mult(ctx->x.Imag(), ctx->y.Imag()); + ctx->y *= ctx->delta * ctx->gamma; + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __pc_apply_PEP(PC pc, Vec x, Vec y) +{ + // Solve the linear system associated with the generalized eigenvalue problem: y = M⁻¹ x, + // or shift-and-invert spectral transformation: y = P(σ)⁻¹ x . Enforces the divergence- + // free constraint using the supplied projector. 
+ PetscFunctionBeginUser; + palace::slepc::SlepcPEPSolver *ctx; + PetscCall(PCShellGetContext(pc, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!"); + + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opInv->Mult(ctx->x, ctx->y); + if (!ctx->sinvert) + { + ctx->y *= 1.0 / (ctx->delta * ctx->gamma * ctx->gamma); + } + else + { + ctx->y *= 1.0 / ctx->delta; + } + if (ctx->opProj) + { + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y)); + ctx->opProj->Mult(ctx->y); + // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y)); + } + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + #endif diff --git a/palace/linalg/slepc.hpp b/palace/linalg/slepc.hpp index e1335631e..22d4eb521 100644 --- a/palace/linalg/slepc.hpp +++ b/palace/linalg/slepc.hpp @@ -244,18 +244,24 @@ class SlepcEPSSolverBase : public SlepcEigenSolver // Generalized eigenvalue problem solver: K x = λ M x . class SlepcEPSSolver : public SlepcEPSSolverBase { -private: +public: + using SlepcEigenSolver::delta; + using SlepcEigenSolver::gamma; + using SlepcEigenSolver::opB; + using SlepcEigenSolver::opInv; + using SlepcEigenSolver::opProj; + using SlepcEigenSolver::sigma; + using SlepcEigenSolver::sinvert; + // References to matrices defining the generalized eigenvalue problem (not owned). const ComplexOperator *opK, *opM; - // Operator norms for scaling. - mutable PetscReal normK, normM; - // Workspace vector for operator applications. mutable ComplexVector x, y; - // Configure linear solver for generalized problem or spectral transformation. - void ConfigurePCShell(); +private: + // Operator norms for scaling. 
+ mutable PetscReal normK, normM; protected: PetscReal GetResidualNorm(int i) const override; @@ -275,19 +281,25 @@ class SlepcEPSSolver : public SlepcEPSSolverBase // linearization: L₀ y = λ L₁ y . class SlepcPEPLinearSolver : public SlepcEPSSolverBase { -private: +public: + using SlepcEigenSolver::delta; + using SlepcEigenSolver::gamma; + using SlepcEigenSolver::opB; + using SlepcEigenSolver::opInv; + using SlepcEigenSolver::opProj; + using SlepcEigenSolver::sigma; + using SlepcEigenSolver::sinvert; + // References to matrices defining the quadratic polynomial eigenvalue problem // (not owned). const ComplexOperator *opK, *opC, *opM; - // Operator norms for scaling. - mutable PetscReal normK, normC, normM; - // Workspace vectors for operator applications. mutable ComplexVector x1, x2, y1, y2; - // Configure linear solver for generalized problem or spectral transformation. - void ConfigurePCShell(); +private: + // Operator norms for scaling. + mutable PetscReal normK, normC, normM; protected: PetscReal GetResidualNorm(int i) const override; @@ -366,19 +378,25 @@ class SlepcPEPSolverBase : public SlepcEigenSolver // Quadratic eigenvalue problem solver: P(λ) x = (K + λ C + λ² M) x = 0 . class SlepcPEPSolver : public SlepcPEPSolverBase { -private: +public: + using SlepcEigenSolver::delta; + using SlepcEigenSolver::gamma; + using SlepcEigenSolver::opB; + using SlepcEigenSolver::opInv; + using SlepcEigenSolver::opProj; + using SlepcEigenSolver::sigma; + using SlepcEigenSolver::sinvert; + // References to matrices defining the quadratic polynomial eigenvalue problem // (not owned). const ComplexOperator *opK, *opC, *opM; - // Operator norms for scaling. - mutable PetscReal normK, normC, normM; - // Workspace vector for operator applications. mutable ComplexVector x, y; - // Configure linear solver for generalized problem or spectral transformation. - void ConfigurePCShell(); +private: + // Operator norms for scaling. 
+ mutable PetscReal normK, normC, normM; protected: PetscReal GetResidualNorm(int i) const override; diff --git a/palace/linalg/strumpack.cpp b/palace/linalg/strumpack.cpp index 1db94d765..18e5946bc 100644 --- a/palace/linalg/strumpack.cpp +++ b/palace/linalg/strumpack.cpp @@ -108,16 +108,17 @@ void StrumpackSolverBase::SetOperator(const Operator &op) { // Convert the input operator to a distributed STRUMPACK matrix (always assume a symmetric // sparsity pattern). Safe to delete the matrix since STRUMPACK copies it on input. - mfem::STRUMPACKRowLocMatrix A(op, true); - - // Set up base class. - StrumpackSolverType::SetOperator(A); -} - -template -void StrumpackSolverBase::SetOperator(const ParOperator &op) -{ - SetOperator(const_cast(&op)->ParallelAssemble()); + const auto *PtAP = dynamic_cast(&op); + if (PtAP) + { + mfem::STRUMPACKRowLocMatrix A(const_cast(PtAP)->ParallelAssemble()); + StrumpackSolverType::SetOperator(A); + } + else + { + mfem::STRUMPACKRowLocMatrix A(op, true); + StrumpackSolverType::SetOperator(A); + } } template class StrumpackSolverBase; diff --git a/palace/linalg/strumpack.hpp b/palace/linalg/strumpack.hpp index fa1be49c2..f1d17c979 100644 --- a/palace/linalg/strumpack.hpp +++ b/palace/linalg/strumpack.hpp @@ -38,7 +38,6 @@ class StrumpackSolverBase : public StrumpackSolverType } void SetOperator(const Operator &op) override; - void SetOperator(const ParOperator &op); }; using StrumpackSolver = StrumpackSolverBase; diff --git a/palace/linalg/superlu.cpp b/palace/linalg/superlu.cpp index f787ca172..5c3e235be 100644 --- a/palace/linalg/superlu.cpp +++ b/palace/linalg/superlu.cpp @@ -80,17 +80,19 @@ void SuperLUSolver::SetOperator(const Operator &op) { solver.SetFact(mfem::superlu::SamePattern_SameRowPerm); } - A = std::make_unique(op); - - // Set up base class. 
+ const auto *PtAP = dynamic_cast(&op); + if (PtAP) + { + A = std::make_unique( + const_cast(PtAP)->ParallelAssemble()); + } + else + { + A = std::make_unique(op); + } solver.SetOperator(*A); } -void SuperLUSolver::SetOperator(const ParOperator &op) -{ - SetOperator(const_cast(&op)->ParallelAssemble()); -} - } // namespace palace #endif diff --git a/palace/linalg/superlu.hpp b/palace/linalg/superlu.hpp index c4e16799b..74e857423 100644 --- a/palace/linalg/superlu.hpp +++ b/palace/linalg/superlu.hpp @@ -36,7 +36,6 @@ class SuperLUSolver : public mfem::Solver } void SetOperator(const Operator &op) override; - void SetOperator(const ParOperator &op); void Mult(const Vector &x, Vector &y) const override { solver.Mult(x, y); } void ArrayMult(const mfem::Array &X, diff --git a/palace/main.cpp b/palace/main.cpp index 9e400f954..e5b16a9f7 100644 --- a/palace/main.cpp +++ b/palace/main.cpp @@ -129,9 +129,8 @@ int main(int argc, char *argv[]) PrintBanner(world_comm, world_size, num_thread, git_tag); IoData iodata(argv[1], false); - // Initialize Hypre and, optionally, SLEPc. + // Initialize Hypre and, optionally, SLEPc/PETSc. mfem::Hypre::Init(); - // petsc::Initialize(argc, argv, nullptr, nullptr); //XX TODO REMOVE... #if defined(PALACE_WITH_SLEPC) slepc::Initialize(argc, argv, nullptr, nullptr); #endif @@ -185,11 +184,10 @@ int main(int argc, char *argv[]) solver->SaveMetadata(timer); Mpi::Print(world_comm, "\n"); - // Finalize PETSc. + // Finalize SLEPc/PETSc. #if defined(PALACE_WITH_SLEPC) slepc::Finalize(); #endif - // petsc::Finalize(); //XX TODO REMOVE return 0; } diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp index e81fea645..994bf8bcd 100644 --- a/palace/models/curlcurloperator.cpp +++ b/palace/models/curlcurloperator.cpp @@ -71,22 +71,22 @@ CurlCurlOperator::CurlCurlOperator(const IoData &iodata, const std::vector> &mesh) : assembly_level(iodata.solver.linear.mat_pa ? 
mfem::AssemblyLevel::PARTIAL : mfem::AssemblyLevel::LEGACY), - skip_zeros(0), pc_gmg(iodata.solver.linear.mat_gmg), print_hdr(true), + skip_zeros(0), pc_mg(iodata.solver.linear.pc_mg), print_hdr(true), dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), nd_fecs(utils::ConstructFECollections( - pc_gmg, false, iodata.solver.order, mesh.back()->Dimension())), + pc_mg, false, iodata.solver.order, mesh.back()->Dimension())), h1_fecs(utils::ConstructFECollections( - pc_gmg, false, iodata.solver.order, mesh.back()->Dimension())), + pc_mg, false, iodata.solver.order, mesh.back()->Dimension())), rt_fec(iodata.solver.order - 1, mesh.back()->Dimension()), - nd_fespaces(pc_gmg ? utils::ConstructFiniteElementSpaceHierarchy( - mesh, nd_fecs, &dbc_marker, &dbc_tdof_lists) - : utils::ConstructFiniteElementSpaceHierarchy( - *mesh.back(), *nd_fecs.back(), &dbc_marker, - &dbc_tdof_lists.emplace_back())), - h1_fespaces(pc_gmg ? utils::ConstructFiniteElementSpaceHierarchy( - mesh, h1_fecs) - : utils::ConstructFiniteElementSpaceHierarchy( - *mesh.back(), *h1_fecs.back())), + nd_fespaces(pc_mg ? utils::ConstructFiniteElementSpaceHierarchy( + mesh, nd_fecs, &dbc_marker, &dbc_tdof_lists) + : utils::ConstructFiniteElementSpaceHierarchy( + *mesh.back(), *nd_fecs.back(), &dbc_marker, + &dbc_tdof_lists.emplace_back())), + h1_fespaces(pc_mg ? utils::ConstructFiniteElementSpaceHierarchy( + mesh, h1_fecs) + : utils::ConstructFiniteElementSpaceHierarchy( + *mesh.back(), *h1_fecs.back())), rt_fespace(mesh.back().get(), &rt_fec), mat_op(iodata, *mesh.back()), surf_j_op(iodata, GetH1Space()) { @@ -117,7 +117,7 @@ void CurlCurlOperator::GetStiffnessMatrix(std::vector> &mesh) : assembly_level(iodata.solver.linear.mat_pa ? 
mfem::AssemblyLevel::PARTIAL : mfem::AssemblyLevel::LEGACY), - skip_zeros(0), pc_gmg(iodata.solver.linear.mat_gmg), print_hdr(true), + skip_zeros(0), pc_mg(iodata.solver.linear.pc_mg), print_hdr(true), dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), h1_fecs(utils::ConstructFECollections( - pc_gmg, false, iodata.solver.order, mesh.back()->Dimension())), + pc_mg, false, iodata.solver.order, mesh.back()->Dimension())), nd_fec(iodata.solver.order, mesh.back()->Dimension()), - h1_fespaces(pc_gmg ? utils::ConstructFiniteElementSpaceHierarchy( - mesh, h1_fecs, &dbc_marker, &dbc_tdof_lists) - : utils::ConstructFiniteElementSpaceHierarchy( - *mesh.back(), *h1_fecs.back(), &dbc_marker, - &dbc_tdof_lists.emplace_back())), + h1_fespaces(pc_mg ? utils::ConstructFiniteElementSpaceHierarchy( + mesh, h1_fecs, &dbc_marker, &dbc_tdof_lists) + : utils::ConstructFiniteElementSpaceHierarchy( + *mesh.back(), *h1_fecs.back(), &dbc_marker, + &dbc_tdof_lists.emplace_back())), nd_fespace(mesh.back().get(), &nd_fec), mat_op(iodata, *mesh.back()), source_attr_lists(ConstructSources(iodata)) { @@ -140,7 +140,7 @@ void LaplaceOperator::GetStiffnessMatrix(std::vector(std::move(k), h1_fespace_l, h1_fespace_l)); K.back()->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); } + // Save local (uneliminated) operator after parallel assembly for RHS BC elimination. 
+ K.back()->SaveLocalOperator(); print_hdr = false; } diff --git a/palace/models/laplaceoperator.hpp b/palace/models/laplaceoperator.hpp index c1cfb1df7..f1f8f0ac8 100644 --- a/palace/models/laplaceoperator.hpp +++ b/palace/models/laplaceoperator.hpp @@ -25,7 +25,7 @@ class LaplaceOperator private: const mfem::AssemblyLevel assembly_level; // Use full or partial assembly for operators const int skip_zeros; // Skip zeros during full assembly of operators - const bool pc_gmg; // Use geometric multigrid in preconditioning + const bool pc_mg; // Use geometric multigrid in preconditioning // Helper variable for log file printing. bool print_hdr; diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index 6009ab017..941413da0 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -106,25 +106,25 @@ SpaceOperator::SpaceOperator(const IoData &iodata, const std::vector> &mesh) : assembly_level(iodata.solver.linear.mat_pa ? mfem::AssemblyLevel::PARTIAL : mfem::AssemblyLevel::LEGACY), - skip_zeros(0), pc_gmg(iodata.solver.linear.mat_gmg), - pc_lor(iodata.solver.linear.mat_lor), pc_shifted(iodata.solver.linear.mat_shifted), - print_hdr(true), print_prec_hdr(true), + skip_zeros(0), pc_mg(iodata.solver.linear.pc_mg), + pc_lor(iodata.solver.linear.pc_mat_lor), + pc_shifted(iodata.solver.linear.pc_mat_shifted), print_hdr(true), print_prec_hdr(true), dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), nd_fecs(utils::ConstructFECollections( - pc_gmg, pc_lor, iodata.solver.order, mesh.back()->Dimension())), + pc_mg, pc_lor, iodata.solver.order, mesh.back()->Dimension())), h1_fecs(utils::ConstructFECollections( - pc_gmg, false, iodata.solver.order, mesh.back()->Dimension())), + pc_mg, false, iodata.solver.order, mesh.back()->Dimension())), rt_fec(iodata.solver.order - 1, mesh.back()->Dimension()), - nd_fespaces(pc_gmg ? 
utils::ConstructFiniteElementSpaceHierarchy( - mesh, nd_fecs, &dbc_marker, &nd_dbc_tdof_lists) - : utils::ConstructFiniteElementSpaceHierarchy( - *mesh.back(), *nd_fecs.back(), &dbc_marker, - &nd_dbc_tdof_lists.emplace_back())), - h1_fespaces(pc_gmg ? utils::ConstructFiniteElementSpaceHierarchy( - mesh, h1_fecs, &dbc_marker, &h1_dbc_tdof_lists) - : utils::ConstructFiniteElementSpaceHierarchy( - *mesh.back(), *h1_fecs.back(), &dbc_marker, - &h1_dbc_tdof_lists.emplace_back())), + nd_fespaces(pc_mg ? utils::ConstructFiniteElementSpaceHierarchy( + mesh, nd_fecs, &dbc_marker, &nd_dbc_tdof_lists) + : utils::ConstructFiniteElementSpaceHierarchy( + *mesh.back(), *nd_fecs.back(), &dbc_marker, + &nd_dbc_tdof_lists.emplace_back())), + h1_fespaces(pc_mg ? utils::ConstructFiniteElementSpaceHierarchy( + mesh, h1_fecs, &dbc_marker, &h1_dbc_tdof_lists) + : utils::ConstructFiniteElementSpaceHierarchy( + *mesh.back(), *h1_fecs.back(), &dbc_marker, + &h1_dbc_tdof_lists.emplace_back())), rt_fespace(mesh.back().get(), &rt_fec), mat_op(iodata, *mesh.back()), farfield_op(iodata, mat_op, *mesh.back()), surf_sigma_op(iodata, *mesh.back()), surf_z_op(iodata, *mesh.back()), lumped_port_op(iodata, GetH1Space()), @@ -211,7 +211,7 @@ SpaceOperator::GetSystemMatrix(SpaceOperator::OperatorType type, if (print_hdr) { Mpi::Print("\nAssembling system matrices, number of global unknowns:\n" - " ND: {:d}\n H1: {:d}\n RT: {:d}\n", + " ND: {:d}, H1: {:d}, RT: {:d}\n", GetNDSpace().GlobalTrueVSize(), GetH1Space().GlobalTrueVSize(), GetRTSpace().GlobalTrueVSize()); print_hdr = false; @@ -254,7 +254,7 @@ SpaceOperator::GetComplexSystemMatrix(SpaceOperator::OperatorType type, double o if (print_hdr) { Mpi::Print("\nAssembling system matrices, number of global unknowns:\n" - " ND: {:d}\n H1: {:d}\n RT: {:d}\n", + " ND: {:d}, H1: {:d}, RT: {:d}\n", GetNDSpace().GlobalTrueVSize(), GetH1Space().GlobalTrueVSize(), GetRTSpace().GlobalTrueVSize()); print_hdr = false; @@ -326,18 +326,18 @@ std::unique_ptr 
SpaceOperator::GetSystemMatrix(double a0, double a1 int height = -1, width = -1; if (K) { - height = K->Height(); - width = K->Width(); + height = K->LocalOperator().Height(); + width = K->LocalOperator().Width(); } else if (C) { - height = C->Height(); - width = C->Width(); + height = C->LocalOperator().Height(); + width = C->LocalOperator().Width(); } else if (M) { - height = M->Height(); - width = M->Width(); + height = M->LocalOperator().Height(); + width = M->LocalOperator().Width(); } MFEM_VERIFY(height >= 0 && width >= 0, "At least one argument to GetSystemMatrix must not be empty!"); @@ -367,23 +367,23 @@ std::unique_ptr SpaceOperator::GetComplexSystemMatrix( int height = -1, width = -1; if (K) { - height = K->Height(); - width = K->Width(); + height = K->LocalOperator().Height(); + width = K->LocalOperator().Width(); } else if (C) { - height = C->Height(); - width = C->Width(); + height = C->LocalOperator().Height(); + width = C->LocalOperator().Width(); } else if (M) { - height = M->Height(); - width = M->Width(); + height = M->LocalOperator().Height(); + width = M->LocalOperator().Width(); } else if (A2) { - height = A2->Height(); - width = A2->Width(); + height = A2->LocalOperator().Height(); + width = A2->LocalOperator().Width(); } MFEM_VERIFY(height >= 0 && width >= 0, "At least one argument to GetSystemMatrix must not be empty!"); @@ -478,7 +478,7 @@ void SpaceOperator::GetPreconditionerMatrix(double a0, double a1, double a2, dou { HYPRE_BigInt nnz = b->SpMat().NumNonZeroElems(); Mpi::GlobalSum(1, &nnz, fespace_l.GetComm()); - Mpi::Print("{:d} NNZ\n", nnz); + Mpi::Print(", {:d} NNZ\n", nnz); } else { diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp index 9da54232a..b8774037e 100644 --- a/palace/models/spaceoperator.hpp +++ b/palace/models/spaceoperator.hpp @@ -33,7 +33,7 @@ class SpaceOperator private: const mfem::AssemblyLevel assembly_level; // Use full or partial assembly for operators const int skip_zeros; // Skip 
zeros during full assembly of operators - const bool pc_gmg; // Use geometric multigrid in preconditioning + const bool pc_mg; // Use geometric multigrid in preconditioning const bool pc_lor; // Use low-order refined (LOR) space for the preconditioner const bool pc_shifted; // Use shifted mass matrix for the preconditioner diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp index e3f5d06f7..423d757bc 100644 --- a/palace/models/timeoperator.cpp +++ b/palace/models/timeoperator.cpp @@ -63,7 +63,7 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera { // PCG with a simple Jacobi preconditioner for mass matrix systems. auto pcg = std::make_unique(M->GetComm()); - pcg->iterative_mode = iodata.solver.linear.ksp_initial_guess; + pcg->iterative_mode = iodata.solver.linear.initial_guess; pcg->SetRelTol(iodata.solver.linear.tol); pcg->SetMaxIter(iodata.solver.linear.max_it); pcg->SetPrintLevel(0); diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp index 3c7f1609f..c93bf0090 100644 --- a/palace/models/waveportoperator.cpp +++ b/palace/models/waveportoperator.cpp @@ -887,7 +887,7 @@ void WavePortOperator::Initialize(double omega) if (first) { Mpi::Print(" Number of global unknowns for port {:d}:\n" - " ND: {:d}\n H1: {:d}\n", + " ND: {:d}, H1: {:d}\n", data.GlobalTrueNDSize(), data.GlobalTrueH1Size()); } double k0 = 1.0 / iodata.DimensionalizeValue(IoData::ValueType::LENGTH, 1.0); diff --git a/palace/utils/configfile.cpp b/palace/utils/configfile.cpp index 2c52a41be..514f5e92e 100644 --- a/palace/utils/configfile.cpp +++ b/palace/utils/configfile.cpp @@ -1549,23 +1549,22 @@ void LinearSolverData::SetUp(json &solver) tol = linear->value("Tol", tol); max_it = linear->value("MaxIts", max_it); max_size = linear->value("MaxSize", max_size); - orthog_mgs = linear->value("UseMGS", orthog_mgs); - orthog_cgs2 = linear->value("UseCGS2", orthog_cgs2); - ksp_initial_guess = 
linear->value("UseInitialGuess", ksp_initial_guess); - ksp_piped = linear->value("UseKSPPiped", ksp_piped); + initial_guess = linear->value("UseInitialGuess", initial_guess); // Preconditioner-specific options - mat_pa = linear->value("UsePA", mat_pa); - mat_gmg = linear->value("UseGMG", mat_gmg); - mat_lor = linear->value("UseLOR", mat_lor); - mat_shifted = linear->value("UsePCShifted", mat_shifted); + mat_pa = linear->value("UsePartialAssembly", mat_pa); + pc_mat_lor = linear->value("UseLowOrderRefined", pc_mat_lor); + pc_mat_shifted = linear->value("UsePCMatShifted", pc_mat_shifted); + pc_side_type = linear->value("PCSide", pc_side_type); + MFEM_VERIFY(pc_side_type != LinearSolverData::SideType::INVALID, + "Invalid value for config[\"Linear\"][\"PCSide\"] in configuration file!"); + + pc_mg = linear->value("UseMultigrid", pc_mg); + mg_smooth_aux = linear->value("MGAuxiliarySpaceSmoother", mg_smooth_aux); mg_cycle_it = linear->value("MGCycleIts", mg_cycle_it); mg_smooth_it = linear->value("MGSmoothIts", mg_smooth_it); mg_smooth_order = linear->value("MGSmoothOrder", mg_smooth_order); - pc_side_type = linear->value("PrecondSide", pc_side_type); - MFEM_VERIFY( - pc_side_type != LinearSolverData::SideType::INVALID, - "Invalid value for config[\"Linear\"][\"PrecondSide\"] in configuration file!"); + sym_fact_type = linear->value("Reordering", sym_fact_type); MFEM_VERIFY( sym_fact_type != LinearSolverData::SymFactType::INVALID, @@ -1580,28 +1579,31 @@ void LinearSolverData::SetUp(json &solver) linear->value("STRUMPACKLossyPrecision", strumpack_lossy_precision); strumpack_butterfly_l = linear->value("STRUMPACKButterflyLevels", strumpack_butterfly_l); superlu_3d = linear->value("SuperLU3D", superlu_3d); + ams_vector = linear->value("AMSVector", ams_vector); + divfree_tol = linear->value("DivFreeTol", divfree_tol); divfree_max_it = linear->value("DivFreeMaxIts", divfree_max_it); + orthog_mgs = linear->value("OrthogUseMGS", orthog_mgs); + orthog_cgs2 = 
linear->value("OrthogUseCGS2", orthog_cgs2); + // Cleanup linear->erase("Type"); linear->erase("KSPType"); linear->erase("Tol"); linear->erase("MaxIts"); linear->erase("MaxSize"); - linear->erase("UseMGS"); - linear->erase("UseCGS2"); linear->erase("UseInitialGuess"); - linear->erase("UseKSPPiped"); - linear->erase("UsePA"); - linear->erase("UseGMG"); - linear->erase("UseLOR"); - linear->erase("UsePCShifted"); + linear->erase("UsePartialAssembly"); + linear->erase("UseLowOrderRefined"); + linear->erase("UsePCMatShifted"); + linear->erase("PCSide"); + linear->erase("UseMultigrid"); + linear->erase("MGAuxiliarySmoother"); linear->erase("MGCycleIts"); linear->erase("MGSmoothIts"); linear->erase("MGSmoothOrder"); - linear->erase("PrecondSide"); linear->erase("Reordering"); linear->erase("STRUMPACKCompressionType"); linear->erase("STRUMPACKCompressionTol"); @@ -1611,6 +1613,8 @@ void LinearSolverData::SetUp(json &solver) linear->erase("AMSVector"); linear->erase("DivFreeTol"); linear->erase("DivFreeMaxIts"); + linear->erase("OrthogUseMGS"); + linear->erase("OrthogUseCGS2"); MFEM_VERIFY(linear->empty(), "Found an unsupported configuration file keyword under \"Linear\"!\n" << linear->dump(2)); @@ -1621,18 +1625,16 @@ void LinearSolverData::SetUp(json &solver) // std::cout << "Tol: " << tol << '\n'; // std::cout << "MaxIts: " << max_it << '\n'; // std::cout << "MaxSize: " << max_size << '\n'; - // std::cout << "UseMGS: " << orthog_mgs << '\n'; - // std::cout << "UseCGS2: " << orthog_cgs2 << '\n'; - // std::cout << "UseInitialGuess: " << ksp_initial_guess << '\n'; - // std::cout << "UseKSPPiped: " << ksp_piped << '\n'; - // std::cout << "UsePA: " << mat_pa << '\n'; - // std::cout << "UseGMG: " << mat_gmg << '\n'; - // std::cout << "UseLOR: " << mat_lor << '\n'; - // std::cout << "UsePCShifted: " << mat_shifted << '\n'; + // std::cout << "UseInitialGuess: " << initial_guess << '\n'; + // std::cout << "UsePartialAssembly: " << mat_pa << '\n'; + // std::cout << 
"UseLowOrderRefined: " << pc_mat_lor << '\n'; + // std::cout << "UsePCMatShifted: " << pc_mat_shifted << '\n'; + // std::cout << "PCSide: " << pc_side_type << '\n'; + // std::cout << "UseMultigrid: " << pc_mg << '\n'; + // std::cout << "MGAuxiliarySmoother: " << mg_smooth_aux << '\n'; // std::cout << "MGCycleIts: " << mg_cycle_it << '\n'; // std::cout << "MGSmoothIts: " << mg_smooth_it << '\n'; // std::cout << "MGSmoothOrder: " << mg_smooth_order << '\n'; - // std::cout << "PrecondSide: " << pc_side_type << '\n'; // std::cout << "Reordering: " << sym_fact_type << '\n'; // std::cout << "STRUMPACKCompressionType: " << strumpack_compression_type << '\n'; // std::cout << "STRUMPACKCompressionTol: " << strumpack_lr_tol << '\n'; @@ -1642,6 +1644,8 @@ void LinearSolverData::SetUp(json &solver) // std::cout << "AMSVector: " << ams_vector << '\n'; // std::cout << "DivFreeTol: " << divfree_tol << '\n'; // std::cout << "DivFreeMaxIts: " << divfree_max_it << '\n'; + // std::cout << "OrthogUseMGS: " << orthog_mgs << '\n'; + // std::cout << "OrthogUseCGS2: " << orthog_cgs2 << '\n'; } void SolverData::SetUp(json &config) diff --git a/palace/utils/configfile.hpp b/palace/utils/configfile.hpp index b6e1e0685..5f9f58671 100644 --- a/palace/utils/configfile.hpp +++ b/palace/utils/configfile.hpp @@ -697,10 +697,6 @@ struct TransientSolverData struct LinearSolverData { - - // XX TODO REVISIT AVAILABLE OPTIONS FOR KSP AFTER HYPRE SWITCH... (ALSO ADD "DEFAULT") - // XX TODO REVISIT OPTIONS FOR PA AND KEYWORDS... "GMG" "PA" CAN DO BETTER (RATEL?) - public: // Solver type. enum class Type @@ -738,31 +734,36 @@ struct LinearSolverData // Maximum Krylov space dimension for GMRES/FGMRES iterative solvers. int max_size = -1; - // Enable modified Gram-Schmidt orthogonalization instead of classical for GMRES/FGMRES - // Krylov solvers and SLEPc eigenvalue solver. - bool orthog_mgs = false; - bool orthog_cgs2 = false; - // Reuse previous solution as initial guess for Krylov solvers. 
- int ksp_initial_guess = -1; - - // Enable pipelined Krylov solver variants to reduce blocking communications. - bool ksp_piped = false; // XX TODO REMOVE.... + int initial_guess = -1; // Enable partial assembly for operators. bool mat_pa = false; - // Enable hp-geometric multigrid coarsening, using the solver specified by the type member - // at the coarsest level. - bool mat_gmg = true; - // Enable low-order refined (LOR) preconditioner construction. Only available for meshes - // based on tensor elements. - bool mat_lor = false; + // based on tensor-product elements. + bool pc_mat_lor = false; // For frequency domain applications, precondition linear systems with a shifted matrix // (makes the preconditoner matrix SPD). - int mat_shifted = -1; + int pc_mat_shifted = -1; + + // Choose left or right preconditioning. + enum class SideType + { + RIGHT, + LEFT, + DEFAULT, + INVALID = -1 + }; + SideType pc_side_type = SideType::DEFAULT; + + // Enable hp-geometric multigrid coarsening, using the solver specified by the type member + // at the coarsest level. + bool pc_mg = true; + + // Use auxiliary space smoothers on geometric multigrid levels + int mg_smooth_aux = -1; // Number of iterations for preconditioners which support it. For multigrid, this is the // number of V-cycles per Krylov solver iteration. @@ -775,17 +776,7 @@ struct LinearSolverData // Order of polynomial smoothing for geometric multigrid. int mg_smooth_order = 4; - // Choose left or right preconditioning. - enum class SideType - { - RIGHT, - LEFT, - DEFAULT, - INVALID = -1 - }; - SideType pc_side_type = SideType::DEFAULT; // XX TODO REMOVE... - - // Choose left or right preconditioning. + // Specify details for symbolic factorization used by sparse direct solvers. enum class SymFactType { METIS, @@ -798,7 +789,7 @@ struct LinearSolverData SymFactType sym_fact_type = SymFactType::DEFAULT; // Low-rank and butterfly compression parameters for sparse direct solvers which support - // it. 
+ // it (mainly STRUMPACK). enum class CompressionType { NONE, @@ -827,6 +818,11 @@ struct LinearSolverData // Maximum number of iterations for solving linear systems in divergence-free projector. int divfree_max_it = 100; + // Enable modified Gram-Schmidt orthogonalization instead of classical for GMRES/FGMRES + // Krylov solvers and SLEPc eigenvalue solver. + bool orthog_mgs = false; + bool orthog_cgs2 = false; + void SetUp(json &solver); }; diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp index 1f081de98..9032b97a6 100644 --- a/palace/utils/geodata.cpp +++ b/palace/utils/geodata.cpp @@ -152,7 +152,7 @@ void RefineMesh(const IoData &iodata, std::vector max_region_ref_levels = sphere.ref_levels; } } - if (iodata.solver.linear.mat_gmg) + if (iodata.solver.linear.pc_mg) { mesh.reserve(1 + uniform_ref_levels + max_region_ref_levels); } diff --git a/palace/utils/iodata.cpp b/palace/utils/iodata.cpp index d766df879..74a84face 100644 --- a/palace/utils/iodata.cpp +++ b/palace/utils/iodata.cpp @@ -332,24 +332,26 @@ void IoData::CheckConfiguration() { solver.linear.max_size = solver.linear.max_it; } - if (solver.linear.ksp_initial_guess < 0) + if (solver.linear.initial_guess < 0) { if ((problem.type == config::ProblemData::Type::DRIVEN && solver.driven.adaptive_tol <= 0.0) || - problem.type == config::ProblemData::Type::TRANSIENT) + problem.type == config::ProblemData::Type::TRANSIENT || + problem.type == config::ProblemData::Type::ELECTROSTATIC || + problem.type == config::ProblemData::Type::MAGNETOSTATIC) { - // Default true only driven simulations without adaptive frequency sweep, or transient - // simulations. - solver.linear.ksp_initial_guess = 1; + // Default true only driven simulations without adaptive frequency sweep, transient + // simulations, or electrostatic or magnetostatics. 
+ solver.linear.initial_guess = 1; } else { - solver.linear.ksp_initial_guess = 0; + solver.linear.initial_guess = 0; } } - if (solver.linear.mat_shifted < 0) + if (solver.linear.pc_mat_shifted < 0) { - solver.linear.mat_shifted = 0; // Default false for most cases + solver.linear.pc_mat_shifted = 0; // Default false for most cases if (problem.type == config::ProblemData::Type::DRIVEN) { #if defined(MFEM_USE_SUPERLU) || defined(MFEM_USE_STRUMPACK) || defined(MFEM_USE_MUMPS) @@ -360,10 +362,24 @@ void IoData::CheckConfiguration() #endif { // Default true only driven simulations using AMS. - solver.linear.mat_shifted = 1; + solver.linear.pc_mat_shifted = 1; } } } + if (solver.linear.mg_smooth_aux < 0) + { + if (problem.type == config::ProblemData::Type::ELECTROSTATIC || + problem.type == config::ProblemData::Type::MAGNETOSTATIC) + { + // Disable auxiliary space smoothing using distributive relaxation by default for + // problems which don't need it. + solver.linear.mg_smooth_aux = 0; + } + else + { + solver.linear.mg_smooth_aux = 1; + } + } } namespace From 467edb52fb8aa81b535e665a234c086a248237e5 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Wed, 17 May 2023 20:30:23 -0700 Subject: [PATCH 08/41] Debugging: Magnetostatics and transient simulations, with bug fixes for parallel assembly and system matrix construction, as well as AMS upgrades --- palace/drivers/drivensolver.cpp | 4 +-- palace/drivers/eigensolver.cpp | 4 +-- palace/drivers/transientsolver.cpp | 1 + palace/linalg/ams.cpp | 57 ++++++++++++++---------------- palace/linalg/ams.hpp | 6 ++-- palace/linalg/operator.cpp | 41 +++++++++++---------- palace/linalg/operator.hpp | 13 +++++-- palace/models/spaceoperator.cpp | 4 +++ palace/models/timeoperator.cpp | 8 ++--- palace/utils/configfile.cpp | 2 +- 10 files changed, 72 insertions(+), 68 deletions(-) diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp index 9b209eea7..e9f527bb2 100644 --- a/palace/drivers/drivensolver.cpp +++ 
b/palace/drivers/drivensolver.cpp @@ -117,10 +117,10 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in // KspSolver::SetOperators). Compute everything at the first frequency step. std::unique_ptr K = spaceop.GetComplexSystemMatrix( SpaceOperator::OperatorType::STIFFNESS, Operator::DIAG_ONE); - std::unique_ptr M = spaceop.GetComplexSystemMatrix( - SpaceOperator::OperatorType::MASS, Operator::DIAG_ZERO); std::unique_ptr C = spaceop.GetComplexSystemMatrix( SpaceOperator::OperatorType::DAMPING, Operator::DIAG_ZERO); + std::unique_ptr M = spaceop.GetComplexSystemMatrix( + SpaceOperator::OperatorType::MASS, Operator::DIAG_ZERO); std::unique_ptr A2 = spaceop.GetComplexSystemMatrix( SpaceOperator::OperatorType::EXTRA, omega0, Operator::DIAG_ZERO); std::unique_ptr Curl = spaceop.GetComplexCurlMatrix(); diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp index 72f5c33af..3d070da76 100644 --- a/palace/drivers/eigensolver.cpp +++ b/palace/drivers/eigensolver.cpp @@ -33,10 +33,10 @@ void EigenSolver::Solve(std::vector> &mesh, SpaceOperator spaceop(iodata, mesh); std::unique_ptr K = spaceop.GetComplexSystemMatrix( SpaceOperator::OperatorType::STIFFNESS, Operator::DIAG_ONE); - std::unique_ptr M = spaceop.GetComplexSystemMatrix( - SpaceOperator::OperatorType::MASS, Operator::DIAG_ZERO); std::unique_ptr C = spaceop.GetComplexSystemMatrix( SpaceOperator::OperatorType::DAMPING, Operator::DIAG_ZERO); + std::unique_ptr M = spaceop.GetComplexSystemMatrix( + SpaceOperator::OperatorType::MASS, Operator::DIAG_ZERO); std::unique_ptr Curl = spaceop.GetComplexCurlMatrix(); SaveMetadata(spaceop.GetNDSpace()); diff --git a/palace/drivers/transientsolver.cpp b/palace/drivers/transientsolver.cpp index 6a33e93dc..bcd821b51 100644 --- a/palace/drivers/transientsolver.cpp +++ b/palace/drivers/transientsolver.cpp @@ -89,6 +89,7 @@ void TransientSolver::Solve(std::vector> &mesh, // Single time step t => t + dt. 
if (step == 0) { + Mpi::Print("\n"); t += delta_t; timeop.Init(); // Initial conditions } diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp index 1bc31ec23..e8f408eca 100644 --- a/palace/linalg/ams.cpp +++ b/palace/linalg/ams.cpp @@ -54,7 +54,8 @@ void HypreAmsSolver::ConstructAuxiliaryMatrices(mfem::ParFiniteElementSpace &nd_ grad->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); grad->Assemble(); grad->Finalize(); - G = std::make_unique(std::move(grad), h1_fespace, nd_fespace, true); + ParOperator RAP_G(std::move(grad), h1_fespace, nd_fespace, true); + G = RAP_G.StealParallelAssemble(); } // Vertex coordinates for the lowest order case, or Nedelec interpolation matrix or @@ -109,22 +110,23 @@ void HypreAmsSolver::ConstructAuxiliaryMatrices(mfem::ParFiniteElementSpace &nd_ } else { - // XX TODO: Partial assembly option? - h1d_fespace = std::make_unique( - &mesh, h1_fespace.FEColl(), space_dim, mfem::Ordering::byVDIM); - auto pi = - std::make_unique(h1d_fespace.get(), &nd_fespace); - pi->AddDomainInterpolator(new mfem::IdentityInterpolator); - pi->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); - pi->Assemble(); - pi->Finalize(); - Pi = std::make_unique(std::move(pi), *h1d_fespace, nd_fespace, true); + { + // XX TODO: Partial assembly option? + mfem::ParFiniteElementSpace h1d_fespace(&mesh, h1_fespace.FEColl(), space_dim, + mfem::Ordering::byVDIM); + auto pi = std::make_unique(&h1d_fespace, &nd_fespace); + pi->AddDomainInterpolator(new mfem::IdentityInterpolator); + pi->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + pi->Assemble(); + pi->Finalize(); + ParOperator RAP_Pi(std::move(pi), h1d_fespace, nd_fespace, true); + Pi = RAP_Pi.StealParallelAssemble(); + } if (cycle_type >= 10) { // Get blocks of Pi corresponding to each component, and free Pi. 
- mfem::Array2D Pi_blocks(1, h1d_fespace->GetVDim()); - Pi->ParallelAssemble().GetBlocks( - Pi_blocks, false, h1d_fespace->GetOrdering() == mfem::Ordering::byVDIM); + mfem::Array2D Pi_blocks(1, space_dim); + Pi->GetBlocks(Pi_blocks, false, true); Pix.reset(Pi_blocks(0, 0)); if (space_dim > 1) { @@ -179,24 +181,19 @@ void HypreAmsSolver::InitializeSolver() // HYPRE_AMSSetBetaAMGCoarseRelaxType(ams, coarse_relax_type); // Set the discrete gradient matrix. - HYPRE_AMSSetDiscreteGradient(ams, G->ParallelAssemble()); + HYPRE_AMSSetDiscreteGradient(ams, (HYPRE_ParCSRMatrix)*G); // Set the mesh vertex coordinates or Nedelec interpolation matrix or matrices. - if (x) - { - HYPRE_ParVector HY_X = (x) ? (HYPRE_ParVector)*x : nullptr; - HYPRE_ParVector HY_Y = (y) ? (HYPRE_ParVector)*y : nullptr; - HYPRE_ParVector HY_Z = (z) ? (HYPRE_ParVector)*z : nullptr; - HYPRE_AMSSetCoordinateVectors(ams, HY_X, HY_Y, HY_Z); - } - else - { - HYPRE_ParCSRMatrix HY_Pi = (Pi) ? (HYPRE_ParCSRMatrix)Pi->ParallelAssemble() : nullptr; - HYPRE_ParCSRMatrix HY_Pix = (Pix) ? (HYPRE_ParCSRMatrix)*Pix : nullptr; - HYPRE_ParCSRMatrix HY_Piy = (Piy) ? (HYPRE_ParCSRMatrix)*Piy : nullptr; - HYPRE_ParCSRMatrix HY_Piz = (Piz) ? (HYPRE_ParCSRMatrix)*Piz : nullptr; - HYPRE_AMSSetInterpolations(ams, HY_Pi, HY_Pix, HY_Piy, HY_Piz); - } + HYPRE_ParVector HY_X = (x) ? (HYPRE_ParVector)*x : nullptr; + HYPRE_ParVector HY_Y = (y) ? (HYPRE_ParVector)*y : nullptr; + HYPRE_ParVector HY_Z = (z) ? (HYPRE_ParVector)*z : nullptr; + HYPRE_AMSSetCoordinateVectors(ams, HY_X, HY_Y, HY_Z); + + HYPRE_ParCSRMatrix HY_Pi = (Pi) ? (HYPRE_ParCSRMatrix)*Pi : nullptr; + HYPRE_ParCSRMatrix HY_Pix = (Pix) ? (HYPRE_ParCSRMatrix)*Pix : nullptr; + HYPRE_ParCSRMatrix HY_Piy = (Piy) ? (HYPRE_ParCSRMatrix)*Piy : nullptr; + HYPRE_ParCSRMatrix HY_Piz = (Piz) ? 
(HYPRE_ParCSRMatrix)*Piz : nullptr; + HYPRE_AMSSetInterpolations(ams, HY_Pi, HY_Pix, HY_Piy, HY_Piz); } void HypreAmsSolver::SetOperator(const Operator &op) diff --git a/palace/linalg/ams.hpp b/palace/linalg/ams.hpp index fe8ab33c2..8e9810b69 100644 --- a/palace/linalg/ams.hpp +++ b/palace/linalg/ams.hpp @@ -29,13 +29,11 @@ class HypreAmsSolver : public mfem::HypreSolver const int print; // Discrete gradient matrix. - std::unique_ptr G; + std::unique_ptr G; // Nedelec interpolation matrix and its components, or, for p = 1, the mesh vertex // coordinates. - std::unique_ptr h1d_fespace; - std::unique_ptr Pi; - std::unique_ptr Pix, Piy, Piz; + std::unique_ptr Pi, Pix, Piy, Piz; std::unique_ptr x, y, z; // Helper function to set up the auxiliary objects required by the AMS solver. diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp index 6f7046703..3b83397a0 100644 --- a/palace/linalg/operator.cpp +++ b/palace/linalg/operator.cpp @@ -58,14 +58,16 @@ void ParOperator::EliminateRHS(const Vector &x, Vector &b) const } // Apply the unconstrained operator. 
- std::unique_ptr b_RAP_ = std::move(RAP_); - const mfem::Array *b_trial_dbc_tdof_list_ = trial_dbc_tdof_list_; - const mfem::Array *b_test_dbc_tdof_list_ = test_dbc_tdof_list_; - trial_dbc_tdof_list_ = test_dbc_tdof_list_ = nullptr; - AddMult(tx_, b, -1.0); - RAP_ = std::move(b_RAP_); - trial_dbc_tdof_list_ = b_trial_dbc_tdof_list_; - test_dbc_tdof_list_ = b_test_dbc_tdof_list_; + trial_fespace_.GetProlongationMatrix()->Mult(tx_, lx_); + A_->Mult(lx_, ly_); + if (!use_R_) + { + test_fespace_.GetProlongationMatrix()->AddMultTranspose(ly_, b, -1.0); + } + else + { + test_fespace_.GetRestrictionMatrix()->AddMult(ly_, b, -1.0); + } { if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) @@ -190,9 +192,8 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble() "BilinearForm!"); } #else - MFEM_VERIFY( - bfA->HasSpMat(), - "Missing assembled SparseMatrix for parallel assembly of BilinearForm!"); + MFEM_VERIFY(bfA->HasSpMat(), + "Missing assembled SparseMatrix for parallel assembly of BilinearForm!"); lA = &bfA->SpMat(); #endif } @@ -210,8 +211,7 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble() new mfem::HypreParMatrix(trial_fespace_.GetComm(), trial_fespace_.GlobalVSize(), trial_fespace_.GetDofOffsets(), lA); const mfem::HypreParMatrix *P = trial_fespace_.Dof_TrueDof_Matrix(); - RAP_ = - std::make_unique(hypre_ParCSRMatrixRAP(*P, *hA, *P), true); + RAP_ = std::make_unique(hypre_ParCSRMatrixRAP(*P, *hA, *P), true); delete hA; if (own_lA) { @@ -257,23 +257,22 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble() lA = nullptr; } mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( - trial_fespace_.GetComm(), test_fespace_.GlobalVSize(), - trial_fespace_.GlobalVSize(), test_fespace_.GetDofOffsets(), - trial_fespace_.GetDofOffsets(), lA); + trial_fespace_.GetComm(), test_fespace_.GlobalVSize(), trial_fespace_.GlobalVSize(), + test_fespace_.GetDofOffsets(), trial_fespace_.GetDofOffsets(), lA); const mfem::HypreParMatrix *P = 
trial_fespace_.Dof_TrueDof_Matrix(); if (!use_R_) { const mfem::HypreParMatrix *Rt = test_fespace_.Dof_TrueDof_Matrix(); - RAP_ = std::make_unique(hypre_ParCSRMatrixRAP(*Rt, *hA, *P), - true); + RAP_ = + std::make_unique(hypre_ParCSRMatrixRAP(*Rt, *hA, *P), true); } else { mfem::SparseMatrix *sRt = mfem::Transpose(*test_fespace_.GetRestrictionMatrix()); mfem::HypreParMatrix *hRt = new mfem::HypreParMatrix( - trial_fespace_.GetComm(), trial_fespace_.GlobalVSize(), - trial_fespace_.GlobalTrueVSize(), trial_fespace_.GetDofOffsets(), - trial_fespace_.GetTrueDofOffsets(), sRt); + test_fespace_.GetComm(), test_fespace_.GlobalVSize(), + test_fespace_.GlobalTrueVSize(), test_fespace_.GetDofOffsets(), + test_fespace_.GetTrueDofOffsets(), sRt); RAP_ = std::make_unique(hypre_ParCSRMatrixRAP(*hRt, *hA, *P), true); delete sRt; diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp index 8dec4ccc9..cf35350c7 100644 --- a/palace/linalg/operator.hpp +++ b/palace/linalg/operator.hpp @@ -31,7 +31,7 @@ class ParOperator : public Operator const bool use_R_; // Lists of constrained essential boundary true dofs for elimination. - mutable const mfem::Array *trial_dbc_tdof_list_, *test_dbc_tdof_list_; + const mfem::Array *trial_dbc_tdof_list_, *test_dbc_tdof_list_; // Diagonal policy for constrained true dofs. DiagonalPolicy diag_policy_; @@ -39,7 +39,7 @@ class ParOperator : public Operator // Assembled operator as a parallel Hypre matrix. If the save flag is true, calls to // ParallelAssemble will not delete the local operator. This is useful for later on calls // to EliminateRHS, for example. - mutable std::unique_ptr RAP_; + std::unique_ptr RAP_; bool save_A_; // Temporary storage for operator application. @@ -107,6 +107,15 @@ class ParOperator : public Operator // with the local operator. mfem::HypreParMatrix &ParallelAssemble(); + // Steal the assembled parallel sparse matrix. 
The local operator is saved so that this + // object still can perform operations after this is called. + std::unique_ptr StealParallelAssemble() + { + SaveLocalOperator(); + ParallelAssemble(); + return std::move(RAP_); + } + // Get the associated MPI communicator. MPI_Comm GetComm() const { return trial_fespace_.GetComm(); } diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index 941413da0..466a0630b 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -226,6 +226,7 @@ SpaceOperator::GetSystemMatrix(SpaceOperator::OperatorType type, break; case OperatorType::DAMPING: AddDampingCoefficients(1.0, f, fb); + break; case OperatorType::MASS: AddRealMassCoefficients(1.0, f, fb); break; @@ -434,8 +435,11 @@ void SpaceOperator::GetPreconditionerMatrix(double a0, double a1, double a2, dou SumCoefficient dfb; AddStiffnessCoefficients(a0, df, f, fb); AddDampingCoefficients(a1, f, fb); + // XX TODO: Test out difference of |Mr + i Mi| vs. Mr + Mi AddRealMassCoefficients( pc_shifted ? std::abs(a2) : a2, f, fb); + // AddRealMassCoefficients(pc_shifted ? std::abs(a2) : a2, f, fb); + // AddImagMassCoefficients(a2, f, fb); AddExtraSystemBdrCoefficients(a3, dfb, dfb, fb, fb); auto b = std::make_unique(&fespace_l); if (s == 0) diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp index 423d757bc..ff37ab041 100644 --- a/palace/models/timeoperator.cpp +++ b/palace/models/timeoperator.cpp @@ -51,8 +51,8 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera // required on the RHS. Diagonal entries are set in M (so M is non-singular). 
K = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::STIFFNESS, Operator::DIAG_ZERO); - M = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::MASS, Operator::DIAG_ONE); C = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::DAMPING, Operator::DIAG_ZERO); + M = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::MASS, Operator::DIAG_ONE); // Set up RHS vector for the current source term: -g'(t) J, where g(t) handles the time // dependence. @@ -106,7 +106,6 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera void Mult(const Vector &u, const Vector &du, Vector &ddu) const override { // Solve: M ddu = -(K u + C du) - g'(t) J. - Mpi::Print("\n"); if (kspM->NumTotalMult() == 0) { // Operators have already been set in constructor. @@ -122,10 +121,6 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera // Solve: (a0 K + a1 C + M) k = -(K u + C du) - g'(t) J, where a0 may be 0 in the // explicit case. At first iteration, construct the solver. Also don't print a newline // if already done by the mass matrix solve at the first iteration. 
- if (kspA && kspA->NumTotalMult() > 0) - { - Mpi::Print("\n"); - } if (!kspA || a0 != a0_ || a1 != a1_) { // Configure the linear solver, including the system matrix and also the matrix @@ -135,6 +130,7 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera a1_ = a1; k = 0.0; } + Mpi::Print("\n"); FormRHS(u, du, RHS); kspA->Mult(RHS, k); } diff --git a/palace/utils/configfile.cpp b/palace/utils/configfile.cpp index 514f5e92e..e6eb8fd44 100644 --- a/palace/utils/configfile.cpp +++ b/palace/utils/configfile.cpp @@ -1560,7 +1560,7 @@ void LinearSolverData::SetUp(json &solver) "Invalid value for config[\"Linear\"][\"PCSide\"] in configuration file!"); pc_mg = linear->value("UseMultigrid", pc_mg); - mg_smooth_aux = linear->value("MGAuxiliarySpaceSmoother", mg_smooth_aux); + mg_smooth_aux = linear->value("MGAuxiliarySmoother", mg_smooth_aux); mg_cycle_it = linear->value("MGCycleIts", mg_cycle_it); mg_smooth_it = linear->value("MGSmoothIts", mg_smooth_it); mg_smooth_order = linear->value("MGSmoothOrder", mg_smooth_order); From c435ace0839cbae99dbe43ac0bc34ee17da89178 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Thu, 18 May 2023 11:40:43 -0700 Subject: [PATCH 09/41] Debugging: Complex-valued simulation types, driven with lumped ports and no adaptive frequency sweeps and eigenmode solves Bug fixes include: ComplexVector AXPBY and AXPBYPCX with uninitialized vector, ParOperator real-valued mass matrix orthogonalization for eigenvalue solves, unique_ptr array construction, remove unused FEAST code (for now), remove unused PETSc code, SLEPc function pointer fixes, and others. 
--- palace/drivers/drivensolver.cpp | 7 +- palace/drivers/eigensolver.cpp | 209 +-- palace/linalg/CMakeLists.txt | 1 - palace/linalg/arpack.cpp | 36 +- palace/linalg/arpack.hpp | 10 +- palace/linalg/complex.cpp | 80 +- palace/linalg/complex.hpp | 21 +- palace/linalg/curlcurl.cpp | 4 +- palace/linalg/divfree.cpp | 6 +- palace/linalg/feast.cpp | 1298 -------------- palace/linalg/feast.hpp | 291 ---- palace/linalg/gmg.cpp | 3 - palace/linalg/operator.hpp | 14 +- palace/linalg/petsc.cpp | 2555 ---------------------------- palace/linalg/petsc.hpp | 584 ------- palace/linalg/slepc.cpp | 122 +- palace/linalg/slepc.hpp | 10 +- palace/linalg/superlu.cpp | 2 + palace/models/curlcurloperator.cpp | 2 +- palace/models/laplaceoperator.cpp | 2 +- palace/models/spaceoperator.cpp | 25 +- palace/models/waveportoperator.cpp | 26 +- 22 files changed, 264 insertions(+), 5044 deletions(-) delete mode 100644 palace/linalg/feast.cpp delete mode 100644 palace/linalg/feast.hpp delete mode 100644 palace/linalg/petsc.cpp diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp index e9f527bb2..001937666 100644 --- a/palace/drivers/drivensolver.cpp +++ b/palace/drivers/drivensolver.cpp @@ -8,6 +8,7 @@ #include "linalg/complex.hpp" #include "linalg/ksp.hpp" #include "linalg/operator.hpp" +#include "linalg/vector.hpp" #include "models/lumpedportoperator.hpp" #include "models/postoperator.hpp" #include "models/romoperator.hpp" @@ -179,7 +180,8 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in postop.SetEGridFunction(E); postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp(), spaceop.GetWavePortOp(), omega); - Mpi::Print(" Sol. ||E|| = {:.6e} (||RHS|| = {:.6e})\n", E.Norml2(), RHS.Norml2()); + Mpi::Print(" Sol. 
||E|| = {:.6e} (||RHS|| = {:.6e})\n", linalg::Norml2(A->GetComm(), E), + linalg::Norml2(A->GetComm(), RHS)); if (!iodata.solver.driven.only_port_post) { E_elec = postop.GetEFieldEnergy(); @@ -337,7 +339,8 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i postop.SetEGridFunction(E); postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp(), spaceop.GetWavePortOp(), omega); - Mpi::Print(" Sol. ||E|| = {:.6e}\n", E.Norml2()); + // Mpi::Print(" Sol. ||E|| = {:.6e}\n", linalg::Norml2(A->GetComm(), E)); //XX TODO + // PROM if (!iodata.solver.driven.only_port_post) { E_elec = postop.GetEFieldEnergy(); diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp index 3d070da76..a8392d5bc 100644 --- a/palace/drivers/eigensolver.cpp +++ b/palace/drivers/eigensolver.cpp @@ -7,7 +7,6 @@ #include "linalg/arpack.hpp" #include "linalg/complex.hpp" #include "linalg/divfree.hpp" -#include "linalg/feast.hpp" #include "linalg/ksp.hpp" #include "linalg/operator.hpp" #include "linalg/slepc.hpp" @@ -46,9 +45,7 @@ void EigenSolver::Solve(std::vector> &mesh, // Define and configure the eigensolver to solve the eigenvalue problem: // (K + λ C + λ² M) u = 0 or K u = -λ² M u - // with λ = iω. A shift-and-invert strategy is employed to solve for the eigenvalues - // closest to the specified target, σ. In general, the system matrices are complex and - // symmetric. + // with λ = iω. In general, the system matrices are complex and symmetric. 
std::unique_ptr eigen; config::EigenSolverData::Type type = iodata.solver.eigenmode.type; #if defined(PALACE_WITH_ARPACK) && defined(PALACE_WITH_SLEPC) @@ -78,21 +75,6 @@ void EigenSolver::Solve(std::vector> &mesh, if (type == config::EigenSolverData::Type::FEAST) { MFEM_ABORT("FEAST eigenvalue solver is currently not supported!"); -#if defined(PALACE_WITH_SLEPC) - // Mpi::Print("\nConfiguring FEAST eigenvalue solver\n"); - // if (C) - // { - // eigen = std::make_unique( - // K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, - // iodata.problem.verbose); - // } - // else - // { - // eigen = std::make_unique( - // K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, - // iodata.problem.verbose); - // } -#endif } else if (type == config::EigenSolverData::Type::ARPACK) { @@ -158,70 +140,79 @@ void EigenSolver::Solve(std::vector> &mesh, Mpi::Print(" Scaling γ = {:.3e}, δ = {:.3e}\n", eigen->GetScalingGamma(), eigen->GetScalingDelta()); + // If desired, use an M-inner product for orthogonalizing the eigenvalue subspace. The + // constructed matrix just references the real SPD part of the mass matrix (no copy is + // performed). Boundary conditions don't need to be eliminated here. + std::unique_ptr Mr; + if (iodata.solver.eigenmode.mass_orthog) + { + // Mpi::Print(" Basis uses M-inner product\n"); + // Mr = std::make_unique( + // std::make_unique(M->LocalOperator().Real(), 1.0), + // M->GetFESpace()); + // eigen->SetBMat(*Mr); + + Mpi::Print(" Basis uses (K + M)-inner product\n"); + auto KM = std::make_unique(M->LocalOperator().Real(), 1.0); + KM->AddOperator(K->LocalOperator().Real(), 1.0); + Mr = std::make_unique(std::move(KM), M->GetFESpace()); + eigen->SetBMat(*Mr); + } + + // Construct a divergence-free projector so the eigenvalue solve is performed in the space + // orthogonal to the zero eigenvalues of the stiffness matrix. 
+ std::unique_ptr divfree; + if (iodata.solver.linear.divfree_max_it > 0) + { + constexpr int divfree_verbose = 0; + divfree = std::make_unique( + spaceop.GetMaterialOp(), spaceop.GetNDSpace(), spaceop.GetH1Spaces(), + spaceop.GetAuxBdrTDofLists(), iodata.solver.linear.divfree_tol, + iodata.solver.linear.divfree_max_it, divfree_verbose); + eigen->SetDivFreeProjector(*divfree); + } + + // Set up the initial space for the eigenvalue solve. Satisfies boundary conditions and is + // projected appropriately. + if (iodata.solver.eigenmode.init_v0) + { + ComplexVector v0; + if (iodata.solver.eigenmode.init_v0_const) + { + Mpi::Print(" Using constant starting vector\n"); + spaceop.GetConstantInitialVector(v0); + } + else + { + Mpi::Print(" Using random starting vector\n"); + spaceop.GetRandomInitialVector(v0); + } + if (divfree) + { + divfree->Mult(v0); + } + eigen->SetInitialSpace(v0); // Copies the vector + + // Debug + // auto Grad = spaceop.GetComplexGradMatrix(); + // ComplexVector r0(Grad->Width()); + // Grad->MultTranspose(v0, r0); + // r0.Print(); + } + + // Configure the shift-and-invert strategy is employed to solve for the eigenvalues + // closest to the specified target, σ. const double target = iodata.solver.eigenmode.target; const double f_target = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, target); std::unique_ptr A; std::vector> P, AuxP; std::unique_ptr ksp; - // #if defined(PALACE_WITH_SLEPC) - // auto *feast = dynamic_cast(eigen.get()); - // if (feast) - // { - // // Configure the FEAST integration contour. The linear solvers are set up inside - // the - // // solver. 
- // if (iodata.solver.eigenmode.feast_contour_np > 1) - // { - // double contour_ub = iodata.solver.eigenmode.feast_contour_ub; - // double f_contour_ub = - // iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, contour_ub); - // double contour_ar = iodata.solver.eigenmode.feast_contour_ar; - // MFEM_VERIFY(contour_ub > target, - // "FEAST eigensolver requires a specified upper frequency target!"); - // MFEM_VERIFY( - // contour_ar >= 0.0 && contour_ar <= 1.0, - // "Contour aspect ratio for FEAST eigenvalue solver must be in range - // [0.0, 1.0]!"); - // Mpi::Print(" FEAST search contour: σ_lower = {:.3e} GHz ({:.3e})\n" - // " σ_upper = {:.3e} GHz ({:.3e})\n" - // " AR = {:.1e}\n", - // f_target, target, f_contour_ub, contour_ub, contour_ar); - // if (C) - // { - // // Search for eigenvalues in the range λ = iσₗₒ to iσₕᵢ. - // double h = (contour_ub - target) * contour_ar; - // feast->SetContour(-0.5 * h, target, 0.5 * h, contour_ub, false, true); - // } - // else - // { - // // Linear EVP has eigenvalues μ = -λ² = ω². Search for eigenvalues from μ = - // σₗₒ² to - // // σₕᵢ². - // double h = (contour_ub * contour_ub - target * target) * contour_ar; - // feast->SetContour(target * target, -0.5 * h, contour_ub * contour_ub, 0.5 * h); - // } - // } - // else - // { - // Mpi::Print(" FEAST search target: σ = {:.3e} GHz ({:.3e})\n", f_target, target); - // if (C) - // { - // feast->SetContour(0.0, target, 0.0, target, false, true); - // } - // else - // { - // feast->SetContour(target * target, 0.0, target * target, 0.0); - // } - // } - // } - // else - // #endif { Mpi::Print(" Shift-and-invert σ = {:.3e} GHz ({:.3e})\n", f_target, target); if (C) { // Search for eigenvalues closest to λ = iσ. - eigen->SetShiftInvert(0.0, target); + eigen->SetShiftInvert(1i * target); if (type == config::EigenSolverData::Type::ARPACK) { // ARPACK searches based on eigenvalues of the transformed problem. 
The eigenvalue @@ -237,7 +228,7 @@ void EigenSolver::Solve(std::vector> &mesh, else { // Linear EVP has eigenvalues μ = -λ² = ω². Search for eigenvalues closest to μ = σ². - eigen->SetShiftInvert(target * target, 0.0); + eigen->SetShiftInvert(target * target); if (type == config::EigenSolverData::Type::ARPACK) { // ARPACK searches based on eigenvalues of the transformed problem. 1 / (μ - σ²) @@ -265,80 +256,12 @@ void EigenSolver::Solve(std::vector> &mesh, ksp->SetOperator(*A, P, &AuxP); eigen->SetLinearSolver(*ksp); } - - // If desired, use an M-inner product for orthogonalizing the eigenvalue subspace. The - // constructed matrix just references the real SPD part of the mass matrix (no copy is - // performed). - std::unique_ptr Mr; - if (iodata.solver.eigenmode.mass_orthog) - { - // Mpi::Print(" Basis uses M-inner product\n"); - // Mr = std::make_unique(M->Real(), 1.0); - // eigen->SetBMat(*Mr); - - Mpi::Print(" Basis uses (K + M)-inner product\n"); - auto KM = std::make_unique(M->Real(), 1.0); - KM->AddOperator(K->Real(), 1.0); - Mr = std::move(KM); - eigen->SetBMat(*Mr); - } - - // Construct a divergence-free projector so the eigenvalue solve is performed in the space - // orthogonal to the zero eigenvalues of the stiffness matrix. - std::unique_ptr divfree; - if (iodata.solver.linear.divfree_max_it > 0) - { - constexpr int divfree_verbose = 0; - divfree = std::make_unique( - spaceop.GetMaterialOp(), spaceop.GetNDSpace(), spaceop.GetH1Spaces(), - spaceop.GetAuxBdrTDofLists(), iodata.solver.linear.divfree_tol, - iodata.solver.linear.divfree_max_it, divfree_verbose); - eigen->SetDivFreeProjector(*divfree); - } - - // Set up the initial space for the eigenvalue solve. Satisfies boundary conditions and is - // projected appropriately. 
- if (iodata.solver.eigenmode.init_v0) - { - ComplexVector v0; - if (iodata.solver.eigenmode.init_v0_const) - { - Mpi::Print(" Using constant starting vector\n"); - spaceop.GetConstantInitialVector(v0); - } - else - { - Mpi::Print(" Using random starting vector\n"); - spaceop.GetRandomInitialVector(v0); - } - if (divfree) - { - divfree->Mult(v0); - } - eigen->SetInitialSpace(v0); // Copies the vector - - // Debug - // auto Grad = spaceop.GetComplexGradMatrix(); - // ComplexVector r0(Grad->Width()); - // Grad->MultTranspose(v0, r0); - // r0.Print(); - } timer.construct_time += timer.Lap(); // Eigenvalue problem solve. Mpi::Print("\n"); int num_conv = eigen->Solve(); -#if defined(PALACE_WITH_SLEPC) - // auto *feast = dynamic_cast(eigen.get()); - // if (feast) - // { - // SaveMetadata(feast->GetLinearSolver()); - // } - // else -#endif - { - SaveMetadata(*ksp); - } + SaveMetadata(*ksp); timer.solve_time += timer.Lap(); // Postprocess the results. diff --git a/palace/linalg/CMakeLists.txt b/palace/linalg/CMakeLists.txt index 8f93f9a28..195d2d2cc 100644 --- a/palace/linalg/CMakeLists.txt +++ b/palace/linalg/CMakeLists.txt @@ -15,7 +15,6 @@ target_sources(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/curlcurl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/distrelaxation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/divfree.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/feast.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gmg.cpp ${CMAKE_CURRENT_SOURCE_DIR}/jacobi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ksp.cpp diff --git a/palace/linalg/arpack.cpp b/palace/linalg/arpack.cpp index 681a6eeaf..f785de0df 100644 --- a/palace/linalg/arpack.cpp +++ b/palace/linalg/arpack.cpp @@ -268,7 +268,7 @@ void ArpackEigenSolver::SetInitialSpace(const ComplexVector &v) "Must call SetOperators before using SetInitialSpace for ARPACK eigenvalue solver!"); if (!r) { - r = std::make_unique>(n); + r = std::make_unique[]>(n); } MFEM_VERIFY(v.Size() == 2 * n, "Invalid size mismatch for provided initial space vector!"); @@ -323,9 +323,9 @@ int 
ArpackEigenSolver::SolveInternal(int n, std::complex *r, // Allocate work arrays. a_int lworkl = 3 * ncv * ncv + 5 * ncv; - auto workd = std::make_unique>(3 * n); - auto workl = std::make_unique>(lworkl); - auto rwork = std::make_unique(ncv); + auto workd = std::make_unique[]>(3 * n); + auto workl = std::make_unique[]>(lworkl); + auto rwork = std::make_unique(ncv); // Begin RCI loop. a_int ido = 0, ainfo = (a_int)info, ipntr[14] = {0}; @@ -379,8 +379,8 @@ int ArpackEigenSolver::SolveInternal(int n, std::complex *r, ::arpack::howmny howmny_option = ::arpack::howmny::ritz_vectors; // Allocate eigenvalue storage and work arrays. - auto select = std::make_unique(ncv); - auto workev = std::make_unique>(2 * ncv); + auto select = std::make_unique(ncv); + auto workev = std::make_unique[]>(2 * ncv); // Call complex problem driver. neupd(fcomm, rvec, howmny_option, select.get(), eig, V, (a_int)n, sigma / gamma, @@ -512,7 +512,7 @@ int ArpackEPSSolver::Solve() // Initialize if user did not provide an initial space. if (!r) { - r = std::make_unique>(n); + r = std::make_unique[]>(n); info = 0; } if (!info) @@ -523,15 +523,15 @@ int ArpackEPSSolver::Solve() // Allocate Arnoldi basis for the problem. if (!V) { - V = std::make_unique>(n * ncv); + V = std::make_unique[]>(n * ncv); } // Allocate storage for eigenvalues and residual norms. if (!eig) { - eig = std::make_unique>(nev + 1); - perm = std::make_unique(nev); - res = std::make_unique(nev); + eig = std::make_unique[]>(nev + 1); + perm = std::make_unique(nev); + res = std::make_unique(nev); } // Solve the generalized eigenvalue problem. @@ -666,30 +666,30 @@ int ArpackPEPSolver::Solve() // Initialize if user did not provide an initial space. 
if (!r) { - r = std::make_unique>(n); + r = std::make_unique[]>(n); info = 0; } if (!info) { std::fill(r.get(), r.get() + n, 0.0); } - auto s = std::make_unique>(2 * n); + auto s = std::make_unique[]>(2 * n); std::copy(r.get(), r.get() + n, s.get()); std::fill(s.get() + n, s.get() + 2 * n, 0.0); // Allocate Arnoldi basis for original and linearized problem. if (!V) { - V = std::make_unique>(n * ncv); + V = std::make_unique[]>(n * ncv); } - auto W = std::make_unique>(2 * n * ncv); + auto W = std::make_unique[]>(2 * n * ncv); // Allocate storage for eigenvalues and residual norms. if (!eig) { - eig = std::make_unique>(nev + 1); - perm = std::make_unique(nev + 1); - res = std::make_unique(nev + 1); + eig = std::make_unique[]>(nev + 1); + perm = std::make_unique(nev + 1); + res = std::make_unique(nev + 1); } // Solve the linearized eigenvalue problem. diff --git a/palace/linalg/arpack.hpp b/palace/linalg/arpack.hpp index 3bd992cf2..a09038a31 100644 --- a/palace/linalg/arpack.hpp +++ b/palace/linalg/arpack.hpp @@ -60,18 +60,18 @@ class ArpackEigenSolver : public EigenvalueSolver bool sinvert; // Storage for computed eigenvalues. - std::unique_ptr> eig; - std::unique_ptr perm; + std::unique_ptr[]> eig; + std::unique_ptr perm; // Storage for Arnoldi basis vectors. - std::unique_ptr> V; + std::unique_ptr[]> V; // Storage for computed residual norms. - std::unique_ptr res; + std::unique_ptr res; // On input used to define optional initial guess, on output stores final residual // vector. 
- std::unique_ptr> r; + std::unique_ptr[]> r; // Reference to linear solver used for operator action for M⁻¹ (with no spectral // transformation) or (K - σ M)⁻¹ (generalized EVP with shift-and- invert) or P(σ)⁻¹ diff --git a/palace/linalg/complex.cpp b/palace/linalg/complex.cpp index 6c1400dcb..073e7cc78 100644 --- a/palace/linalg/complex.cpp +++ b/palace/linalg/complex.cpp @@ -65,7 +65,7 @@ void ComplexVector::Set(const std::complex *py, int n) MFEM_VERIFY(2 * n == Size(), "Mismatch in dimension for array of std::complex in ComplexVector!"); Vector y(reinterpret_cast(const_cast *>(py)), 2 * n); - const int N = Size() / 2; + const int N = n; const auto *Y = y.Read(); auto *XR = Real().Write(); auto *XI = Imag().Write(); @@ -84,7 +84,7 @@ void ComplexVector::Get(std::complex *py, int n) const MFEM_VERIFY(2 * n == Size(), "Mismatch in dimension for array of std::complex in ComplexVector!"); Vector y(reinterpret_cast(py), 2 * n); - const int N = Size() / 2; + const int N = n; const auto *XR = Real().Read(); const auto *XI = Imag().Read(); auto *Y = y.Write(); @@ -116,7 +116,7 @@ ComplexVector &ComplexVector::operator*=(std::complex s) { if (s.imag() == 0.0) { - *this *= s.real(); + Vector::operator*=(s.real()); } else { @@ -175,19 +175,33 @@ void ComplexVector::AXPBY(std::complex alpha, const ComplexVector &y, const int N = Size() / 2; const double ar = alpha.real(); const double ai = alpha.imag(); - const double br = beta.real(); - const double bi = beta.imag(); const auto *YR = y.Real().Read(); const auto *YI = y.Imag().Read(); - auto *XR = Real().ReadWrite(); - auto *XI = Imag().ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const double t = bi * XR[i] + br * XI[i]; - XR[i] = ar * YR[i] - ai * YI[i] + br * XR[i] - bi * XI[i]; - XI[i] = ai * YR[i] + ar * YI[i] + t; - }); + if (beta != 0.0) + { + const double br = beta.real(); + const double bi = beta.imag(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); + mfem::forall(N, + [=] 
MFEM_HOST_DEVICE(int i) + { + const double t = bi * XR[i] + br * XI[i]; + XR[i] = ar * YR[i] - ai * YI[i] + br * XR[i] - bi * XI[i]; + XI[i] = ai * YR[i] + ar * YI[i] + t; + }); + } + else + { + auto *XR = Real().Write(); + auto *XI = Imag().Write(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + XR[i] = ar * YR[i] - ai * YI[i]; + XI[i] = ai * YR[i] + ar * YI[i]; + }); + } RestoreReal(); RestoreImag(); } @@ -201,22 +215,36 @@ void ComplexVector::AXPBYPCZ(std::complex alpha, const ComplexVector &y, const double ai = alpha.imag(); const double br = beta.real(); const double bi = beta.imag(); - const double gr = gamma.real(); - const double gi = gamma.imag(); const auto *YR = y.Real().Read(); const auto *YI = y.Imag().Read(); const auto *ZR = z.Real().Read(); const auto *ZI = z.Imag().Read(); - auto *XR = Real().ReadWrite(); - auto *XI = Imag().ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const double t = gi * XR[i] + gr * XI[i]; - XR[i] = ar * YR[i] - ai * YI[i] + br * ZR[i] - bi * ZI[i] + gr * XR[i] - - gi * XI[i]; - XI[i] = ai * YR[i] + ar * YI[i] + bi * ZR[i] + br * ZI[i] + t; - }); + if (gamma != 0.0) + { + const double gr = gamma.real(); + const double gi = gamma.imag(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = gi * XR[i] + gr * XI[i]; + XR[i] = ar * YR[i] - ai * YI[i] + br * ZR[i] - bi * ZI[i] + gr * XR[i] - + gi * XI[i]; + XI[i] = ai * YR[i] + ar * YI[i] + bi * ZR[i] + br * ZI[i] + t; + }); + } + else + { + auto *XR = Real().Write(); + auto *XI = Imag().Write(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + XR[i] = ar * YR[i] - ai * YI[i] + br * ZR[i] - bi * ZI[i]; + XI[i] = ai * YR[i] + ar * YI[i] + bi * ZR[i] + br * ZI[i]; + }); + } RestoreReal(); RestoreImag(); } diff --git a/palace/linalg/complex.hpp b/palace/linalg/complex.hpp index bfb881fb5..386bb651a 100644 --- a/palace/linalg/complex.hpp +++ b/palace/linalg/complex.hpp 
@@ -249,8 +249,12 @@ class ComplexParOperator : public ComplexOperator // operator. ComplexParOperator(std::unique_ptr &&A, const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, - bool test_restrict = false); + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + ComplexParOperator(std::unique_ptr &&A, + const mfem::ParFiniteElementSpace &fespace) + : ComplexParOperator(std::move(A), fespace, fespace, false) + { + } // Get access to the underlying local (L-vector) operator. const ComplexOperator &LocalOperator() const @@ -292,17 +296,20 @@ class ComplexParOperator : public ComplexOperator return trial_dbc_tdof_list_; } + // Get access to the finite element spaces associated with the operator. + const mfem::ParFiniteElementSpace &GetFESpace() const + { + MFEM_VERIFY(&trial_fespace_ == &test_fespace_ && height == width, + "GetFESpace should only be used for square ParOperator!"); + return trial_fespace_; + } + // Get the associated MPI communicator. 
MPI_Comm GetComm() const { return trial_fespace_.GetComm(); } bool IsReal() const override { return A_->IsReal(); } bool IsImag() const override { return A_->IsImag(); } - const Operator &Real() const override { return A_->Real(); } - Operator &Real() override { return A_->Real(); } - const Operator &Imag() const override { return A_->Imag(); } - Operator &Imag() override { return A_->Imag(); } - using ComplexOperator::AddMult; using ComplexOperator::AddMultHermitianTranspose; using ComplexOperator::AddMultTranspose; diff --git a/palace/linalg/curlcurl.cpp b/palace/linalg/curlcurl.cpp index 5ac31b8cf..7cb7ae78c 100644 --- a/palace/linalg/curlcurl.cpp +++ b/palace/linalg/curlcurl.cpp @@ -47,7 +47,7 @@ CurlCurlMassSolver::CurlCurlMassSolver( a->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); a->Assemble(0); a->Finalize(0); - A_.push_back(std::make_unique(std::move(a), fespace_l, fespace_l)); + A_.push_back(std::make_unique(std::move(a), fespace_l)); A_.back()->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); } @@ -67,7 +67,7 @@ CurlCurlMassSolver::CurlCurlMassSolver( pcg->SetMaxIter(max_it); pcg->SetPrintLevel(print); - ksp = std::make_unique(std::move(pcg), std::move(ams)); + ksp = std::make_unique(std::move(pcg), std::move(gmg)); ksp->SetOperator(*A.back(), A, &AuxA); } diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp index b2b719a0a..d9118c022 100644 --- a/palace/linalg/divfree.cpp +++ b/palace/linalg/divfree.cpp @@ -33,7 +33,7 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, m->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); m->Assemble(0); m->Finalize(0); - M.push_back(std::make_unique(std::move(m), h1_fespace_l, h1_fespace_l)); + M.push_back(std::make_unique(std::move(m), h1_fespace_l)); M.back()->SetEssentialTrueDofs(h1_bdr_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); } @@ -48,7 +48,7 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, weakDiv->Assemble(); weakDiv->Finalize(); 
WeakDiv = std::make_unique(std::move(weakDiv), nd_fespace, - h1_fespaces.GetFinestFESpace()); + h1_fespaces.GetFinestFESpace(), false); } { // XX TODO: Partial assembly option? @@ -75,7 +75,7 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, pcg->SetMaxIter(max_it); pcg->SetPrintLevel(print); - ksp = std::make_unique(std::move(pcg), std::move(amg)); + ksp = std::make_unique(std::move(pcg), std::move(gmg)); ksp->SetOperator(*M.back(), M); psi.SetSize(h1_fespaces.GetFinestFESpace().GetTrueVSize()); diff --git a/palace/linalg/feast.cpp b/palace/linalg/feast.cpp deleted file mode 100644 index 83e664c37..000000000 --- a/palace/linalg/feast.cpp +++ /dev/null @@ -1,1298 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#include "feast.hpp" - -#if 0 // XX TODO DISABLE FEAST FOR NOW - -#if defined(PALACE_WITH_SLEPC) - -#include -#include -#include -#include -#include -#include -#include "linalg/divfree.hpp" -#include "linalg/ksp.hpp" -#include "linalg/pc.hpp" -#include "models/spaceoperator.hpp" -#include "utils/communication.hpp" -#include "utils/iodata.hpp" - -static PetscErrorCode __mat_apply_FEAST_EPS(Mat, Vec, Vec); -static PetscErrorCode __mat_apply_FEAST_PEP(Mat, Vec, Vec); - -namespace palace::feast -{ - -namespace internal -{ - -// Linear solver helper class - -class FeastLinearSolver -{ -public: - PetscScalar zk, wk; - KspSolver ksp; - KspPreconditioner pc; - const petsc::PetscParMatrix *opK, *opC, *opM; // Reference to EVP operators (not owned) - -private: - SpaceOperator &spaceop; // Reference to spatial discretization (not owned) - std::unique_ptr A; - std::vector> P, AuxP; - -public: - FeastLinearSolver(int k, MPI_Comm comm, const IoData &iodata, SpaceOperator &sp) - : zk(0.0), wk(0.0), ksp(comm, iodata, "ksp" + std::to_string(k + 1) + "_"), - pc(iodata, sp.GetDbcMarker(), sp.GetNDSpaces(), &sp.GetH1Spaces()), spaceop(sp) - { - ksp.SetTabLevel(1); - 
ksp.SetPrintOptions(false); - ksp.SetPreconditioner(pc); - opK = opC = opM = nullptr; - } - - void SetOperators(PetscScalar z, PetscScalar w, const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &M) - { - zk = z; - wk = w; - opK = &K; - opM = &M; - { - Mat A_; - MPI_Comm comm = K.GetComm(); - PetscInt n = K.GetNumRows(); - PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A_)); - PalacePetscCall( - MatShellSetOperation(A_, MATOP_MULT, - (void (*)()) static_cast( - &__mat_apply_FEAST_EPS))); - A = std::make_unique(A_, false); // Inherits the PETSc Mat - ksp.SetOperator(*A); - } - const double sigma = PetscSqrtReal(PetscAbsScalar(zk)); - constexpr bool print = false; - spaceop.GetPreconditionerMatrix(sigma, P, AuxP, print); - pc.SetOperator(P, &AuxP); - } - - void SetOperators(PetscScalar z, PetscScalar w, const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, const petsc::PetscParMatrix &M, - KspPreconditioner *op = nullptr) - { - zk = z; - wk = w; - opK = &K; - opC = &C; - opM = &M; - { - Mat A_; - MPI_Comm comm = K.GetComm(); - PetscInt n = K.GetNumRows(); - PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A_)); - PalacePetscCall( - MatShellSetOperation(A_, MATOP_MULT, - (void (*)()) static_cast( - &__mat_apply_FEAST_PEP))); - A = std::make_unique(A_, false); // Inherits the PETSc Mat - ksp.SetOperator(*A); - } - const double sigma = PetscAbsScalar(zk); - constexpr bool print = false; - spaceop.GetPreconditionerMatrix(sigma, P, AuxP, print); - pc.SetOperator(P, &AuxP); - } - - void Mult(const PetscScalar *eig, const petsc::PetscDenseMatrix &X, - const petsc::PetscDenseMatrix &R, petsc::PetscDenseMatrix &Q, - petsc::PetscParVector &v, bool *converged, PetscReal gamma) const - { - // Solve P(zₖ) Qₖ = R, Q += wₖ (X - Qₖ) (zₖ I - Λ)⁻¹ (residual-inverse iteration). Note: - // Q may have g.t. 
m0 columns, but we just use the first m0 for the result (X should - // have exactly m0 columns). - PetscInt m0 = X.GetGlobalNumCols(); - PetscInt M = Q.GetGlobalNumCols() / (2 * m0); - MFEM_VERIFY(M == 1 || M == 2, - "FEAST eigensolver only supports up to 2 subspace moments!"); - for (PetscInt j = 0; j < m0; j++) - { - const petsc::PetscParVector x = X.GetColumnRead(j); - if (converged && converged[j]) - { - // When R[j] is converged, Q[j] += wₖ/(zₖ - λₖ) X[j] (with Qₖ[j] = 0) . - v.AXPBY(wk / (zk / gamma - eig[j]), x, 0.0); - } - else - { - const petsc::PetscParVector r = R.GetColumnRead(j); - ksp.Mult(r, v); - v.AXPBY(wk / (zk / gamma - eig[j]), x, -wk / (zk / gamma - eig[j])); - R.RestoreColumnRead(j, r); - } - X.RestoreColumnRead(j, x); - - petsc::PetscParVector q = Q.GetColumn(j); - q.AXPY(1.0, v); - Q.RestoreColumn(j, q); - if (M > 1) - { - petsc::PetscParVector q = Q.GetColumn(j + m0); - q.AXPY(zk / gamma, v); - Q.RestoreColumn(j + m0, q); - } - } - } - - PetscScalar Mult(const petsc::PetscDenseMatrix &X, petsc::PetscParVector &r, - petsc::PetscParVector &v) const - { - // Solve P(zₖ) Qₖ = P'(zₖ) X, sum += wₖ tr(Xᵀ Qₖ) for estimating the eigenvalue count - // inside of the contour. - PetscInt m0 = X.GetGlobalNumCols(); - PetscScalar sum = 0.0; - for (PetscInt j = 0; j < m0; j++) - { - const petsc::PetscParVector x = X.GetColumnRead(j); - opM->Mult(x, r); - if (opC) - { - r.Scale(zk); - opC->MultAdd(x, r); - } - ksp.Mult(r, v); - sum += x.TransposeDot(v); - X.RestoreColumnRead(j, x); - } - return wk * sum; - } -}; - -} // namespace internal - -// Base class methods - -FeastEigenSolver::FeastEigenSolver(MPI_Comm comm, const IoData &iodata, - SpaceOperator &spaceop, int np, int print_lvl) -{ - // Initialization. 
- print = print_lvl; - info = 0; - nev = m0 = mQ = 0; - M = iodata.solver.eigenmode.feast_moments; - MFEM_VERIFY(M == 1 || M == 2, - "FEAST eigensolver only supports up to 2 subspace moments!"); - rtol = 0.0; - max_it = 0; - gamma = delta = 1.0; - bl = tr = 0.0; - real_threshold = imag_threshold = false; - - eig = nullptr; - perm = nullptr; - X = nullptr; - res = nullptr; - r0 = nullptr; - opProj = nullptr; - opB = nullptr; - - // Construct the linear solvers for each quadrature point. - opInv.reserve(np); - for (int k = 0; k < np; k++) - { - opInv.emplace_back(k, comm, iodata, spaceop); - } -} - -FeastEigenSolver::~FeastEigenSolver() -{ - delete[] eig; - delete[] perm; - delete[] res; - delete X; - delete r0; -} - -void FeastEigenSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) -{ - MFEM_ABORT("SetOperators not defined for base class FeastEigenSolver!"); -} - -void FeastEigenSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) -{ - MFEM_ABORT("SetOperators not defined for base class FeastEigenSolver!"); -} - -void FeastEigenSolver::SetProjector(const DivFreeSolver &divfree) -{ - opProj = &divfree; -} - -void FeastEigenSolver::SetNumModes(int numeig, int numvec) -{ - if (nev > 0 && numeig != nev) - { - delete[] eig; - delete[] perm; - delete[] res; - eig = nullptr; - perm = nullptr; - res = nullptr; - } - if (m0 > 0 && numvec != m0) - { - delete X; - X = nullptr; - } - nev = numeig; - if (numvec > 0) - { - m0 = numvec; - } - else - { - if (nev <= 3) - { - m0 = std::max(nev + 2, 2 * nev); // Just a guess for subspace dimension - } - else - { - m0 = std::max(nev + 3, nev + (nev + 1) / 2); - } - } - mQ = 2 * M * m0; // Real-valued basis splitting leads to factor of 2 -} - -void FeastEigenSolver::SetTol(double tol) -{ - rtol = tol; -} - -void FeastEigenSolver::SetMaxIter(int maxits) -{ - max_it = 
maxits; -} - -void FeastEigenSolver::SetContour(double blr, double bli, double trr, double tri, - bool filter_small_real, bool filter_small_imag) -{ - MFEM_VERIFY(blr <= trr && bli <= tri, - "Integration contour must be defined by bottom-left and top-right " - "points in the complex plane!"); - bl = blr + PETSC_i * bli; - tr = trr + PETSC_i * tri; - real_threshold = filter_small_real; - imag_threshold = filter_small_imag; -} - -void FeastEigenSolver::SetBMat(const petsc::PetscParMatrix &B) -{ - opB = &B; -} - -void FeastEigenSolver::SetInitialSpace(const petsc::PetscParVector &v) -{ - if (!r0) - { - r0 = new petsc::PetscParVector(v); - } - else - { - MFEM_VERIFY(v.GetSize() == r0->GetSize(), - "Invalid modification of eigenvalue problem size!"); - r0->Copy(v); - } - info = 1; -} - -int FeastEigenSolver::SolveInternal(RG rg) -{ - // Allocate space for subspace and residuals. R is constructed with mQ columns for - // computing products of form R = A Q during projection. - MFEM_VERIFY(X && X->GetGlobalNumCols() == m0, - "Unexpected number of eigenvector columns in FEAST solver!"); - MPI_Comm comm = X->GetComm(); - PetscInt n = X->GetNumRows(); - petsc::PetscDenseMatrix R(comm, n, PETSC_DECIDE, PETSC_DECIDE, mQ, nullptr); - petsc::PetscDenseMatrix Q(comm, n, PETSC_DECIDE, PETSC_DECIDE, mQ, nullptr); - - // Allocate other workspace variables. - PetscInt *inside = new PetscInt[m0]; - bool *converged = new bool[m0]; - if (!eig) - { - eig = new PetscScalar[m0]; - perm = new PetscInt[m0]; - res = new PetscReal[m0]; - } - for (PetscInt j = 0; j < m0; j++) - { - res[j] = -1.0; - } - mfem::Vector qr(n), qi(n); - -#if 0 - // XX TODO: Stochastic estimates - bool est_stochastic = true; - if (est_stochastic) - { - X->SetRandomReal(0, m0); - if (info) - { - for (PetscInt j = 0; j < m0; j++) - { - // Ensure homogeneous Dirichlet BC are satisfied by the subspace. 
- petsc::PetscParVector x = X->GetColumn(j); - x.PointwiseMult(*r0, false); - X->RestoreColumn(j, x); - } - } - X->SetRandomSign(0, m0, true); - - PetscScalar sum = 0; - petsc::PetscParVector r = R.GetColumn(0); // Just for workspace - for (const auto &op : opInv) - { - sum += op.Mult(*X, r, *r0); - } - R.RestoreColumn(0, r); - PetscInt m = (PetscInt)PetscCeilReal(PetscAbsScalar(sum)/(PetscReal)m0); - - // Debug - Mpi::Print("Eigenvalue estimate: {:d}\n", m); - } -#endif - - // Initialize the subspace. - Q.SetRandom(0, mQ / 2); - if (info) - { - petsc::PetscParVector q = Q.GetColumn(0); - q.Copy(*r0); - Q.RestoreColumn(0, q); - for (PetscInt j = 1; j < mQ / 2; j++) - { - // Ensure homogeneous Dirichlet BC are satisfied by the starting subspace. - petsc::PetscParVector q = Q.GetColumn(j); - q.PointwiseMult(*r0, false); - Q.RestoreColumn(j, q); - } - } - - // Begin main FEAST loop. - int it = 0, nconv, ninside; - while (true) - { - // Orthonormalize the (real-valued) basis Q. - { - bool mgs = false, cgs2 = true; - for (PetscInt j = 0; j < mQ / 2; j++) - { - petsc::PetscParVector q1 = Q.GetColumn(j); - q1.GetToVectors(qr, qi); - if (opProj) - { - opProj->Mult(qr); - opProj->Mult(qi); - } - q1.SetFromVector(qr); - Q.RestoreColumn(j, q1); - - petsc::PetscParVector q2 = Q.GetColumn(j + mQ / 2); - q2.SetFromVector(qi); - Q.RestoreColumn(j + mQ / 2, q2); - } - for (PetscInt j = 0; j < mQ; j++) - { - if (opB) - { - Q.OrthonormalizeColumn(j, mgs, cgs2, *opB, *r0); - } - else - { - Q.OrthonormalizeColumn(j, mgs, cgs2); - } - } - } - - // Form and solve the projected EVP. Select the m0 best eigenpair candidates and - // reconstruct the full-dimensional eigenvectors. - SolveProjectedProblem(Q, R, *X, eig); - - // Update the eigenpair residuals and check convergence. Residual calculation and - // convergence tests occur in the unscaled space. 
- nconv = ninside = 0; - bool check = true; - PetscReal rmin = mfem::infinity(), rmax = 0.0; - PetscInt jmin = -1, jmax = -1; - if (rg) - { - PalacePetscCall(RGCheckInside(rg, m0, eig, nullptr, inside)); - } - else - { - for (PetscInt j = 0; j < m0; j++) - { - inside[j] = true; - } - } - for (PetscInt j = 0; j < m0; j++) - { - PetscScalar sigma = eig[j] * gamma; - petsc::PetscParVector x = X->GetColumn(j); - petsc::PetscParVector r = R.GetColumn(j); - if (opB) - { - x.Normalize(*opB, *r0); - } - else - { - x.Normalize(); - } - GetResidual(sigma, x, r); - PetscReal res = r.Norml2() / (x.Norml2() * PetscAbsScalar(sigma)); - // PetscReal res = r.Norml2()/x.Norml2(); - X->RestoreColumn(j, x); - R.RestoreColumn(j, r); - if (res < rtol) - { - // Mark converged even for eigenvalues outside the contour. - converged[j] = true; - nconv++; - if (res > rmax) - { - rmax = res; - jmax = j; - } - } - else - { - converged[j] = false; - if (res < rmin) - { - rmin = res; - jmin = j; - } - } - if (inside[j] >= 0) - { - ninside++; - if (!converged[j]) - { - check = false; // Only finish when inside eigenvalues are converged - } - } - - // Debug - // Mpi::Print(comm, " res[{:d}] = {:e} (eig = {:+e}{:+e}i, inside = {:d})\n", - // j, res, PetscRealPart(sigma), - // PetscImaginaryPart(sigma), inside[j]); - } - if (print > 0) - { - if (ninside > 0 || nconv > 0) - { - if (jmin >= 0) - { - Mpi::Print(comm, - " {:d} FEAST inside={:d} converged={:d} first " - "unconverged value (error) {:+.3e}{:+.3e}i ({:.6e})\n", - it, ninside, nconv, PetscRealPart(eig[jmin] * gamma), - PetscImaginaryPart(eig[jmin] * gamma), rmin); - } - else - { - Mpi::Print(comm, - " {:d} FEAST inside={:d} converged={:d} last " - "converged value (error) {:+.3e}{:+.3e}i ({:.6e})\n", - it, ninside, nconv, PetscRealPart(eig[jmax] * gamma), - PetscImaginaryPart(eig[jmax] * gamma), rmax); - } - } - else - { - Mpi::Print(comm, " {:d} FEAST inside=0\n", it); - } - } - // Check convergence: All inside must be converged + any 
outside if user specified nev - // too large. - if ((check && nconv >= nev) || it == max_it) - { - break; - } - - // Update subspace with contour integral (accumulates to first M*m0 columns of Q). - Q.Scale(0.0); - for (const auto &op : opInv) - { - op.Mult(eig, *X, R, Q, *r0, converged, gamma); - } - it++; - } - - // Print some log information. - if (print > 0) - { - Mpi::Print(comm, - "\n FEAST {} eigensolve {} ({:d} eigenpairs); iterations {:d}\n" - " Total number of linear systems solved: {:d}\n" - " Total number of linear solver iterations: {:d}\n", - GetName(), (it == max_it) ? "finished" : "converged", nconv, it, - GetTotalKspMult(), GetTotalKspIter()); - } - if (it == max_it) - { - Mpi::Warning(comm, - "FEAST eigenvalue solver reached maximum {:d} " - "iterations!\nFound {:d} converged eigenvales of requested {:d}!\n", - it, nconv, nev); - } - - // Unscale and sort the eigenvalues in ascending order. - auto CompareAbs = [converged, this](const PetscInt &l, const PetscInt &r) - { - if (!converged[l] && converged[r]) - { - return false; - } - else if (converged[l] && !converged[r]) - { - return true; - } - return (PetscAbsScalar(eig[l]) < PetscAbsScalar(eig[r])); - }; - for (PetscInt j = 0; j < m0; j++) - { - eig[j] = eig[j] * gamma; - perm[j] = j; - } - std::sort(perm, perm + m0, CompareAbs); - - // Cleanup. - delete[] inside; - delete[] converged; - - // Reset for next solve. 
- info = 0; - return nconv; -} - -void FeastEigenSolver::CheckParameters() -{ - MFEM_VERIFY(nev > 0, "Number of requested modes is not positive!"); - MFEM_VERIFY(rtol > 0.0, "Eigensolver tolerance is not positive!"); - MFEM_VERIFY(!(bl == 0.0 && tr == 0.0), "Integration contour has not been defined!"); - if (max_it <= 0) - { - max_it = 15; - } -} - -RG FeastEigenSolver::ConfigureRG(PetscScalar *&z, PetscScalar *&w) -{ - int np = static_cast(opInv.size()); - if (np == 1) - { - z = new PetscScalar[np]; - w = new PetscScalar[np]; - z[0] = 0.5 * (bl + tr) / gamma; // User should pass in bl = tr = target - w[0] = 1.0; - return nullptr; - } - else - { - RG rg; - PalacePetscCall(RGCreate(PETSC_COMM_SELF, &rg)); - MFEM_VERIFY(PetscRealPart(tr - bl) > 0.0 && PetscImaginaryPart(tr - bl) > 0.0, - "Contour must have nonzero and finite aspect ratio!"); - PetscScalar c = 0.5 * (bl + tr) / gamma; - PetscReal r = 0.5 * PetscRealPart(tr - bl) / gamma; - PetscReal vscale = 0.5 * PetscImaginaryPart(tr - bl) / (r * gamma); - PalacePetscCall(RGSetType(rg, RGELLIPSE)); - PalacePetscCall(RGEllipseSetParameters(rg, c, r, vscale)); - // MFEM_VERIFY(opInv.size() % 4 == 0, - // "Number of contour quadrature points for rectangular region - // must be evenly divisible by 4!"); - // PalacePetscCall(RGSetType(rg, RGINTERVAL)); - // PalacePetscCall(RGIntervalSetEndpoints(rg, PetscRealPart(bl)/gamma, - // PetscRealPart(tr)/gamma, // PetscImaginaryPart(bl)/gamma, - // PetscImaginaryPart(tr)/gamma)); - - z = new PetscScalar[np]; - w = new PetscScalar[np]; - if (PetscImaginaryPart(c) == 0.0 || PetscRealPart(c) == 0.0) - { - // Contour is symmetric about an axis and we place the first quadrature point at θ - // = -π/2 (imaginary-axis symmetry) or θ = π (real-axis symmetry). - PetscReal shift = (PetscRealPart(c) == 0.0) ? 
-0.5 * PETSC_PI : PETSC_PI; - for (int k = 0; k < np; k++) - { - PetscReal theta = 2.0 * PETSC_PI * k / (PetscReal)np + shift; - z[k] = c + r * (PetscCosReal(theta) + PETSC_i * vscale * PetscSinReal(theta)); - w[k] = r * (vscale * PetscCosReal(theta) + PETSC_i * PetscSinReal(theta)) / - (PetscReal)np; - } - } - else - { - PetscScalar *zn = new PetscScalar[np]; - PalacePetscCall(RGComputeQuadrature(rg, RG_QUADRULE_TRAPEZOIDAL, np, z, zn, w)); - delete[] zn; - } - return rg; - } -} - -PetscInt *FeastEigenSolver::SortEigenvalues(const PetscScalar *eig_, PetscInt m) const -{ - PetscReal rthresh = (real_threshold) ? 0.01 * PetscRealPart(bl) / gamma : 0.0; - PetscReal ithresh = (imag_threshold) ? 0.01 * PetscImaginaryPart(bl) / gamma : 0.0; - PetscScalar target = 0.5 * (bl + tr) / gamma; - PetscReal vscale = - (bl == tr) ? 1.0 : PetscImaginaryPart(tr - bl) / PetscRealPart(tr - bl); - auto CompareTargetAbs = - [eig_, rthresh, ithresh, target, vscale](const PetscInt &l, const PetscInt &r) - { - PetscReal lr = PetscAbsReal(PetscRealPart(eig_[l])); - PetscReal li = PetscAbsReal(PetscImaginaryPart(eig_[l])); - PetscReal rr = PetscAbsReal(PetscRealPart(eig_[r])); - PetscReal ri = PetscAbsReal(PetscImaginaryPart(eig_[r])); - if ((li < ithresh && ri >= ithresh) || (lr < rthresh && rr >= rthresh)) - { - return false; - } - else if ((li >= ithresh && ri < ithresh) || (lr >= rthresh && rr < rthresh)) - { - return true; - } - PetscScalar dl = eig_[l] - target; - PetscScalar dr = eig_[r] - target; - PetscReal vl = PetscRealPart(dl) * PetscRealPart(dl) + - PetscImaginaryPart(dl) * PetscImaginaryPart(dl) / (vscale * vscale); - PetscReal vr = PetscRealPart(dr) * PetscRealPart(dr) + - PetscImaginaryPart(dr) * PetscImaginaryPart(dr) / (vscale * vscale); - return (vl < vr); - }; - PetscInt *perm_ = new PetscInt[m]; - for (PetscInt i = 0; i < m; i++) - { - perm_[i] = i; - } - std::sort(perm_, perm_ + m, CompareTargetAbs); - return perm_; -} - -void 
FeastEigenSolver::BVMatProjectInternal(const petsc::PetscDenseMatrix &Q, - const petsc::PetscParMatrix &A, - petsc::PetscDenseMatrix &Ar, - petsc::PetscDenseMatrix &R, - PetscReal scale) const -{ - // Compute Ar = Qᴴ A Q. We assume Q is real and thus the result is complex symmetric if A - // is symmetric. Ar is replicated across all processes(sequential mQ x mQ matrix). - MFEM_VERIFY(A.GetSymmetric() && Ar.GetSymmetric(), - "BVMatProjectInternal is specialized for symmetric matrices!"); - MFEM_VERIFY(Q.GetGlobalNumCols() == mQ && R.GetGlobalNumCols() == mQ && - Ar.GetNumRows() == mQ && Ar.GetNumCols() == mQ, - "Unexpected number of basis columns in FEAST solver!"); - mfem::Vector qr(Q.GetNumRows()); - for (PetscInt j = 0; j < mQ; j++) - { - const petsc::PetscParVector q = Q.GetColumnRead(j); - petsc::PetscParVector r = R.GetColumn(j); - q.GetToVector(qr); - A.Mult(qr, r); - Q.RestoreColumnRead(j, q); - R.RestoreColumn(j, r); - } - PetscInt n = A.GetNumRows(); - const PetscScalar *pQ = Q.GetArrayRead(), *pR = R.GetArrayRead(); - petsc::PetscDenseMatrix locQ(n, mQ, const_cast(pQ)); - petsc::PetscDenseMatrix locR(n, mQ, const_cast(pR)); - locQ.MatTransposeMult(locR, Ar); // Qᴴ = Qᵀ - Q.RestoreArrayRead(pQ); - R.RestoreArrayRead(pR); - - // Global reduction over all processes. 
- PetscScalar *pAr = Ar.GetArray(); - Mpi::GlobalSum(mQ * mQ, pAr, Q.GetComm()); - Ar.RestoreArray(pAr); - Ar.Scale(scale); -} - -int FeastEigenSolver::GetTotalKspMult() const -{ - int ksp_mult = 0; - for (const auto &op : opInv) - { - ksp_mult += op.ksp.GetTotalNumMult(); - } - return ksp_mult; -} - -int FeastEigenSolver::GetTotalKspIter() const -{ - int ksp_it = 0; - for (const auto &op : opInv) - { - ksp_it += op.ksp.GetTotalNumIter(); - } - return ksp_it; -} - -void FeastEigenSolver::GetEigenvalue(int i, double &eigr, double &eigi) const -{ - MFEM_VERIFY(eig && i >= 0 && i < m0, - "Out of range eigenpair requested (i = " << i << ", m0 = " << m0 << ")!"); - const int &j = perm[i]; - eigr = PetscRealPart(eig[j]); - eigi = PetscImaginaryPart(eig[j]); -} - -void FeastEigenSolver::GetEigenvector(int i, petsc::PetscParVector &v) const -{ - MFEM_VERIFY(eig && i >= 0 && i < m0, - "Out of range eigenpair requested (i = " << i << ", m0 = " << m0 << ")!"); - const int &j = perm[i]; - const petsc::PetscParVector x = X->GetColumnRead(j); - v.Copy(x); - X->RestoreColumnRead(j, x); -} - -void FeastEigenSolver::GetError(int i, EigenSolverBase::ErrorType type, double &err) const -{ - MFEM_VERIFY(eig && i >= 0 && i < m0, - "Out of range eigenpair requested (i = " << i << ", m0 = " << m0 << ")!"); - const int &j = perm[i]; - if (res[j] <= 0.0) - { - const petsc::PetscParVector x = X->GetColumnRead(j); - GetResidual(eig[j], x, *r0); - res[j] = r0->Norml2() / x.Norml2(); - X->RestoreColumnRead(j, x); - } - switch (type) - { - case ErrorType::ABSOLUTE: - err = res[j]; - break; - case ErrorType::RELATIVE: - err = res[j] / PetscAbsScalar(eig[j]); - break; - case ErrorType::BACKWARD: - err = res[j] / GetBackwardScaling(eig[j]); - break; - } -} - -// EPS specific methods - -FeastEPSSolver::FeastEPSSolver(MPI_Comm comm, const IoData &iodata, SpaceOperator &spaceop, - int np, int print_lvl) - : FeastEigenSolver(comm, iodata, spaceop, np, print_lvl) -{ - opK = opM = nullptr; - normK = 
normM = 0.0; - AQ = BQ = XQ = XQ0 = nullptr; -} - -void FeastEPSSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) -{ - MFEM_VERIFY(!opK || opK->GetNumRows() == K.GetNumRows(), - "Invalid modification of eigenvalue problem size!"); - bool first = (opK == nullptr); - opK = &K; - opM = &M; - if (first && type != ScaleType::NONE) - { - normK = opK->Norm2(); - normM = opM->Norm2(); - MFEM_VERIFY(normK > 0.0 && normM > 0.0, "Invalid matrix norms for EPS scaling!"); - gamma = normK / normM; // Store γ² for linear problem - delta = 2.0 / normK; - } -} - -int FeastEPSSolver::Solve() -{ - // Check inputs. - CheckParameters(); - MFEM_VERIFY(opK && opM, "Operators are not set for FeastEPSSolver!"); - - // Allocate storage for eigenvectors. - MPI_Comm comm = opK->GetComm(); - if (!X) - { - X = new petsc::PetscDenseMatrix(comm, opK->GetNumRows(), PETSC_DECIDE, PETSC_DECIDE, m0, - nullptr); - } - if (!r0) - { - r0 = new petsc::PetscParVector(*opK); - } - - // Allocate sequential matrices for the projected generalized eigenvalue problems at each - // iteration, and associated eigenvectors. - AQ = new petsc::PetscDenseMatrix(mQ, mQ, nullptr); - AQ->CopySymmetry(*opK); - BQ = new petsc::PetscDenseMatrix(mQ, mQ, nullptr); - BQ->CopySymmetry(*opM); - XQ = new petsc::PetscDenseMatrix(mQ, mQ, nullptr); - XQ0 = new petsc::PetscDenseMatrix(mQ, m0, nullptr); - - // Create region object for integration contour and configure the linear solvers at each - // integration point. The linear solves use the unscaled space. 
- PetscScalar *z, *w; - RG rg = ConfigureRG(z, w); - Mpi::Print(comm, "Quadrature points for FEAST contour\n"); - for (int k = 0; k < static_cast(opInv.size()); k++) - { - Mpi::Print(comm, " {:d}: zₖ = {:+.3e}{:+3e}i\n", k + 1, PetscRealPart(z[k]) * gamma, - PetscImaginaryPart(z[k]) * gamma); - opInv[k].SetOperators(z[k] * gamma, w[k], *opK, *opM); -#if 0 - int l = 0; - for (; l < k; l++) - { - constexpr double atol = 1.0e-9; - if (PetscAbsReal(PetscAbsScalar(z[k]) - PetscAbsScalar(z[l])) < atol) - { - // Reuse preconditioner assembled for contour point with same real magnitude. - opInv[k].SetOperators(z[k] * gamma, w[k], *opK, *opM, opInv[l].pc); - break; - } - } - if (l == k) - { - opInv[k].SetOperators(z[k] * gamma, w[k], *opK, *opM); - } -#endif - } - Mpi::Print(comm, "\n"); - delete[] z; - delete[] w; - - // Solve the quadratic eigenvalue problem. - int nconv = SolveInternal(rg); - - // Cleanup. - PalacePetscCall(RGDestroy(&rg)); - delete AQ; - delete BQ; - delete XQ; - delete XQ0; - - return nconv; -} - -void FeastEPSSolver::SolveProjectedProblem(const petsc::PetscDenseMatrix &Q_, - petsc::PetscDenseMatrix &R_, - petsc::PetscDenseMatrix &X_, PetscScalar *eig_) -{ - // Form mQ x mQ projected matrices. - // AQ->Scale(0.0); - // BQ->Scale(0.0); - BVMatProjectInternal(Q_, *opK, *AQ, R_, delta); - BVMatProjectInternal(Q_, *opM, *BQ, R_, delta * gamma); - - // Solve projected EVP using LAPACK wrapper. 
- PetscBLASInt info, n, lwork, lrwork; - PetscScalar *work, *alpha, *beta; - PetscReal *rwork; - PetscBLASIntCast(mQ, &n); - lwork = 2 * n; - lrwork = 8 * n; - work = new PetscScalar[lwork]; - rwork = new PetscReal[lrwork]; - alpha = new PetscScalar[n]; - beta = new PetscScalar[n]; - - PetscScalar *pAQ = AQ->GetArray(); - PetscScalar *pBQ = BQ->GetArray(); - PetscScalar *pXQ = XQ->GetArray(); - LAPACKggev_("N", "V", &n, pAQ, &n, pBQ, &n, alpha, beta, nullptr, &n, pXQ, &n, work, - &lwork, rwork, &info); - AQ->RestoreArray(pAQ); - BQ->RestoreArray(pBQ); - XQ->RestoreArray(pXQ); - - // Sort eigenpairs by distance to center. - for (PetscBLASInt i = 0; i < n; i++) - { - alpha[i] /= beta[i]; - } - - // Debug - // Mpi::Print(Q_.GetComm(), "Before sort, eigenvalues:\n"); - // for (PetscBLASInt i = 0; i < n; i++) - // { - // Mpi::Print(Q_.GetComm(), " {:+e}{:+e}i\n", - // PetscRealPart(alpha[i]*gamma), - // PetscImaginaryPart(alpha[i]*gamma)); - // } - - PetscInt *sort = SortEigenvalues(alpha, n); - for (PetscInt i = 0; i < m0; i++) - { - eig_[i] = alpha[sort[i]]; - const petsc::PetscParVector xq = XQ->GetColumnRead(sort[i]); - petsc::PetscParVector xq0 = XQ0->GetColumn(i); - xq0.Copy(xq); - XQ->RestoreColumnRead(sort[i], xq); - XQ0->RestoreColumn(i, xq0); - } - - // Cleanup. - delete[] sort; - delete[] work; - delete[] rwork; - delete[] alpha; - delete[] beta; - - // Reconstruct the first m0 high-dimensional eigenvectors. - const PetscScalar *pQ = Q_.GetArrayRead(); - PetscScalar *pX = X_.GetArray(); - petsc::PetscDenseMatrix locQ(X_.GetNumRows(), mQ, const_cast(pQ)); - petsc::PetscDenseMatrix locX(X_.GetNumRows(), m0, pX); - locQ.MatMult(*XQ0, locX); - Q_.RestoreArrayRead(pQ); - X_.RestoreArray(pX); -} - -void FeastEPSSolver::GetResidual(PetscScalar eig_, const petsc::PetscParVector &x_, - petsc::PetscParVector &r_) const -{ - // r = (K - λ M) x for eigenvalue λ. 
- opM->Mult(x_, r_); - r_.Scale(-eig_); - opK->MultAdd(x_, r_); -} - -PetscReal FeastEPSSolver::GetBackwardScaling(PetscScalar eig_) const -{ - // Make sure not to use norms from scaling as this can be confusing if they are different. - // Note that SLEPc uses ||.||∞, not Frobenius. - if (normK <= 0.0) - { - normK = opK->NormInf(); - } - if (normM <= 0.0) - { - normM = opM->NormInf(); - } - return normK + PetscAbsScalar(eig_) * normM; -} - -// PEP specific methods - -FeastPEPSolver::FeastPEPSolver(MPI_Comm comm, const IoData &iodata, SpaceOperator &spaceop, - int np, int print_lvl) - : FeastEigenSolver(comm, iodata, spaceop, np, print_lvl) -{ - opK = opC = opM = nullptr; - normK = normC = normM = 0.0; - AQ = BQ = AQ0 = XQ = XQ0 = nullptr; -} - -void FeastPEPSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) -{ - MFEM_VERIFY(!opK || opK->GetNumRows() == K.GetNumRows(), - "Invalid modification of eigenvalue problem size!"); - bool first = (opK == nullptr); - opK = &K; - opC = &C; - opM = &M; - if (first && type != ScaleType::NONE) - { - normK = opK->Norm2(); - normC = opC->Norm2(); - normM = opM->Norm2(); - MFEM_VERIFY(normK > 0.0 && normC > 0.0 && normM > 0.0, - "Invalid matrix norms for PEP scaling!"); - gamma = std::sqrt(normK / normM); - delta = 2.0 / (normK + gamma * normC); - } -} - -int FeastPEPSolver::Solve() -{ - // Check inputs. - CheckParameters(); - MFEM_VERIFY(opK && opC && opM, "Operators are not set for FeastPEPSolver!"); - - // Allocate storage for eigenvectors. - MPI_Comm comm = opK->GetComm(); - if (!X) - { - X = new petsc::PetscDenseMatrix(comm, opK->GetNumRows(), PETSC_DECIDE, PETSC_DECIDE, m0, - nullptr); - } - if (!r0) - { - r0 = new petsc::PetscParVector(*opK); - } - - // Allocate sequential matrices for the projected linearized generalized eigenvalue - // problems at each iteration, and associated eigenvectors. 
- AQ = new petsc::PetscDenseMatrix(2 * mQ, 2 * mQ, nullptr); - BQ = new petsc::PetscDenseMatrix(2 * mQ, 2 * mQ, nullptr); - AQ0 = new petsc::PetscDenseMatrix(mQ, mQ, nullptr); - AQ0->SetSymmetric(opK->GetSymmetric() && opC->GetSymmetric() && opM->GetSymmetric()); - XQ = new petsc::PetscDenseMatrix(2 * mQ, 2 * mQ, nullptr); - XQ0 = new petsc::PetscDenseMatrix(mQ, m0, nullptr); - - // Create region object for integration contour and configure the linear solvers at each - // integration point. The linear solves use the unscaled space. - PetscScalar *z, *w; - RG rg = ConfigureRG(z, w); - Mpi::Print(comm, "Quadrature points for FEAST contour\n"); - for (int k = 0; k < static_cast(opInv.size()); k++) - { - Mpi::Print(comm, " {:d}: zₖ = {:+.3e}{:+.3e}i\n", k + 1, PetscRealPart(z[k]) * gamma, - PetscImaginaryPart(z[k]) * gamma); - opInv[k].SetOperators(z[k] * gamma, w[k], *opK, *opC, *opM); -#if 0 - int l = 0; - for (; l < k; l++) - { - constexpr double atol = 1.0e-9; - if (PetscAbsReal(PetscAbsScalar(z[k]) - PetscAbsScalar(z[l])) < atol) - { - // Reuse preconditioner assembled for contour point with same real magnitude. - opInv[k].SetOperators(z[k] * gamma, w[k], *opK, *opC, *opM, opInv[l].pc); - break; - } - } - if (l == k) - { - opInv[k].SetOperators(z[k] * gamma, w[k], *opK, *opC, *opM); - } -#endif - } - Mpi::Print(comm, "\n"); - delete[] z; - delete[] w; - - // Solve the quadratic eigenvalue problem. - int nconv = SolveInternal(rg); - - // Cleanup. - PalacePetscCall(RGDestroy(&rg)); - delete AQ; - delete BQ; - delete AQ0; - delete XQ; - delete XQ0; - - return nconv; -} - -void FeastPEPSolver::SolveProjectedProblem(const petsc::PetscDenseMatrix &Q_, - petsc::PetscDenseMatrix &R_, - petsc::PetscDenseMatrix &X_, PetscScalar *eig_) -{ - // Form mQ x mQ projected matrices and construct the canonincal linearization: - // L₀ = [ 0 I ] L₁ = [ I 0 ] - // [ -K -C ] , [ 0 M ] . 
- AQ->Scale(0.0); - BQ->Scale(0.0); - PetscScalar *pAQ = AQ->GetArray(); - PetscScalar *pBQ = BQ->GetArray(); - for (PetscInt i = 0; i < mQ; i++) - { - pAQ[i + 2 * mQ * (i + mQ)] = 1.0; - } - { - // AQ0->Scale(0.0); - BVMatProjectInternal(Q_, *opK, *AQ0, R_, delta); - - const PetscScalar *pAQ0 = AQ0->GetArrayRead(); - for (PetscInt j = 0; j < mQ; j++) - { - for (PetscInt i = 0; i < mQ; i++) - { - pAQ[i + mQ + 2 * mQ * j] = -pAQ0[i + mQ * j]; - } - } - AQ0->RestoreArrayRead(pAQ0); - } - { - // AQ0->Scale(0.0); - BVMatProjectInternal(Q_, *opC, *AQ0, R_, delta * gamma); - - const PetscScalar *pAQ0 = AQ0->GetArrayRead(); - for (PetscInt j = 0; j < mQ; j++) - { - for (PetscInt i = 0; i < mQ; i++) - { - pAQ[i + mQ + 2 * mQ * (j + mQ)] = -pAQ0[i + mQ * j]; - } - } - AQ0->RestoreArrayRead(pAQ0); - } - for (PetscInt i = 0; i < mQ; i++) - { - pBQ[i + 2 * mQ * i] = 1.0; - } - { - // AQ0->Scale(0.0); - BVMatProjectInternal(Q_, *opM, *AQ0, R_, delta * gamma * gamma); - - const PetscScalar *pAQ0 = AQ0->GetArrayRead(); - for (PetscInt j = 0; j < mQ; j++) - { - PalacePetscCall(PetscArraycpy(pBQ + mQ + 2 * mQ * (j + mQ), pAQ0 + mQ * j, mQ)); - } - AQ0->RestoreArrayRead(pAQ0); - } - - // Solve projected EVP using LAPACK wrapper. - PetscBLASInt info, n, lwork, lrwork; - PetscScalar *work, *alpha, *beta; - PetscReal *rwork; - PetscBLASIntCast(2 * mQ, &n); - lwork = 2 * n; - lrwork = 8 * n; - work = new PetscScalar[lwork]; - rwork = new PetscReal[lrwork]; - alpha = new PetscScalar[n]; - beta = new PetscScalar[n]; - - PetscScalar *pXQ = XQ->GetArray(); - LAPACKggev_("N", "V", &n, pAQ, &n, pBQ, &n, alpha, beta, nullptr, &n, pXQ, &n, work, - &lwork, rwork, &info); - AQ->RestoreArray(pAQ); - BQ->RestoreArray(pBQ); - XQ->RestoreArray(pXQ); - - // Sort eigenpairs by distance to center. From the linearization, we extract the - // eigenvectors from the top block and normalize later on. 
- for (PetscBLASInt i = 0; i < n; i++) - { - alpha[i] /= beta[i]; - } - - // Debug - // Mpi::Print(Q_.GetComm(), "Before sort, eigenvalues:\n"); - // for (PetscBLASInt i = 0; i < n; i++) - // { - // Mpi::Print(Q_.GetComm(), " {:+e}{:+e}i\n", - // PetscRealPart(alpha[i]*gamma), - // PetscImaginaryPart(alpha[i]*gamma)); - // } - - PetscInt *sort = SortEigenvalues(alpha, n); - for (PetscInt i = 0; i < m0; i++) - { - eig_[i] = alpha[sort[i]]; - const PetscScalar *pXQ = XQ->GetArrayRead(); - PetscScalar *pXQ0 = XQ0->GetArray(); - PalacePetscCall(PetscArraycpy(pXQ0 + mQ * i, pXQ + 2 * mQ * sort[i], mQ)); - XQ->RestoreArrayRead(pXQ); - XQ0->RestoreArray(pXQ0); - } - - // Cleanup. - delete[] sort; - delete[] work; - delete[] rwork; - delete[] alpha; - delete[] beta; - - // Reconstruct the first m0 high-dimensional eigenvectors. - const PetscScalar *pQ = Q_.GetArrayRead(); - PetscScalar *pX = X_.GetArray(); - petsc::PetscDenseMatrix locQ(X_.GetNumRows(), mQ, const_cast(pQ)); - petsc::PetscDenseMatrix locX(X_.GetNumRows(), m0, pX); - locQ.MatMult(*XQ0, locX); - Q_.RestoreArrayRead(pQ); - X_.RestoreArray(pX); -} - -void FeastPEPSolver::GetResidual(PetscScalar eig_, const petsc::PetscParVector &x_, - petsc::PetscParVector &r_) const -{ - // r = P(λ) x = (K + λ C + λ² M) x for eigenvalue λ. - opM->Mult(x_, r_); - r_.Scale(eig_); - opC->MultAdd(x_, r_); - r_.Scale(eig_); - opK->MultAdd(x_, r_); -} - -PetscReal FeastPEPSolver::GetBackwardScaling(PetscScalar eig_) const -{ - // Make sure not to use norms from scaling as this can be confusing if they are different. - // Note that SLEPc uses ||.||∞, not the 2-norm. - if (normK <= 0.0) - { - normK = opK->Norm2(); - } - if (normC <= 0.0) - { - normC = opC->Norm2(); - } - if (normM <= 0.0) - { - normM = opM->Norm2(); - } - PetscReal t = PetscAbsScalar(eig_); - return normK + t * normC + t * t * normM; -} - -} // namespace palace::feast - -PetscErrorCode __mat_apply_FEAST_EPS(Mat A, Vec x, Vec y) -{ - // Apply the operator: K - zₖ M . 
- palace::feast::internal::FeastLinearSolver *feast; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&feast)); - MFEM_VERIFY(feast, "Invalid PETSc shell matrix context for FEAST!"); - { - feast->opM->Mult(xx, yy); - yy.Scale(-feast->zk); - feast->opK->MultAdd(xx, yy); - } - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_apply_FEAST_PEP(Mat A, Vec x, Vec y) -{ - // Apply the operator: K + zₖ C + zₖ² M . - palace::feast::internal::FeastLinearSolver *feast; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&feast)); - MFEM_VERIFY(feast, "Invalid PETSc shell matrix context for FEAST!"); - { - feast->opM->Mult(xx, yy); - yy.Scale(feast->zk); - feast->opC->MultAdd(xx, yy); - yy.Scale(feast->zk); - feast->opK->MultAdd(xx, yy); - } - PetscFunctionReturn(0); -} - -#endif - -#endif diff --git a/palace/linalg/feast.hpp b/palace/linalg/feast.hpp deleted file mode 100644 index 7d63c082d..000000000 --- a/palace/linalg/feast.hpp +++ /dev/null @@ -1,291 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LINALG_FEAST_HPP -#define PALACE_LINALG_FEAST_HPP - -#if 0 // XX TODO DISABLE FEAST FOR NOW - -#if defined(PALACE_WITH_SLEPC) - -#include "linalg/petsc.hpp" - -#if !defined(PETSC_USE_COMPLEX) -#error "FEAST eigenvalue solver requires PETSc built with complex scalars!" -#endif - -#include -#include "linalg/eps.hpp" - -// Forward declarations of SLEPc objects. 
-typedef struct _p_RG *RG; - -namespace palace -{ - -class DivFreeSolver; -class IoData; -class KspSolver; -class SpaceOperator; - -namespace feast -{ - -namespace internal -{ - -class FeastLinearSolver; - -} // namespace internal - -// -// A native implementation of the FEAST eigenvalue solver, with residual- inverse iteration -// for linear and quadratic eigenvalue problems with complex- symmetric matrices. Depends -// on SLEPc for some functionality like quadrature and solving projected the eigenvalue -// problem. -// -class FeastEigenSolver : public EigenSolverBase -{ -protected: - // Control print level for debugging. - int print; - - // Status variable. - int info; - - // Number eigenvalues to be computed. Also the subspace and projected system dimensions. - PetscInt nev, m0, mQ; - - // Number of moments to consider for subspace construction. - PetscInt M; - - // Relative eigenvalue error convergence tolerance for the solver. - PetscReal rtol; - - // Maximum number of FEAST iterations. - PetscInt max_it; - - // Variables for scaling, from Higham et al., IJNME 2008. - PetscReal gamma, delta; - - // Parameters defining the integration contour. - PetscScalar bl, tr; - bool real_threshold, imag_threshold; - - // Storage for computed eigenvalues. - PetscScalar *eig; - PetscInt *perm; - - // Storage for computed eigenvectors. - petsc::PetscDenseMatrix *X; - - // Storage for computed residual norms. - mutable PetscReal *res; - - // Workspace vector for initial space and residual calculations. - mutable petsc::PetscParVector *r0; - - // Solvers for Linear systems for the different quadrature points along the contour. - std::vector opInv; - - // Reference to solver for projecting an intermediate vector onto a divergence-free space - // (not owned). - const DivFreeSolver *opProj; - - // Reference to matrix used for weighted inner products (not owned). May be nullptr, in - // which case identity is used. 
- const petsc::PetscParMatrix *opB; - - // Perform the FEAST solve. - int SolveInternal(RG rg); - - // Helper routine for parameter checking. - void CheckParameters(); - - // Helper routine to construct the integration contour. - RG ConfigureRG(PetscScalar *&z, PetscScalar *&w); - - // Helper routine for sorting eigenvalues of the projected problem. - PetscInt *SortEigenvalues(const PetscScalar *eig_, PetscInt m) const; - - // Helper routine for computing the Qᴴ A Q matrix product. - void BVMatProjectInternal(const petsc::PetscDenseMatrix &Q, - const petsc::PetscParMatrix &A, petsc::PetscDenseMatrix &Ar, - petsc::PetscDenseMatrix &R, PetscReal scale) const; - - // Helper routine for solving the projected eigenvalue problem. - virtual void SolveProjectedProblem(const petsc::PetscDenseMatrix &Q_, - petsc::PetscDenseMatrix &R_, - petsc::PetscDenseMatrix &X_, PetscScalar *eig_) = 0; - - // Helper routine for computing the eigenpair residual. - virtual void GetResidual(PetscScalar eig_, const petsc::PetscParVector &x_, - petsc::PetscParVector &r_) const = 0; - - // Helper routine for computing the backward error. - virtual PetscReal GetBackwardScaling(PetscScalar eig_) const = 0; - - // Return problem type name. - virtual const char *GetName() const = 0; - -public: - FeastEigenSolver(MPI_Comm comm, const IoData &iodata, SpaceOperator &spaceop, int np, - int print_lvl); - ~FeastEigenSolver() override; - - // Set operators for the generalized eigenvalue problem or for the quadratic polynomial - // eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, - ScaleType type) override; - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; - - // Set the projection operator for the divergence-free constraint. - void SetProjector(const DivFreeSolver &divfree) override; - - // Get scaling factors used by the solver. 
- double GetScalingGamma() const override { return (double)gamma; } - double GetScalingDelta() const override { return (double)delta; } - - // Set the number of required eigenmodes. - void SetNumModes(int numeig, int numvec = 0) override; - - // Set solver tolerance. - void SetTol(double tol) override; - - // Set maximum number of FEAST iterations. - void SetMaxIter(int maxits) override; - - // Set up region for contour integration. Region is defined by the bottom- left and - // top-right points in the complex plane. - void SetContour(double blr, double bli, double trr, double tri, - bool filter_small_real = false, bool filter_small_imag = false); - - // Set optional B matrix used for weighted inner products. This must be set explicitly - // even for generalized problems, otherwise the identity will be used. - void SetBMat(const petsc::PetscParMatrix &B) override; - - // Set an initial vector for the solution subspace. - void SetInitialSpace(const petsc::PetscParVector &v) override; - - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. - int Solve() override = 0; - - // Return number of linear solves and linear solver iterations performed during the FEAST - // solve. - int GetTotalKspMult() const; - int GetTotalKspIter() const; - - // Get the corresponding eigenvalue. - void GetEigenvalue(int i, double &eigr, double &eigi) const override; - - // Get the corresponding eigenvector. - void GetEigenvector(int i, petsc::PetscParVector &v) const override; - - // Get the corresponding eigenpair error. - void GetError(int i, ErrorType type, double &err) const override; - - // Methods not relevant to the FEAST eigenvalue solver. 
- void SetLinearSolver(const KspSolver &ksp) override - { - MFEM_ABORT("SetLinearSolver not defined for FeastEigenSolver!"); - } - void SetWhichEigenpairs(WhichType type) override - { - MFEM_ABORT("SetWhichEigenpairs not defined for FeastEigenSolver!"); - } - void SetShiftInvert(double tr, double ti, bool precond = false) override - { - MFEM_ABORT("SetShiftInvert not defined for FeastEigenSolver!"); - } -}; - -// Generalized eigenvalue problem solver: K x = λ M x . -class FeastEPSSolver : public FeastEigenSolver -{ -private: - // References to matrices defining the generalized eigenvalue problem (not owned). - const petsc::PetscParMatrix *opK, *opM; - - // Operator norms for scaling. - mutable PetscReal normK, normM; - - // Sequential workspace matrices for projected problem. - petsc::PetscDenseMatrix *AQ, *BQ, *XQ, *XQ0; - -protected: - // Helper routine for solving the projected eigenvalue problem. - void SolveProjectedProblem(const petsc::PetscDenseMatrix &Q_, petsc::PetscDenseMatrix &R_, - petsc::PetscDenseMatrix &X_, PetscScalar *eig_) override; - - // Helper routine for computing the eigenpair residuals: R = K X - M X Λ . - void GetResidual(PetscScalar eig_, const petsc::PetscParVector &x_, - petsc::PetscParVector &r_) const override; - - // Helper routine for computing the backward error. - PetscReal GetBackwardScaling(PetscScalar eig_) const override; - - // Return problem type name. - const char *GetName() const override { return "EPS"; } - -public: - FeastEPSSolver(MPI_Comm comm, const IoData &iodata, SpaceOperator &spaceop, int np, - int print_lvl); - - // Set operators for the generalized eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, - ScaleType type) override; - - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. - int Solve() override; -}; - -// Quadratic eigenvalue problem solver: P(λ) x = (K + λ C + λ² M) x = 0 . 
-class FeastPEPSolver : public FeastEigenSolver -{ -private: - // References to matrices defining the quadratic eigenvalue problem (not owned). - const petsc::PetscParMatrix *opK, *opC, *opM; - - // Operator norms for scaling. - mutable PetscReal normK, normC, normM; - - // Sequential workspace matrices for projected problem. - petsc::PetscDenseMatrix *AQ, *BQ, *AQ0, *XQ, *XQ0; - -protected: - // Helper routine for solving the projected eigenvalue problem. - void SolveProjectedProblem(const petsc::PetscDenseMatrix &Q_, petsc::PetscDenseMatrix &R_, - petsc::PetscDenseMatrix &X_, PetscScalar *eig_) override; - - // Helper routine for computing the eigenpair residuals: R = P(Λ, X) . - void GetResidual(PetscScalar eig_, const petsc::PetscParVector &x_, - petsc::PetscParVector &r_) const override; - - // Helper routine for computing the backward error. - PetscReal GetBackwardScaling(PetscScalar eig_) const override; - - // Return problem type name. - const char *GetName() const override { return "PEP"; } - -public: - FeastPEPSolver(MPI_Comm comm, const IoData &iodata, SpaceOperator &spaceop, int np, - int print_lvl); - - // Set operators for the quadratic polynomial eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; - - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. - int Solve() override; -}; - -} // namespace feast - -} // namespace palace - -#endif - -#endif - -#endif // PALACE_LINALG_FEAST_HPP diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index 2db83860f..5de18df9b 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -87,9 +87,6 @@ void GeometricMultigridSolver::SetOperator( } else { - - // XX TODO TEST IF THIS ACTUALLY WORKS AT RUNTIME... 
- B_[l]->SetOperator(*ops[l]); } } diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp index cf35350c7..3485c0018 100644 --- a/palace/linalg/operator.hpp +++ b/palace/linalg/operator.hpp @@ -49,7 +49,11 @@ class ParOperator : public Operator // Construct the parallel operator, inheriting ownership of the local operator. ParOperator(std::unique_ptr &&A, const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict = false); + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + ParOperator(std::unique_ptr &&A, const mfem::ParFiniteElementSpace &fespace) + : ParOperator(std::move(A), fespace, fespace, false) + { + } // Get access to the underlying local (L-vector) operator. const Operator &LocalOperator() const @@ -91,6 +95,14 @@ class ParOperator : public Operator return trial_dbc_tdof_list_; } + // Get access to the finite element spaces associated with the operator. + const mfem::ParFiniteElementSpace &GetFESpace() const + { + MFEM_VERIFY(&trial_fespace_ == &test_fespace_ && height == width, + "GetFESpace should only be used for square ParOperator!"); + return trial_fespace_; + } + // A call to ParallelAssemble will typically free the memory associated with the local // operator as it is no longer required. When the save flag is set, the local operator // will not be deleted during parallel assembly. diff --git a/palace/linalg/petsc.cpp b/palace/linalg/petsc.cpp deleted file mode 100644 index 2dc08d0ba..000000000 --- a/palace/linalg/petsc.cpp +++ /dev/null @@ -1,2555 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 - -#include "petsc.hpp" - -#include -#include -#include -// #include "linalg/hypre.hpp" -#include "linalg/slepc.hpp" -#include "utils/communication.hpp" - -static PetscErrorCode __mat_shell_init(Mat); -static PetscErrorCode __mat_shell_destroy(Mat); -static PetscErrorCode __mat_shell_apply(Mat, Vec, Vec); -static PetscErrorCode __mat_shell_apply_transpose(Mat, Vec, Vec); -static PetscErrorCode __mat_shell_apply_hermitian_transpose(Mat, Vec, Vec); -static PetscErrorCode __mat_shell_apply_add(Mat, Vec, Vec); -static PetscErrorCode __mat_shell_apply_transpose_add(Mat, Vec, Vec); -static PetscErrorCode __mat_shell_apply_hermitian_transpose_add(Mat, Vec, Vec); -#if defined(PETSC_USE_COMPLEX) -static PetscErrorCode __mat_shell_apply(Mat, const mfem::Vector &, Vec); -static PetscErrorCode __mat_shell_apply_transpose(Mat, const mfem::Vector &, Vec); -static PetscErrorCode __mat_shell_apply_hermitian_transpose(Mat, const mfem::Vector &, Vec); -#endif -static PetscErrorCode __mat_shell_get_diagonal(Mat, Vec); -// static PetscErrorCode __mat_shell_shift(Mat, PetscScalar); -// static PetscErrorCode __mat_shell_scale(Mat, PetscScalar); -// static PetscErrorCode __mat_shell_conj(Mat); -// static PetscErrorCode __mat_shell_axpy(Mat, PetscScalar, Mat, MatStructure); -// static PetscErrorCode __mat_shell_norm(Mat, NormType, PetscReal *); -static PetscErrorCode __mat_shell_real_part(Mat); -static PetscErrorCode __mat_shell_imag_part(Mat); -static PetscErrorCode __mat_convert_hypreParCSR_AIJ(hypre_ParCSRMatrix *, Mat *); -static PetscErrorCode __array_container_destroy(void *); - -namespace palace::petsc -{ - -using mfem::ForallWrap; - -void Initialize(int &argc, char **&argv, const char rc_file[], const char help[]) -{ - PalacePetscCall(PetscInitialize(&argc, &argv, rc_file, help)); -} - -void Finalize() -{ - PalacePetscCall(PetscFinalize()); -} - -// PetscScatter methods -PetscScatter::PetscScatter(PetscScatter::Type type, const 
PetscParVector &x, - std::unique_ptr &y) -{ - Vec yy; - if (type == Type::TO_ZERO) - { - PalacePetscCall(VecScatterCreateToZero(x, &ctx, &yy)); - } - else // type == Type::TO_ALL - { - PalacePetscCall(VecScatterCreateToAll(x, &ctx, &yy)); - } - y = std::make_unique(yy, false); -} - -PetscScatter::~PetscScatter() -{ - PalacePetscCall(VecScatterDestroy(&ctx)); -} - -void PetscScatter::Forward(const PetscParVector &x, PetscParVector &y) -{ - PalacePetscCall(VecScatterBegin(ctx, x, y, INSERT_VALUES, SCATTER_FORWARD)); - PalacePetscCall(VecScatterEnd(ctx, x, y, INSERT_VALUES, SCATTER_FORWARD)); -} - -void PetscScatter::Reverse(const PetscParVector &x, PetscParVector &y) -{ - PalacePetscCall(VecScatterBegin(ctx, x, y, INSERT_VALUES, SCATTER_REVERSE)); - PalacePetscCall(VecScatterEnd(ctx, x, y, INSERT_VALUES, SCATTER_REVERSE)); -} - -// PetscParVector methods - -PetscParVector::PetscParVector(const PetscParMatrix &A, bool transpose) -{ - if (!transpose) - { - PalacePetscCall(MatCreateVecs(A, &x, nullptr)); - } - else - { - PalacePetscCall(MatCreateVecs(A, nullptr, &x)); - } -} - -PetscParVector::PetscParVector(MPI_Comm comm, const mfem::Vector &y) -{ - PalacePetscCall(VecCreate(comm, &x)); - PalacePetscCall(VecSetSizes(x, y.Size(), PETSC_DECIDE)); - PalacePetscCall(VecSetType(x, VECSTANDARD)); - SetFromVector(y); -} - -PetscParVector::PetscParVector(const mfem::Vector &y) -{ - PalacePetscCall(VecCreateSeq(PETSC_COMM_SELF, y.Size(), &x)); - SetFromVector(y); -} - -#if defined(PETSC_USE_COMPLEX) -PetscParVector::PetscParVector(MPI_Comm comm, const mfem::Vector &yr, - const mfem::Vector &yi) -{ - MFEM_VERIFY(yr.Size() == yi.Size(), - "Mismatch in size of real and imaginary vector parts!"); - PalacePetscCall(VecCreate(comm, &x)); - PalacePetscCall(VecSetSizes(x, yr.Size(), PETSC_DECIDE)); - PalacePetscCall(VecSetType(x, VECSTANDARD)); - SetFromVectors(yr, yi); -} - -PetscParVector::PetscParVector(const mfem::Vector &yr, const mfem::Vector &yi) -{ - MFEM_VERIFY(yr.Size() == 
yi.Size(), - "Mismatch in size of real and imaginary vector parts!"); - PalacePetscCall(VecCreateSeq(PETSC_COMM_SELF, yr.Size(), &x)); - SetFromVectors(yr, yi); -} -#endif - -PetscParVector::PetscParVector(MPI_Comm comm, PetscInt n, PetscInt N) -{ - PalacePetscCall(VecCreateMPI(comm, n, N, &x)); -} - -// PetscParVector::PetscParVector(PetscInt n) -// { -// PalacePetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &x)); -// } - -PetscParVector::PetscParVector(MPI_Comm comm, PetscInt n, PetscInt N, PetscScalar *data) -{ - PalacePetscCall(VecCreateMPIWithArray(comm, 1, n, N, data, &x)); -} - -PetscParVector::PetscParVector(PetscInt n, PetscScalar *data) -{ - PalacePetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n, data, &x)); -} - -PetscParVector::PetscParVector(const PetscParVector &y) -{ - PalacePetscCall(VecDuplicate(y, &x)); - Copy(y); -} - -PetscParVector::PetscParVector(Vec y, bool ref) -{ - x = y; - if (ref) - { - PalacePetscCall(PetscObjectReference(reinterpret_cast(y))); - } -} - -PetscParVector::~PetscParVector() -{ - PalacePetscCall(VecDestroy(&x)); -} - -void PetscParVector::Copy(const PetscParVector &y) -{ - MFEM_VERIFY(GetSize() == y.GetSize(), "Invalid size!"); - PalacePetscCall(VecCopy(y, x)); -} - -void PetscParVector::GetToVector(mfem::Vector &v, PetscInt start, PetscInt end) const -{ - const PetscScalar *xv; - if (start < 0) - { - start = 0; - } - if (end < 0) - { - end = GetSize(); - } - MFEM_VERIFY(0 <= start && start <= end && end <= GetSize() && v.Size() == end - start, - "Invalid start/end indices for vector extraction!"); - PalacePetscCall(VecGetArrayRead(x, &xv)); - auto vv = v.Write(); -#if defined(PETSC_USE_COMPLEX) - MFEM_FORALL(i, end - start, { vv[i] = PetscRealPart(xv[i + start]); }); -#else - MFEM_FORALL(i, end - start, { vv[i] = xv[i + start]; }); -#endif - PalacePetscCall(VecRestoreArrayRead(x, &xv)); -} - -void PetscParVector::SetFromVector(const mfem::Vector &v) -{ - PetscScalar *xv; - MFEM_VERIFY(GetSize() == v.Size(), "Invalid 
size!"); - PalacePetscCall(VecGetArray(x, &xv)); - const auto vv = v.Read(); - MFEM_FORALL(i, GetSize(), { xv[i] = vv[i]; }); - PalacePetscCall(VecRestoreArray(x, &xv)); -} - -void PetscParVector::AddFromVector(const mfem::Vector &v) -{ - PetscScalar *xv; - MFEM_VERIFY(GetSize() == v.Size(), "Invalid size!"); - PalacePetscCall(VecGetArray(x, &xv)); - const auto vv = v.Read(); - MFEM_FORALL(i, GetSize(), { xv[i] += vv[i]; }); - PalacePetscCall(VecRestoreArray(x, &xv)); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscParVector::GetToVectors(mfem::Vector &vr, mfem::Vector &vi, PetscInt start, - PetscInt end) const -{ - const PetscScalar *xv; - if (start < 0) - { - start = 0; - } - if (end < 0) - { - end = GetSize(); - } - MFEM_VERIFY(0 <= start && start <= end && end <= GetSize() && vr.Size() == end - start && - vi.Size() == end - start, - "Invalid start/end indices for vector extraction!"); - PalacePetscCall(VecGetArrayRead(x, &xv)); - auto vvr = vr.Write(); - auto vvi = vi.Write(); - MFEM_FORALL(i, end - start, { - vvr[i] = PetscRealPart(xv[i + start]); - vvi[i] = PetscImaginaryPart(xv[i + start]); - }); - PalacePetscCall(VecRestoreArrayRead(x, &xv)); -} - -void PetscParVector::SetFromVectors(const mfem::Vector &vr, const mfem::Vector &vi) -{ - PetscScalar *xv; - MFEM_VERIFY(GetSize() == vr.Size() || GetSize() == vi.Size(), "Invalid size!"); - PalacePetscCall(VecGetArray(x, &xv)); - const auto vvr = vr.Read(); - const auto vvi = vi.Read(); - MFEM_FORALL(i, GetSize(), { - // xv[i] = vvr[i] + PETSC_i * vvi[i]; - reinterpret_cast(&xv[i])[0] = vvr[i]; - reinterpret_cast(&xv[i])[1] = vvi[i]; - }); - PalacePetscCall(VecRestoreArray(x, &xv)); -} - -void PetscParVector::AddFromVectors(const mfem::Vector &vr, const mfem::Vector &vi) -{ - PetscScalar *xv; - MFEM_VERIFY(GetSize() == vr.Size() || GetSize() == vi.Size(), "Invalid size!"); - PalacePetscCall(VecGetArray(x, &xv)); - const auto vvr = vr.Read(); - const auto vvi = vi.Read(); - MFEM_FORALL(i, GetSize(), { - // xv[i] 
+= vvr[i] + PETSC_i * vvi[i]; - reinterpret_cast(&xv[i])[0] += vvr[i]; - reinterpret_cast(&xv[i])[1] += vvi[i]; - }); - PalacePetscCall(VecRestoreArray(x, &xv)); -} -#endif - -PetscScalar *PetscParVector::GetArray() -{ - PetscScalar *data; - PalacePetscCall(VecGetArray(x, &data)); - return data; -} - -const PetscScalar *PetscParVector::GetArrayRead() const -{ - const PetscScalar *data; - PalacePetscCall(VecGetArrayRead(x, &data)); - return data; -} - -void PetscParVector::RestoreArray(PetscScalar *data) -{ - PalacePetscCall(VecRestoreArray(x, &data)); -} - -void PetscParVector::RestoreArrayRead(const PetscScalar *data) const -{ - PalacePetscCall(VecRestoreArrayRead(x, &data)); -} - -void PetscParVector::PlaceArray(const PetscScalar *data) -{ - PalacePetscCall(VecPlaceArray(x, data)); -} - -void PetscParVector::ResetArray() -{ - PalacePetscCall(VecResetArray(x)); -} - -PetscInt PetscParVector::GetSize() const -{ - PetscInt n; - PalacePetscCall(VecGetLocalSize(x, &n)); - return n; -} - -PetscInt PetscParVector::GetGlobalSize() const -{ - PetscInt N; - PalacePetscCall(VecGetSize(x, &N)); - return N; -} - -void PetscParVector::Resize(PetscInt n, bool copy) -{ - Vec y; - const PetscScalar *xv; - PetscScalar *yv; - PetscInt n0 = GetSize(); - VecType type; - if (n0 == n) - { - return; - } - PalacePetscCall(VecGetType(x, &type)); - PalacePetscCall(VecCreate(GetComm(), &y)); - PalacePetscCall(VecSetSizes(y, n, PETSC_DECIDE)); - PalacePetscCall(VecSetType(y, type)); - if (copy) - { - PalacePetscCall(VecGetArrayRead(x, &xv)); - PalacePetscCall(VecGetArray(y, &yv)); - MFEM_FORALL(i, std::min(n, n0), { yv[i] = xv[i]; }); - PalacePetscCall(VecRestoreArrayRead(x, &xv)); - PalacePetscCall(VecRestoreArray(y, &yv)); - } - PalacePetscCall(VecDestroy(&x)); - x = y; -} - -void PetscParVector::SetZero() -{ - PalacePetscCall(VecZeroEntries(x)); -} - -void PetscParVector::SetRandom() -{ - PetscRandom rand; - MPI_Comm comm = GetComm(); - PalacePetscCall(PetscRandomCreate(comm, &rand)); 
-#if defined(PETSC_USE_COMPLEX) - PalacePetscCall(PetscRandomSetInterval(rand, -1.0 - PETSC_i, 1.0 + PETSC_i)); -#else - PalacePetscCall(PetscRandomSetInterval(rand, -1.0, 1.0)); -#endif - PalacePetscCall(VecSetRandom(x, rand)); - PalacePetscCall(PetscRandomDestroy(&rand)); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscParVector::SetRandomReal() -{ - PetscRandom rand; - MPI_Comm comm = GetComm(); - PalacePetscCall(PetscRandomCreate(comm, &rand)); - PalacePetscCall(PetscRandomSetInterval(rand, -1.0, 1.0)); - PalacePetscCall(VecSetRandom(x, rand)); - PalacePetscCall(PetscRandomDestroy(&rand)); -} -#endif - -void PetscParVector::SetRandomSign(bool init) -{ - PetscScalar *xv; - if (!init) - { - SetRandomReal(); - } - PalacePetscCall(VecGetArray(x, &xv)); - MFEM_FORALL(i, GetSize(), { - // Leave zeros alone. - xv[i] = - (PetscRealPart(xv[i]) > 0.0) ? 1.0 : ((PetscRealPart(xv[i]) < 0.0) ? -1.0 : 0.0); - }); - PalacePetscCall(VecRestoreArray(x, &xv)); -} - -PetscParVector &PetscParVector::operator=(PetscScalar s) -{ - PalacePetscCall(VecSet(x, s)); - return *this; -} - -void PetscParVector::Scale(PetscScalar s) -{ - PalacePetscCall(VecScale(x, s)); -} - -void PetscParVector::Shift(PetscScalar s) -{ - PalacePetscCall(VecShift(x, s)); -} - -void PetscParVector::Abs() -{ - PalacePetscCall(VecAbs(x)); -} - -void PetscParVector::SqrtAbs() -{ - PalacePetscCall(VecSqrtAbs(x)); -} - -void PetscParVector::Inv() -{ - PalacePetscCall(VecReciprocal(x)); -} - -void PetscParVector::InvSqrt() -{ - PalacePetscCall(VecPow(x, -0.5)); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscParVector::Conj() -{ - PalacePetscCall(VecConjugate(x)); -} - -void PetscParVector::GetRealPart() -{ - PalacePetscCall(VecRealPart(x)); -} - -void PetscParVector::GetImagPart() -{ - PalacePetscCall(VecImaginaryPart(x)); -} -#endif - -PetscReal PetscParVector::Normalize() -{ - PetscReal norm; - PalacePetscCall(VecNormalize(x, &norm)); - return norm; -} - -PetscReal PetscParVector::Normalize(const PetscParMatrix 
&B, PetscParVector &Bx) -{ - B.Mult(*this, Bx); - PetscReal norm = - PetscSqrtReal(PetscAbsScalar(Bx.Dot(*this))); // For SPD B, xᴴ B x is real - Scale(1.0 / norm); - return norm; -} - -PetscReal PetscParVector::Norml2() const -{ - PetscReal norm; - PalacePetscCall(VecNorm(x, NORM_2, &norm)); - return norm; -} - -PetscReal PetscParVector::Normlinf() const -{ - PetscReal norm; - PalacePetscCall(VecNorm(x, NORM_INFINITY, &norm)); - return norm; -} - -void PetscParVector::ZeroRows(const mfem::Array &rows) -{ - PetscScalar *xv; - PalacePetscCall(VecGetArray(x, &xv)); - MFEM_FORALL(i, rows.Size(), { xv[rows[i]] = 0.0; }); - PalacePetscCall(VecRestoreArray(x, &xv)); -} - -void PetscParVector::PointwiseMult(const PetscParVector &y, bool replace_zeros) -{ - MFEM_VERIFY(GetSize() == y.GetSize(), "Invalid size!"); - if (replace_zeros) - { - PetscScalar *yv; - PalacePetscCall(VecGetArray(y, &yv)); - MFEM_FORALL(i, GetSize(), { - if (yv[i] == 0.0) - { - yv[i] = 1.0; - } - }); - PalacePetscCall(VecRestoreArray(y, &yv)); - } - PalacePetscCall(VecPointwiseMult(x, x, y)); -} - -void PetscParVector::AXPY(PetscScalar alpha, const PetscParVector &y) -{ - MFEM_VERIFY(GetSize() == y.GetSize(), "Invalid size!"); - PalacePetscCall(VecAXPY(x, alpha, y)); -} - -void PetscParVector::AXPBY(PetscScalar alpha, const PetscParVector &y, PetscScalar beta) -{ - MFEM_VERIFY(GetSize() == y.GetSize(), "Invalid size!"); - PalacePetscCall(VecAXPBY(x, alpha, beta, y)); -} - -void PetscParVector::AXPBYPCZ(PetscScalar alpha, const PetscParVector &y, PetscScalar beta, - const PetscParVector &z, PetscScalar gamma) -{ - MFEM_VERIFY(GetSize() == y.GetSize() && GetSize() == z.GetSize(), "Invalid size!"); - PalacePetscCall(VecAXPBYPCZ(x, alpha, beta, gamma, y, z)); -} - -PetscScalar PetscParVector::Dot(const PetscParVector &y) const -{ - PetscScalar val; - PalacePetscCall(VecDot(x, y, &val)); - return val; -} - -PetscScalar PetscParVector::TransposeDot(const PetscParVector &y) const -{ - PetscScalar val; - 
PalacePetscCall(VecTDot(x, y, &val)); - return val; -} - -void PetscParVector::Print(const char *fname, bool binary) const -{ - if (fname) - { - PetscViewer view; - if (binary) - { - PalacePetscCall( - PetscViewerBinaryOpen(PetscObjectComm(reinterpret_cast(x)), fname, - FILE_MODE_WRITE, &view)); - } - else - { - PalacePetscCall(PetscViewerASCIIOpen( - PetscObjectComm(reinterpret_cast(x)), fname, &view)); - } - PalacePetscCall(VecView(x, view)); - PalacePetscCall(PetscViewerDestroy(&view)); - } - else - { - PalacePetscCall(VecView(x, nullptr)); - } -} - -MPI_Comm PetscParVector::GetComm() const -{ - return x ? PetscObjectComm(reinterpret_cast(x)) : MPI_COMM_NULL; -} - -// PetscParMatrix methods - -PetscParMatrix::PetscParMatrix(const PetscParMatrix &B) -{ - PalacePetscCall(MatDuplicate(B, MAT_COPY_VALUES, &A)); -} - -PetscParMatrix::PetscParMatrix(Mat B, bool ref) -{ - A = B; - if (ref) - { - PalacePetscCall(PetscObjectReference(reinterpret_cast(B))); - } -} - -PetscParMatrix::~PetscParMatrix() -{ - PalacePetscCall(MatDestroy(&A)); -} - -void PetscParMatrix::SetSymmetric(bool sym) -{ - PalacePetscCall(MatSetOption(A, MAT_SYMMETRIC, sym ? PETSC_TRUE : PETSC_FALSE)); - PalacePetscCall(MatSetOption(A, MAT_SYMMETRY_ETERNAL, PETSC_TRUE)); -} - -void PetscParMatrix::SetHermitian(bool herm) -{ - PalacePetscCall(MatSetOption(A, MAT_HERMITIAN, herm ? 
PETSC_TRUE : PETSC_FALSE)); - PalacePetscCall(MatSetOption(A, MAT_SYMMETRY_ETERNAL, PETSC_TRUE)); -} - -bool PetscParMatrix::GetSymmetric() const -{ - PetscBool flg, sym; - PalacePetscCall(MatIsSymmetricKnown(A, &flg, &sym)); - return (flg == PETSC_TRUE && sym == PETSC_TRUE); -} - -bool PetscParMatrix::GetHermitian() const -{ - PetscBool flg, herm; - PalacePetscCall(MatIsHermitianKnown(A, &flg, &herm)); - return (flg == PETSC_TRUE && herm == PETSC_TRUE); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscParMatrix::SetRealSymmetric() -{ - PalacePetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE)); - PalacePetscCall(MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE)); - PalacePetscCall(MatSetOption(A, MAT_SYMMETRY_ETERNAL, PETSC_TRUE)); -} -#endif - -void PetscParMatrix::CopySymmetry(const PetscParMatrix &B) -{ - PalacePetscCall(MatPropagateSymmetryOptions(B, A)); -} - -PetscInt PetscParMatrix::GetNumRows() const -{ - PetscInt m; - PalacePetscCall(MatGetLocalSize(A, &m, nullptr)); - return m; -} - -PetscInt PetscParMatrix::GetNumCols() const -{ - PetscInt n; - PalacePetscCall(MatGetLocalSize(A, nullptr, &n)); - return n; -} - -PetscInt PetscParMatrix::GetGlobalNumRows() const -{ - PetscInt M; - PalacePetscCall(MatGetSize(A, &M, nullptr)); - return M; -} - -PetscInt PetscParMatrix::GetGlobalNumCols() const -{ - PetscInt N; - PalacePetscCall(MatGetSize(A, nullptr, &N)); - return N; -} - -PetscInt PetscParMatrix::NNZ() const -{ - MatInfo info; - PalacePetscCall(MatGetInfo(A, MAT_GLOBAL_SUM, &info)); - return (PetscInt)info.nz_used; -} - -PetscReal PetscParMatrix::NormF() const -{ - PetscReal norm; - PalacePetscCall(MatNorm(A, NORM_FROBENIUS, &norm)); - return norm; -} - -PetscReal PetscParMatrix::NormInf() const -{ - PetscReal norm; - PalacePetscCall(MatNorm(A, NORM_INFINITY, &norm)); - return norm; -} - -PetscReal PetscParMatrix::Norm2(PetscReal tol, PetscInt maxits) const -{ - // XX TODO: Add separate if condition using ARPACK estimate before reverting to power - // iteration. 
- if (tol == PETSC_DEFAULT) - { - tol = 1.0e-4; - } - if (maxits == PETSC_DEFAULT) - { - maxits = 100; - } - // #if defined(PALACE_WITH_SLEPC) - // return slepc::GetMaxSingularValue(*this, tol, maxits); - // #else - // Power iteration loop: ||A||₂² = λₙ(Aᴴ A) . - PetscInt it = 0; - PetscReal res = 0.0; - PetscReal l, l0 = 0.0; - PetscParVector u(*this), v(*this); - u.SetRandom(); - u.Normalize(); - while (it < maxits) - { - Mult(u, v); - if (GetHermitian()) - { - u.Copy(v); - } - else - { - MultHermitianTranspose(v, u); - } - l = u.Normalize(); - if (it > 0) - { - res = PetscAbsReal(l - l0) / PetscAbsReal(l0); - if (res < tol) - { - break; - } - } - l0 = l; - it++; - } - if (it >= maxits) - { - Mpi::Warning(GetComm(), - "Power iteration did not converge in {:d} " - "iterations, res = {:.3e}, lambda = {:.3e}!\n", - it, res, l); - } - return GetHermitian() ? l : PetscSqrtReal(l); - // #endif -} - -void PetscParMatrix::Scale(PetscScalar s) -{ - PalacePetscCall(MatScale(A, s)); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscParMatrix::Conj() -{ - PalacePetscCall(MatConjugate(A)); -} - -void PetscParMatrix::GetRealPart() -{ - PalacePetscCall(MatRealPart(A)); -} - -void PetscParMatrix::GetImagPart() -{ - PalacePetscCall(MatImaginaryPart(A)); -} -#endif - -void PetscParMatrix::AXPY(PetscScalar alpha, const PetscParMatrix &B, - PetscParMatrix::NNZStructure struc) -{ - switch (struc) - { - case NNZStructure::DIFFERENT: - PalacePetscCall(MatAXPY(A, alpha, B, DIFFERENT_NONZERO_PATTERN)); - break; - case NNZStructure::SAME: - PalacePetscCall(MatAXPY(A, alpha, B, SAME_NONZERO_PATTERN)); - break; - case NNZStructure::SUBSET: - PalacePetscCall(MatAXPY(A, alpha, B, SUBSET_NONZERO_PATTERN)); - break; - } -} - -void PetscParMatrix::Mult(const PetscParVector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.GetSize() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - PalacePetscCall(::MatMult(A, x, y)); -} - -void 
PetscParMatrix::MultAdd(const PetscParVector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.GetSize() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - PalacePetscCall(MatMultAdd(A, x, y, y)); -} - -void PetscParMatrix::MultTranspose(const PetscParVector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.GetSize() == GetNumRows() && y.GetSize() == GetNumCols(), - "Incorrect vector sizes for matrix-vector product!"); - PalacePetscCall(::MatMultTranspose(A, (Vec)x, (Vec)y)); -} - -void PetscParMatrix::MultTransposeAdd(const PetscParVector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.GetSize() == GetNumRows() && y.GetSize() == GetNumCols(), - "Incorrect vector sizes for matrix-vector product!"); - PalacePetscCall(MatMultTransposeAdd(A, x, y, y)); -} - -void PetscParMatrix::MultHermitianTranspose(const PetscParVector &x, - PetscParVector &y) const -{ - MFEM_VERIFY(x.GetSize() == GetNumRows() && y.GetSize() == GetNumCols(), - "Incorrect vector sizes for matrix-vector product!"); - PalacePetscCall(MatMultHermitianTranspose(A, x, y)); -} - -void PetscParMatrix::MultHermitianTransposeAdd(const PetscParVector &x, - PetscParVector &y) const -{ - MFEM_VERIFY(x.GetSize() == GetNumRows() && y.GetSize() == GetNumCols(), - "Incorrect vector sizes for matrix-vector product!"); - PalacePetscCall(MatMultHermitianTransposeAdd(A, x, y, y)); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscParMatrix::Mult(const mfem::Vector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.Size() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - PetscParVector xx(GetComm(), x); - Mult(xx, y); -} - -void PetscParMatrix::MultTranspose(const mfem::Vector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.Size() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - PetscParVector xx(GetComm(), x); - MultTranspose(xx, y); -} - -void 
PetscParMatrix::MultHermitianTranspose(const mfem::Vector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.Size() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - PetscParVector xx(GetComm(), x); - MultHermitianTranspose(xx, y); -} -#endif - -void PetscParMatrix::Print(const char *fname, bool binary) const -{ - if (fname) - { - PetscViewer view; - if (binary) - { - PalacePetscCall( - PetscViewerBinaryOpen(PetscObjectComm(reinterpret_cast(A)), fname, - FILE_MODE_WRITE, &view)); - } - else - { - PalacePetscCall(PetscViewerASCIIOpen( - PetscObjectComm(reinterpret_cast(A)), fname, &view)); - } - PalacePetscCall(MatView(A, view)); - PalacePetscCall(PetscViewerDestroy(&view)); - } - else - { - PalacePetscCall(MatView(A, nullptr)); - } -} - -std::unique_ptr -#if defined(PETSC_USE_COMPLEX) -PetscParMatrix::GetHypreParMatrix(PetscParMatrix::ExtractStructure struc) const -#else -PetscParMatrix::GetHypreParMatrix() const -#endif -{ - HYPRE_BigInt M = GetGlobalNumRows(); - HYPRE_BigInt N = GetGlobalNumCols(); - std::unique_ptr rows, cols; - if (HYPRE_AssumedPartitionCheck()) - { - PetscInt start, end; - rows = std::make_unique(2); - PalacePetscCall(MatGetOwnershipRange(A, &start, &end)); - rows[0] = start; - rows[1] = end; - if (M != N) - { - cols = std::make_unique(2); - PalacePetscCall(MatGetOwnershipRangeColumn(A, &start, &end)); - cols[0] = start; - cols[1] = end; - } - } - else - { - PetscMPIInt comm_size; - const PetscInt *ranges; - MPI_Comm_size(GetComm(), &comm_size); - rows = std::make_unique(comm_size + 1); - PalacePetscCall(MatGetOwnershipRanges(A, &ranges)); - for (PetscMPIInt i = 0; i < comm_size + 1; i++) - { - rows[i] = ranges[i]; - } - if (M != N) - { - cols = std::make_unique(comm_size + 1); - PalacePetscCall(MatGetOwnershipRangesColumn(A, &ranges)); - for (PetscMPIInt i = 0; i < comm_size + 1; i++) - { - cols[i] = ranges[i]; - } - } - } - - // Count nonzeros. 
- MatInfo info; - PalacePetscCall(MatGetInfo(A, MAT_LOCAL, &info)); - PetscInt nnz = (PetscInt)info.nz_used; - - // Copy local CSR block of rows (columns in global numbering). - PetscInt rstart, rend, n; - const PetscInt *jj; - const PetscScalar *vals; - PalacePetscCall(MatGetOwnershipRange(A, &rstart, &rend)); - - int m = rend - rstart; - std::unique_ptr II = std::make_unique(m + 1); - std::unique_ptr JJ = std::make_unique(nnz); - std::unique_ptr data = std::make_unique(nnz); - nnz = 0; - - for (PetscInt i = rstart; i < rend; i++) - { - PalacePetscCall(MatGetRow(A, i, &n, &jj, &vals)); - II[i - rstart] = nnz; - for (PetscInt j = 0; j < n; j++) - { -#if defined(PETSC_USE_COMPLEX) - if (struc == ExtractStructure::REAL) - { - data[nnz] = PetscRealPart(vals[j]); - } - else if (struc == ExtractStructure::IMAGINARY) - { - data[nnz] = PetscImaginaryPart(vals[j]); - } - else // struc == ExtractStructure::SUM - { - data[nnz] = PetscRealPart(vals[j]) + PetscImaginaryPart(vals[j]); - } -#else - data[nnz] = vals[j]; -#endif - JJ[nnz++] = jj[j]; - } - PalacePetscCall(MatRestoreRow(A, i, &n, &jj, &vals)); - } - II[m] = nnz; - - // Create the HypreParMatrix (copies all inputs so memory of local variables is released - // after return). - if (M == N) - { - return std::make_unique(GetComm(), m, M, N, II.get(), JJ.get(), - data.get(), rows.get(), rows.get()); - } - else - { - return std::make_unique(GetComm(), m, M, N, II.get(), JJ.get(), - data.get(), rows.get(), cols.get()); - } -} - -PetscErrorCode Convert_Array_IS(MPI_Comm comm, bool islist, const mfem::Array &list, - PetscInt start, IS *is) -{ - // Converts from a list (or a marked Array if islist is false) to an IS. The offset where - // to start numbering is given as start. 
- PetscInt n = list.Size(), *idxs; - const auto *data = list.HostRead(); - PetscFunctionBeginUser; - - if (islist) - { - PetscCall(PetscMalloc1(n, &idxs)); - for (PetscInt i = 0; i < n; i++) - { - idxs[i] = data[i] + start; - } - } - else - { - PetscInt cum = 0; - for (PetscInt i = 0; i < n; i++) - { - if (data[i]) - { - cum++; - } - } - PetscCall(PetscMalloc1(cum, &idxs)); - cum = 0; - for (PetscInt i = 0; i < n; i++) - { - if (data[i]) - { - idxs[cum++] = i + start; - } - } - n = cum; - } - PetscCall(ISCreateGeneral(comm, n, idxs, PETSC_OWN_POINTER, is)); - PetscFunctionReturn(0); -} - -std::unique_ptr PetscParMatrix::GetSubMatrix(const mfem::Array &rows, - const mfem::Array &cols) -{ - PetscInt rst, cst; - IS row_is, col_is; - Mat B; - PalacePetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); - // Rows need to be in global numbering. - PalacePetscCall(MatGetOwnershipRange(A, &rst, nullptr)); - PalacePetscCall(MatGetOwnershipRange(A, &cst, nullptr)); - PalacePetscCall(Convert_Array_IS(GetComm(), true, rows, rst, &row_is)); - PalacePetscCall(Convert_Array_IS(GetComm(), true, cols, cst, &col_is)); - PalacePetscCall(MatCreateSubMatrix(A, row_is, col_is, MAT_INITIAL_MATRIX, &B)); - PalacePetscCall(ISDestroy(&row_is)); - PalacePetscCall(ISDestroy(&col_is)); - return std::make_unique(B, false); -} - -std::unique_ptr PetscParMatrix::GetSequentialMatrix(bool create) -{ - IS row_is, col_is; - PetscInt nmat = create ? 1 : 0; - Mat *pB = nullptr, B = nullptr; - if (create) - { - PetscInt M = GetGlobalNumRows(), N = GetGlobalNumCols(); - PalacePetscCall(ISCreateStride(PETSC_COMM_SELF, M, 0, 1, &row_is)); - PalacePetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &col_is)); - } - PalacePetscCall(MatCreateSubMatrices(A, nmat, &row_is, &col_is, MAT_INITIAL_MATRIX, &pB)); - if (create) - { - PalacePetscCall(ISDestroy(&row_is)); - PalacePetscCall(ISDestroy(&col_is)); - B = pB[0]; - } - PalacePetscCall(PetscFree(pB)); - return (B) ? 
std::make_unique(B, false) : nullptr; -} - -MPI_Comm PetscParMatrix::GetComm() const -{ - return A ? PetscObjectComm(reinterpret_cast(A)) : MPI_COMM_NULL; -} - -PetscShellMatrix::PetscShellMatrix(MPI_Comm comm, std::unique_ptr &&B) -{ - // Wrap the MFEM Operator as a PETSc shell, which inherets the underlying matrix storage - // (when the PETSc matrix is destroyed, so is the Hypre one). - MFEM_VERIFY(B, "Cannot construct PETSc shell from an empty matrix!"); - PetscInt m = (PetscInt)B->Height(); - PetscInt n = (PetscInt)B->Width(); - - PetscMatShellCtx *ctx = new PetscMatShellCtx; - ctx->Ar = std::move(B); -#if defined(PETSC_USE_COMPLEX) - ctx->Ai = nullptr; - ctx->x.SetSize(2 * n); - ctx->y.SetSize(2 * m); -#else - ctx->x.SetSize(n); - ctx->y.SetSize(m); -#endif - - PalacePetscCall(MatCreateShell(comm, m, n, PETSC_DECIDE, PETSC_DECIDE, (void *)ctx, &A)); - __mat_shell_init(A); -} - -#if defined(PETSC_USE_COMPLEX) -PetscShellMatrix::PetscShellMatrix(MPI_Comm comm, std::unique_ptr &&Br, - std::unique_ptr &&Bi) -{ - MFEM_VERIFY(Br || Bi, "Cannot construct PETSc shell from an empty matrix!"); - MFEM_VERIFY((!Br || !Bi) || (Br->Height() == Bi->Height() && Br->Width() == Bi->Width()), - "Mismatch in dimension of real and imaginary matrix parts!"); - PetscInt m, n; - if (Br) - { - m = (PetscInt)Br->Height(); - n = (PetscInt)Br->Width(); - } - else - { - m = (PetscInt)Bi->Height(); - n = (PetscInt)Bi->Width(); - } - - PetscMatShellCtx *ctx = new PetscMatShellCtx; - ctx->Ar = std::move(Br); - ctx->Ai = std::move(Bi); - ctx->x.SetSize(2 * n); - ctx->y.SetSize(2 * m); - - PalacePetscCall(MatCreateShell(comm, m, n, PETSC_DECIDE, PETSC_DECIDE, (void *)ctx, &A)); - __mat_shell_init(A); -} -#endif - -PetscMatShellCtx *PetscShellMatrix::GetContext() const -{ - PetscMatShellCtx *ctx; - PalacePetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - return ctx; -} - -PetscInt PetscShellMatrix::NNZ() const -{ - HYPRE_BigInt nnz; - 
PetscMatShellCtx *ctx = GetContext(); -#if defined(PETSC_USE_COMPLEX) - MFEM_VERIFY(!(ctx->Ar && ctx->Ai), "Use NNZReal/NNZImag methods for complex matrices!"); - nnz = (ctx->Ar) ? dynamic_cast(*ctx->Ar).NNZ() - : ((ctx->Ai) ? dynamic_cast(*ctx->Ai).NNZ() : 0); -#else - nnz = (ctx->Ar) ? dynamic_cast(*ctx->Ar).NNZ() : 0; -#endif - return (PetscInt)nnz; -} - -#if defined(PETSC_USE_COMPLEX) -PetscInt PetscShellMatrix::NNZReal() const -{ - HYPRE_BigInt nnz; - PetscMatShellCtx *ctx = GetContext(); - nnz = (ctx->Ar) ? dynamic_cast(*ctx->Ar).NNZ() : 0; - return (PetscInt)nnz; -} - -PetscInt PetscShellMatrix::NNZImag() const -{ - HYPRE_BigInt nnz; - PetscMatShellCtx *ctx = GetContext(); - nnz = (ctx->Ai) ? dynamic_cast(*ctx->Ai).NNZ() : 0; - return (PetscInt)nnz; -} -#endif - -#if defined(PETSC_USE_COMPLEX) -PetscReal PetscShellMatrix::NormFReal() const -{ - HYPRE_Real norm; - PetscMatShellCtx *ctx = GetContext(); - norm = (ctx->Ar) ? hypre_ParCSRMatrixFnorm(dynamic_cast(*ctx->Ar)) - : 0.0; - return norm; -} - -PetscReal PetscShellMatrix::NormFImag() const -{ - HYPRE_Real norm; - PetscMatShellCtx *ctx = GetContext(); - norm = (ctx->Ai) ? 
hypre_ParCSRMatrixFnorm(dynamic_cast(*ctx->Ai)) - : 0.0; - return norm; -} - -PetscReal PetscShellMatrix::NormInfReal() const -{ - HYPRE_Real norm; - PetscMatShellCtx *ctx = GetContext(); - if (ctx->Ar) - { - hypre_ParCSRMatrixInfNorm(dynamic_cast(*ctx->Ar), &norm); - } - else - { - norm = 0.0; - } - return norm; -} - -PetscReal PetscShellMatrix::NormInfImag() const -{ - HYPRE_Real norm; - PetscMatShellCtx *ctx = GetContext(); - if (ctx->Ai) - { - hypre_ParCSRMatrixInfNorm(dynamic_cast(*ctx->Ai), &norm); - } - else - { - norm = 0.0; - } - return norm; -} -#endif - -#if defined(PETSC_USE_COMPLEX) -void PetscShellMatrix::Mult(const mfem::Vector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.Size() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - __mat_shell_apply(A, x, y); -} - -void PetscShellMatrix::MultTranspose(const mfem::Vector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.Size() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - __mat_shell_apply_transpose(A, x, y); -} - -void PetscShellMatrix::MultHermitianTranspose(const mfem::Vector &x, - PetscParVector &y) const -{ - MFEM_VERIFY(x.Size() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - __mat_shell_apply_hermitian_transpose(A, x, y); -} -#endif - -void PetscShellMatrix::Print(const char *fname, bool binary) const -{ - MFEM_VERIFY( - fname && !binary, - "PetscShellMatrix::Print only works with a specified filename and binary = false!") - PetscMatShellCtx *ctx = GetContext(); -#if defined(PETSC_USE_COMPLEX) - MFEM_VERIFY(!(ctx->Ar && ctx->Ai), - "Use PrintReal/PrintImag methods for complex matrices!"); - if (ctx->Ar) - { - dynamic_cast(*ctx->Ar).Print(fname); - } - else if (ctx->Ai) - { - dynamic_cast(*ctx->Ai).Print(fname); - } -#else - if (ctx->Ar) - { - dynamic_cast(*ctx->Ar).Print(fname); - } -#endif -} - -#if defined(PETSC_USE_COMPLEX) 
-void PetscShellMatrix::PrintReal(const char *fname) const -{ - PetscMatShellCtx *ctx = GetContext(); - if (ctx->Ar) - { - dynamic_cast(*ctx->Ar).Print(fname); - } -} - -void PetscShellMatrix::PrintImag(const char *fname) const -{ - PetscMatShellCtx *ctx = GetContext(); - if (ctx->Ai) - { - dynamic_cast(*ctx->Ai).Print(fname); - } -} -#endif - -#if defined(PETSC_USE_COMPLEX) -bool PetscShellMatrix::HasReal() const -{ - PetscMatShellCtx *ctx = GetContext(); - return (ctx->Ar != nullptr); -} - -bool PetscShellMatrix::HasImag() const -{ - PetscMatShellCtx *ctx = GetContext(); - return (ctx->Ai != nullptr); -} -#endif - -const mfem::Operator * -#if defined(PETSC_USE_COMPLEX) -PetscShellMatrix::GetOperator(PetscParMatrix::ExtractStructure struc) const -#else -PetscShellMatrix::GetOperator() const -#endif -{ - PetscMatShellCtx *ctx = GetContext(); -#if defined(PETSC_USE_COMPLEX) - if (struc == ExtractStructure::REAL) - { - MFEM_VERIFY(ctx->Ar, "Invalid use of GetOperator, no real matrix component defined!"); - return ctx->Ar.get(); - } - else if (struc == ExtractStructure::IMAGINARY) - { - MFEM_VERIFY(ctx->Ai, - "Invalid use of GetOperator, no imaginary matrix component defined!"); - return ctx->Ai.get(); - } - MFEM_ABORT("ExtractStructure::SUM is not implemented for PetscShellMatrix!"); - return nullptr; -#else - MFEM_VERIFY(ctx->Ar, "Invalid use of GetOperator, no matrix defined!"); - return ctx->Ar.get(); -#endif -} - -PetscAijMatrix::PetscAijMatrix(const mfem::Operator &B) -{ - auto hB = dynamic_cast(&B); - MFEM_VERIFY(hB, "PetscAijMatrix constructor requires Operator of type HypreParMatrix!"); - PalacePetscCall(__mat_convert_hypreParCSR_AIJ(*hB, &A)); -} - -#if defined(PETSC_USE_COMPLEX) -PetscAijMatrix::PetscAijMatrix(const mfem::Operator &Br, const mfem::Operator &Bi) -{ - Mat Ai; - auto hBr = dynamic_cast(&Br); - auto hBi = dynamic_cast(&Bi); - MFEM_VERIFY(hBr && hBi, - "PetscAijMatrix constructor requires Operator of type HypreParMatrix!"); - 
PalacePetscCall(__mat_convert_hypreParCSR_AIJ(*hBr, &A)); - PalacePetscCall(__mat_convert_hypreParCSR_AIJ(*hBi, &Ai)); - PalacePetscCall(MatAXPY(A, PETSC_i, Ai, UNKNOWN_NONZERO_PATTERN)); - PalacePetscCall(MatDestroy(&Ai)); -} -#endif - -PetscDenseMatrix::PetscDenseMatrix(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, - PetscInt N, PetscScalar *data) -{ - PalacePetscCall(MatCreateDense(comm, m, n, M, N, data, &A)); -} - -PetscDenseMatrix::PetscDenseMatrix(PetscInt m, PetscInt n, PetscScalar *data) -{ - PalacePetscCall(MatCreateSeqDense(PETSC_COMM_SELF, m, n, data, &A)); -} - -void PetscDenseMatrix::Resize(PetscInt m, PetscInt n, bool copy) -{ - Mat B; - PetscScalar *Aj, *Bj; - PetscInt m0 = GetNumRows(), n0 = GetNumCols(); - if (m0 == m && n0 == n) - { - return; - } - PalacePetscCall(MatCreateDense(GetComm(), m, n, PETSC_DECIDE, PETSC_DECIDE, nullptr, &B)); - if (copy) - { - for (PetscInt j = 0; j < std::min(n, n0); j++) - { - PalacePetscCall(MatDenseGetColumn(A, j, &Aj)); - PalacePetscCall(MatDenseGetColumn(B, j, &Bj)); - for (PetscInt i = 0; i < std::min(m, m0); i++) - { - Bj[i] = Aj[i]; - } - PalacePetscCall(MatDenseRestoreColumn(A, &Aj)); - PalacePetscCall(MatDenseRestoreColumn(B, &Bj)); - } - } - PalacePetscCall(MatPropagateSymmetryOptions(A, B)); - PalacePetscCall(MatDestroy(&A)); - A = B; -} - -PetscParVector PetscDenseMatrix::GetColumn(PetscInt j) -{ - MFEM_VERIFY(j >= 0 && j < GetGlobalNumCols(), "Column index out of range!"); - Vec v; - PalacePetscCall(MatDenseGetColumnVec(A, j, &v)); - return PetscParVector(v, true); -} - -const PetscParVector PetscDenseMatrix::GetColumnRead(PetscInt j) const -{ - MFEM_VERIFY(j >= 0 && j < GetGlobalNumCols(), "Column index out of range!"); - Vec v; - PalacePetscCall(MatDenseGetColumnVecRead(A, j, &v)); - return PetscParVector(v, true); -} - -void PetscDenseMatrix::RestoreColumn(PetscInt j, PetscParVector &v) -{ - MFEM_VERIFY(j >= 0 && j < GetGlobalNumCols(), "Column index out of range!"); - Vec u = v; - 
PalacePetscCall(MatDenseRestoreColumnVec(A, j, &u)); -} - -void PetscDenseMatrix::RestoreColumnRead(PetscInt j, const PetscParVector &v) const -{ - MFEM_VERIFY(j >= 0 && j < GetGlobalNumCols(), "Column index out of range!"); - Vec u = v; - PalacePetscCall(MatDenseRestoreColumnVecRead(A, j, &u)); -} - -PetscScalar *PetscDenseMatrix::GetArray() -{ - PetscScalar *data; - PalacePetscCall(MatDenseGetArray(A, &data)); - return data; -} - -const PetscScalar *PetscDenseMatrix::GetArrayRead() const -{ - const PetscScalar *data; - PalacePetscCall(MatDenseGetArrayRead(A, &data)); - return data; -} - -void PetscDenseMatrix::RestoreArray(PetscScalar *data) -{ - PalacePetscCall(MatDenseRestoreArray(A, &data)); -} - -void PetscDenseMatrix::RestoreArrayRead(const PetscScalar *data) const -{ - PalacePetscCall(MatDenseRestoreArrayRead(A, &data)); -} - -void PetscDenseMatrix::SetRandom(PetscInt start, PetscInt end) -{ - PetscRandom rand; - MPI_Comm comm = GetComm(); - PalacePetscCall(PetscRandomCreate(comm, &rand)); -#if defined(PETSC_USE_COMPLEX) - PalacePetscCall(PetscRandomSetInterval(rand, -1.0 - PETSC_i, 1.0 + PETSC_i)); -#else - PalacePetscCall(PetscRandomSetInterval(rand, -1.0, 1.0)); -#endif - if (start < 0) - { - start = 0; - } - if (end < 0) - { - end = GetGlobalNumCols(); - } - MFEM_VERIFY(0 <= start && start <= end && end <= GetGlobalNumCols(), - "Invalid start/end columns for SetRandom!"); - for (PetscInt j = start; j < end; j++) - { - PetscParVector v = GetColumn(j); - PalacePetscCall(VecSetRandom(v, rand)); - RestoreColumn(j, v); - } - PalacePetscCall(PetscRandomDestroy(&rand)); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscDenseMatrix::SetRandomReal(PetscInt start, PetscInt end) -{ - PetscRandom rand; - MPI_Comm comm = GetComm(); - PalacePetscCall(PetscRandomCreate(comm, &rand)); - PalacePetscCall(PetscRandomSetInterval(rand, -1.0, 1.0)); - if (start < 0) - { - start = 0; - } - if (end < 0) - { - end = GetGlobalNumCols(); - } - MFEM_VERIFY(0 <= start && start <= end 
&& end <= GetGlobalNumCols(), - "Invalid start/end columns for SetRandom!"); - for (PetscInt j = start; j < end; j++) - { - PetscParVector v = GetColumn(j); - PalacePetscCall(VecSetRandom(v, rand)); - RestoreColumn(j, v); - } - PalacePetscCall(PetscRandomDestroy(&rand)); -} -#endif - -void PetscDenseMatrix::SetRandomSign(PetscInt start, PetscInt end, bool init) -{ - if (start < 0) - { - start = 0; - } - if (end < 0) - { - end = GetGlobalNumCols(); - } - MFEM_VERIFY(0 <= start && start <= end && end <= GetGlobalNumCols(), - "Invalid start/end columns for SetRandom!"); - if (!init) - { - SetRandomReal(start, end); - } - for (PetscInt j = start; j < end; j++) - { - PetscParVector v = GetColumn(j); - v.SetRandomSign(true); - RestoreColumn(j, v); - } -} - -PetscReal PetscDenseMatrix::OrthonormalizeColumn(PetscInt j, bool mgs, bool cgs2) -{ - - // XX TODO MOVE INTO REAL-VALUED BASIS CLASS... - - auto Dot = [](const PetscParVector &v, const PetscParVector &w) -> PetscScalar - { return v.Dot(w); }; - auto VecDot = [](const PetscParVector &v, const PetscParMatrix &A, - PetscParVector &dot) -> void { A.MultHermitianTranspose(v, dot); }; - auto Normalize = [](PetscParVector &v) -> PetscReal { return v.Normalize(); }; - return OrthonormalizeColumnInternal(j, mgs, cgs2, Dot, VecDot, Normalize); -} - -PetscReal PetscDenseMatrix::OrthonormalizeColumn(PetscInt j, bool mgs, bool cgs2, - const PetscParMatrix &B, - PetscParVector &Bv) -{ - MFEM_VERIFY(Bv.GetSize() == B.GetNumRows(), - "Workspace error for B-matrix orthonormalization!"); - auto Dot = [&B, &Bv](const PetscParVector &v, const PetscParVector &w) -> PetscScalar - { - B.Mult(v, Bv); - return Bv.Dot(w); - }; - auto VecDot = [&B, &Bv](const PetscParVector &v, const PetscParMatrix &A, - PetscParVector &dot) -> void - { - B.Mult(v, Bv); - A.MultHermitianTranspose(Bv, dot); - }; - auto Normalize = [&B, &Bv](PetscParVector &v) -> PetscReal { return v.Normalize(B, Bv); }; - return OrthonormalizeColumnInternal(j, mgs, cgs2, Dot, 
VecDot, Normalize); -} - -PetscReal PetscDenseMatrix::OrthonormalizeColumnInternal( - PetscInt j, bool mgs, bool cgs2, - const std::function &Dot, - const std::function - &VecDot, - const std::function &Normalize) -{ - MFEM_VERIFY(j >= 0 && j < GetGlobalNumCols(), "Column index out of range!"); - PetscParVector v = GetColumn(j); - if (j > 0) - { - if (mgs) - { - // We can't call GetColumn twice. - PetscScalar *pA = GetArray(); - for (int i = 0; i < j; i++) - { - PetscParVector w(GetComm(), GetNumRows(), PETSC_DECIDE, pA + i * GetNumRows()); - PetscScalar dot = Dot(v, w); - v.AXPY(-dot, w); - } - RestoreArray(pA); - } - else - { - int refine = (cgs2) ? 2 : 1; - PetscScalar *pA = GetArray(); - for (int l = 0; l < refine; l++) - { - PetscDenseMatrix Aj(GetComm(), GetNumRows(), PETSC_DECIDE, PETSC_DECIDE, j, pA); - PetscParVector dot(Aj); - VecDot(v, Aj, dot); - dot.Scale(-1.0); - Aj.MultAdd(dot, v); - } - RestoreArray(pA); - } - } - PetscReal norm = Normalize(v); - MFEM_VERIFY(norm > 0.0, - "Linearly dependent column encountered during vector orthonormalization!"); - RestoreColumn(j, v); - // { - // // Debug - // Mpi::Print(GetComm(), "Orthogonality error (j = {:d}):\n", j); - // for (int ii = 0; ii <= j; ii++) - // { - // PetscParVector vv = GetColumn(ii); - // PetscScalar err = Dot(vv, vv); - // Mpi::Print(GetComm(), " ({:d}, {:d}): {:e}{:+e}i\n", ii, ii, PetscRealPart(err), - // PetscImaginaryPart(err)); - // PetscScalar *pA = GetArray(); - // for (int jj = ii + 1; jj <= j; jj++) - // { - // // We can't call GetColumn twice. 
- // PetscParVector ww(GetComm(), GetNumRows(), PETSC_DECIDE, pA + jj * GetNumRows()); - // err = Dot(vv, ww); - // Mpi::Print(GetComm(), " ({:d}, {:d}): {:e}{:+e}i\n", ii, jj, PetscRealPart(err), - // PetscImaginaryPart(err)); - // } - // RestoreArray(pA); - // RestoreColumn(ii, vv); - // } - // } - return norm; -} - -void PetscDenseMatrix::MatMult(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const -{ - MFEM_VERIFY(X.GetNumRows() == GetNumCols() && Y.GetNumRows() == GetNumRows(), - "Incorrect matrix sizes for matrix-matrix product!"); - MFEM_VERIFY(Mpi::Size(GetComm()) == 1, - "PetscDenseMatrix::MatMult is only implemented for sequential " - "matrices!"); - const PetscScalar *pA, *pX; - PetscScalar *pY; - PetscInt lda; - PetscBLASInt m, k, n, ldaA, ldaX, ldaY; - PetscScalar One = 1.0, Zero = 0.0; - PetscBLASIntCast(Y.GetNumRows(), &m); - PetscBLASIntCast(Y.GetNumCols(), &n); - PetscBLASIntCast(GetNumCols(), &k); - - PalacePetscCall(MatDenseGetLDA(A, &lda)); - PetscBLASIntCast(lda, &ldaA); - PalacePetscCall(MatDenseGetLDA(X, &lda)); - PetscBLASIntCast(lda, &ldaX); - PalacePetscCall(MatDenseGetLDA(Y, &lda)); - PetscBLASIntCast(lda, &ldaY); - - PalacePetscCall(MatDenseGetArrayRead(A, &pA)); - PalacePetscCall(MatDenseGetArrayRead(X, &pX)); - PalacePetscCall(MatDenseGetArrayWrite(Y, &pY)); - BLASgemm_("N", "N", &m, &n, &k, &One, pA, &ldaA, pX, &ldaX, &Zero, pY, &ldaY); - PalacePetscCall(MatDenseRestoreArrayRead(A, &pA)); - PalacePetscCall(MatDenseRestoreArrayRead(X, &pX)); - PalacePetscCall(MatDenseRestoreArrayWrite(Y, &pY)); -} - -void PetscDenseMatrix::MatMultTranspose(const PetscDenseMatrix &X, - PetscDenseMatrix &Y) const -{ - MFEM_VERIFY(X.GetNumCols() == GetNumCols() && Y.GetNumRows() == GetNumRows(), - "Incorrect matrix sizes for matrix-matrix product!"); - MFEM_VERIFY(Mpi::Size(GetComm()) == 1, - "PetscDenseMatrix::MatMultTranspose is only implemented for " - "sequential matrices!"); - const PetscScalar *pA, *pX; - PetscScalar *pY; - PetscInt lda; - 
PetscBLASInt m, k, n, ldaA, ldaX, ldaY; - PetscScalar One = 1.0, Zero = 0.0; - PetscBLASIntCast(Y.GetNumRows(), &m); - PetscBLASIntCast(Y.GetNumCols(), &n); - PetscBLASIntCast(GetNumCols(), &k); - - PalacePetscCall(MatDenseGetLDA(A, &lda)); - PetscBLASIntCast(lda, &ldaA); - PalacePetscCall(MatDenseGetLDA(X, &lda)); - PetscBLASIntCast(lda, &ldaX); - PalacePetscCall(MatDenseGetLDA(Y, &lda)); - PetscBLASIntCast(lda, &ldaY); - - PalacePetscCall(MatDenseGetArrayRead(A, &pA)); - PalacePetscCall(MatDenseGetArrayRead(X, &pX)); - PalacePetscCall(MatDenseGetArrayWrite(Y, &pY)); - BLASgemm_("N", "T", &m, &n, &k, &One, pA, &ldaA, pX, &ldaX, &Zero, pY, &ldaY); - PalacePetscCall(MatDenseRestoreArrayRead(A, &pA)); - PalacePetscCall(MatDenseRestoreArrayRead(X, &pX)); - PalacePetscCall(MatDenseRestoreArrayWrite(Y, &pY)); -} - -void PetscDenseMatrix::MatTransposeMult(const PetscDenseMatrix &X, - PetscDenseMatrix &Y) const -{ - MFEM_VERIFY(X.GetNumRows() == GetNumRows() && Y.GetNumRows() == GetNumCols(), - "Incorrect matrix sizes for matrix-matrix product!"); - MFEM_VERIFY(Mpi::Size(GetComm()) == 1, - "PetscDenseMatrix::MatTransposeMult is only implemented for " - "sequential matrices!"); - const PetscScalar *pA, *pX; - PetscScalar *pY; - PetscInt lda; - PetscBLASInt m, k, n, ldaA, ldaX, ldaY; - PetscScalar One = 1.0, Zero = 0.0; - PetscBLASIntCast(Y.GetNumRows(), &m); - PetscBLASIntCast(Y.GetNumCols(), &n); - PetscBLASIntCast(GetNumRows(), &k); - - PalacePetscCall(MatDenseGetLDA(A, &lda)); - PetscBLASIntCast(lda, &ldaA); - PalacePetscCall(MatDenseGetLDA(X, &lda)); - PetscBLASIntCast(lda, &ldaX); - PalacePetscCall(MatDenseGetLDA(Y, &lda)); - PetscBLASIntCast(lda, &ldaY); - - PalacePetscCall(MatDenseGetArrayRead(A, &pA)); - PalacePetscCall(MatDenseGetArrayRead(X, &pX)); - PalacePetscCall(MatDenseGetArrayWrite(Y, &pY)); - BLASgemm_("T", "N", &m, &n, &k, &One, pA, &ldaA, pX, &ldaX, &Zero, pY, &ldaY); - PalacePetscCall(MatDenseRestoreArrayRead(A, &pA)); - 
PalacePetscCall(MatDenseRestoreArrayRead(X, &pX)); - PalacePetscCall(MatDenseRestoreArrayWrite(Y, &pY)); -} - -} // namespace palace::petsc - -PetscErrorCode __mat_shell_init(Mat A) -{ - PetscFunctionBeginUser; - - PalacePetscCall(MatShellSetManageScalingShifts(A)); - PalacePetscCall(MatShellSetOperation(A, MATOP_DESTROY, (void (*)())__mat_shell_destroy)); - PetscCall(MatShellSetOperation( - A, MATOP_MULT, - (void (*)()) static_cast(&__mat_shell_apply))); - PetscCall( - MatShellSetOperation(A, MATOP_MULT_TRANSPOSE, - (void (*)()) static_cast( - &__mat_shell_apply_transpose))); - PetscCall( - MatShellSetOperation(A, MATOP_MULT_HERMITIAN_TRANSPOSE, - (void (*)()) static_cast( - &__mat_shell_apply_hermitian_transpose))); - PetscCall(MatShellSetOperation( - A, MATOP_MULT_ADD, - (void (*)()) static_cast(&__mat_shell_apply_add))); - PetscCall( - MatShellSetOperation(A, MATOP_MULT_TRANSPOSE_ADD, - (void (*)()) static_cast( - &__mat_shell_apply_transpose_add))); - PetscCall( - MatShellSetOperation(A, MATOP_MULT_HERMITIAN_TRANS_ADD, - (void (*)()) static_cast( - &__mat_shell_apply_hermitian_transpose_add))); - PetscCall( - MatShellSetOperation(A, MATOP_GET_DIAGONAL, (void (*)())__mat_shell_get_diagonal)); - // PetscCall(MatShellSetOperation(A, MATOP_SHIFT, (void (*)())__mat_shell_shift)); - // PetscCall(MatShellSetOperation(A, MATOP_SCALE, (void (*)())__mat_shell_scale)); - // PetscCall(MatShellSetOperation(A, MATOP_CONJUGATE, (void (*)())__mat_shell_conj)); - // PetscCall(MatShellSetOperation(A, MATOP_AXPY, (void (*)())__mat_shell_axpy)); - // PetscCall(MatShellSetOperation(A, MATOP_NORM, (void (*)())__mat_shell_norm)); - PetscCall(MatShellSetOperation(A, MATOP_REAL_PART, (void (*)())__mat_shell_real_part)); - PetscCall( - MatShellSetOperation(A, MATOP_IMAGINARY_PART, (void (*)())__mat_shell_imag_part)); - PetscCall(MatSetUp(A)); - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_destroy(Mat A) -{ - palace::petsc::PetscMatShellCtx *ctx; - PetscFunctionBeginUser; - 
- PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - delete ctx; - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_apply_add(Mat A, Vec x, Vec y) -{ - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -#if defined(PETSC_USE_COMPLEX) - { - mfem::Vector xr, xi, yr, yi; - xr.MakeRef(ctx->x, 0, ctx->x.Size() / 2); - xi.MakeRef(ctx->x, ctx->x.Size() / 2, ctx->x.Size() / 2); - yr.MakeRef(ctx->y, 0, ctx->y.Size() / 2); - yi.MakeRef(ctx->y, ctx->y.Size() / 2, ctx->y.Size() / 2); - xx.GetToVectors(xr, xi); - if (ctx->Ar) - { - ctx->Ar->Mult(xr, yr); - ctx->Ar->Mult(xi, yi); - } - else - { - yr = 0.0; - yi = 0.0; - } - if (ctx->Ai) - { - ctx->Ai->AddMult(xi, yr, -1.0); - ctx->Ai->AddMult(xr, yi, 1.0); - } - yy.AddFromVectors(yr, yi); - } -#else - { - xx.GetToVector(ctx->x); - if (ctx->Ar) - { - ctx->Ar->Mult(ctx->x, ctx->y); - } - else - { - ctx->y = 0.0; - } - yy.AddFromVector(ctx->y); - } -#endif - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_apply(Mat A, Vec x, Vec y) -{ - PetscCall(VecZeroEntries(y)); - return __mat_shell_apply_add(A, x, y); -} - -PetscErrorCode __mat_shell_apply_transpose_add(Mat A, Vec x, Vec y) -{ - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscBool flg, sym; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - PetscCall(MatIsSymmetricKnown(A, &flg, &sym)); - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - PetscCall(__mat_shell_apply_add(A, x, y)); - PetscFunctionReturn(0); - } -#if defined(PETSC_USE_COMPLEX) - { - mfem::Vector xr, xi, yr, yi; - xr.MakeRef(ctx->y, 0, ctx->y.Size() / 2); - xi.MakeRef(ctx->y, ctx->y.Size() / 2, ctx->y.Size() / 2); - 
yr.MakeRef(ctx->x, 0, ctx->x.Size() / 2); - yi.MakeRef(ctx->x, ctx->x.Size() / 2, ctx->x.Size() / 2); - xx.GetToVectors(xr, xi); - if (ctx->Ar) - { - ctx->Ar->MultTranspose(xr, yr); - ctx->Ar->MultTranspose(xi, yi); - } - else - { - yr = 0.0; - yi = 0.0; - } - if (ctx->Ai) - { - ctx->Ai->AddMultTranspose(xi, yr, -1.0); - ctx->Ai->AddMultTranspose(xr, yi, 1.0); - } - yy.AddFromVectors(yr, yi); - } -#else - { - xx.GetToVector(ctx->y); - if (ctx->Ar) - { - ctx->Ar->MultTranspose(ctx->y, ctx->x); - } - else - { - ctx->x = 0.0; - } - yy.AddFromVector(ctx->x); - } -#endif - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_apply_transpose(Mat A, Vec x, Vec y) -{ - PetscCall(VecZeroEntries(y)); - return __mat_shell_apply_transpose_add(A, x, y); -} - -PetscErrorCode __mat_shell_apply_hermitian_transpose_add(Mat A, Vec x, Vec y) -{ -#if defined(PETSC_USE_COMPLEX) - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscBool flg, sym; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - PetscCall(MatIsHermitianKnown(A, &flg, &sym)); - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - PetscCall(__mat_shell_apply_add(A, x, y)); - PetscFunctionReturn(0); - } - if (!ctx->Ai) - { - PetscCall(__mat_shell_apply_transpose_add(A, x, y)); - PetscFunctionReturn(0); - } - PetscCall(MatIsSymmetricKnown(A, &flg, &sym)); - { - mfem::Vector xr, xi, yr, yi; - xr.MakeRef(ctx->y, 0, ctx->y.Size() / 2); - xi.MakeRef(ctx->y, ctx->y.Size() / 2, ctx->y.Size() / 2); - yr.MakeRef(ctx->x, 0, ctx->x.Size() / 2); - yi.MakeRef(ctx->x, ctx->x.Size() / 2, ctx->x.Size() / 2); - xx.GetToVectors(xr, xi); - if (ctx->Ar) - { - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - ctx->Ar->Mult(xr, yr); - ctx->Ar->Mult(xi, yi); - } - else - { - ctx->Ar->MultTranspose(xr, yr); - ctx->Ar->MultTranspose(xi, yi); - } - } - else - { - yr = 0.0; - yi = 0.0; - } - if (ctx->Ai) - { - 
if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - ctx->Ai->AddMult(xi, yr, 1.0); - ctx->Ai->AddMult(xr, yi, -1.0); - } - else - { - ctx->Ai->AddMultTranspose(xi, yr, 1.0); - ctx->Ai->AddMultTranspose(xr, yi, -1.0); - } - } - yy.AddFromVectors(yr, yi); - } -#else - PetscCall(__mat_shell_apply_transpose_add(A, x, y)); -#endif - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_apply_hermitian_transpose(Mat A, Vec x, Vec y) -{ - PetscCall(VecZeroEntries(y)); - return __mat_shell_apply_hermitian_transpose_add(A, x, y); -} - -#if defined(PETSC_USE_COMPLEX) -PetscErrorCode __mat_shell_apply(Mat A, const mfem::Vector &x, Vec y) -{ - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector yy(y, true); - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - { - mfem::Vector yr, yi; - yr.MakeRef(ctx->y, 0, ctx->y.Size() / 2); - yi.MakeRef(ctx->y, ctx->y.Size() / 2, ctx->y.Size() / 2); - if (ctx->Ar) - { - ctx->Ar->Mult(x, yr); - } - else - { - yr = 0.0; - } - if (ctx->Ai) - { - ctx->Ai->Mult(x, yi); - } - else - { - yi = 0.0; - } - yy.SetFromVectors(yr, yi); - } - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_apply_transpose(Mat A, const mfem::Vector &x, Vec y) -{ - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector yy(y, true); - PetscBool flg, sym; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - PetscCall(MatIsSymmetricKnown(A, &flg, &sym)); - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - PetscCall(__mat_shell_apply(A, x, y)); - PetscFunctionReturn(0); - } - { - mfem::Vector yr, yi; - yr.MakeRef(ctx->x, 0, ctx->x.Size() / 2); - yi.MakeRef(ctx->x, ctx->x.Size() / 2, ctx->x.Size() / 2); - if (ctx->Ar) - { - ctx->Ar->MultTranspose(x, yr); - } - else - { - yr = 0.0; - } - if (ctx->Ai) - { - ctx->Ai->MultTranspose(x, yi); - } - else - { - yi = 0.0; - 
} - yy.SetFromVectors(yr, yi); - } - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_apply_hermitian_transpose(Mat A, const mfem::Vector &x, Vec y) -{ - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector yy(y, true); - PetscBool flg, sym; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - PetscCall(MatIsHermitianKnown(A, &flg, &sym)); - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - PetscCall(__mat_shell_apply(A, x, y)); - PetscFunctionReturn(0); - } - if (!ctx->Ai) - { - PetscCall(__mat_shell_apply_transpose(A, x, y)); - PetscFunctionReturn(0); - } - { - mfem::Vector yr, yi; - yr.MakeRef(ctx->x, 0, ctx->x.Size() / 2); - yi.MakeRef(ctx->x, ctx->x.Size() / 2, ctx->x.Size() / 2); - PetscCall(MatIsSymmetricKnown(A, &flg, &sym)); - if (ctx->Ar) - { - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - ctx->Ar->Mult(x, yr); - } - else - { - ctx->Ar->MultTranspose(x, yr); - } - } - else - { - yr = 0.0; - } - if (ctx->Ai) - { - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - ctx->Ai->Mult(x, yi); - } - else - { - ctx->Ai->MultTranspose(x, yi); - } - yi.Neg(); - } - else - { - yi = 0.0; - } - yy.SetFromVectors(yr, yi); - } - PetscFunctionReturn(0); -} -#endif - -PetscErrorCode __mat_shell_get_diagonal(Mat A, Vec diag) -{ - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector ddiag(diag, true); - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -#if defined(PETSC_USE_COMPLEX) - { - mfem::Vector xr, xi; - xr.MakeRef(ctx->x, 0, ctx->x.Size() / 2); - xi.MakeRef(ctx->x, ctx->x.Size() / 2, ctx->x.Size() / 2); - if (ctx->Ar) - { - ctx->Ar->AssembleDiagonal(xr); - } - else - { - xr = 0.0; - } - if (ctx->Ai) - { - ctx->Ai->AssembleDiagonal(xi); - } - else - { - xi = 0.0; - } - ddiag.SetFromVectors(xr, xi); - } -#else - { - if (ctx->Ar) - { - 
ctx->Ar->AssembleDiagonal(ctx->x); - } - else - { - ctx->x = 0.0; - } - ddiag.SetFromVector(ctx->x); - } -#endif - PetscFunctionReturn(0); -} - -// PetscErrorCode __mat_shell_shift(Mat Y, PetscScalar a) -// { -// palace::petsc::PetscMatShellCtx *ctx; -// HYPRE_Real as; -// PetscFunctionBeginUser; - -// PetscCall(MatShellGetContext(Y, (void **)&ctx)); -// MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -// as = PetscRealPart(a); // Does nothing if not PETSC_USE_COMPLEX -// if (std::abs(as) > 0.0) -// { -// auto hAr = dynamic_cast(ctx->Ar.get()); -// MFEM_VERIFY(hAr, "Invalid real shift with no real matrix part!"); -// int n = hAr->Height(); -// const hypre_ParCSRMatrix *A = *hAr; -// const HYPRE_Int *A_diag_i = A->diag->i; -// HYPRE_Real *A_diag_d = A->diag->data; -// for (int j = 0; j < n; j++) -// { -// A_diag_d[A_diag_i[j]] += as; -// } -// } -// #if defined(PETSC_USE_COMPLEX) -// as = PetscImaginaryPart(a); -// if (std::abs(as) > 0.0) -// { -// auto hAi = dynamic_cast(ctx->Ai.get()); -// MFEM_VERIFY(hAi, "Invalid imaginary shift with no imaginary matrix part!"); -// int n = hAi->Height(); -// const hypre_ParCSRMatrix *A = *hAi; -// const HYPRE_Int *A_diag_i = A->diag->i; -// HYPRE_Real *A_diag_d = A->diag->data; -// for (int j = 0; j < n; j++) -// { -// A_diag_d[A_diag_i[j]] += as; -// } -// } -// #endif -// PetscFunctionReturn(0); -// } - -// PetscErrorCode __mat_shell_scale(Mat Y, PetscScalar a) -// { -// palace::petsc::PetscMatShellCtx *ctx; -// PetscFunctionBeginUser; - -// PetscCall(MatShellGetContext(Y, (void **)&ctx)); -// MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -// if (a == 0.0) -// { -// ctx->Ar.reset(); -// #if defined(PETSC_USE_COMPLEX) -// ctx->Ai.reset(); -// #endif -// } -// else -// { -// #if defined(PETSC_USE_COMPLEX) -// HYPRE_Real ar, ai; -// ar = PetscRealPart(a); -// ai = PetscImaginaryPart(a); -// if (std::abs(ar) > 0.0 && std::abs(ai) == 0.0) -// { -// if (ctx->Ar) -// { -// *ctx->Ar *= ar; -// } -// if (ctx->Ai) 
-// { -// *ctx->Ai *= ar; -// } -// } -// else if (std::abs(ai) > 0.0 && std::abs(ar) == 0.0) -// { -// ctx->Ar.swap(ctx->Ai); -// if (ctx->Ar) -// { -// *ctx->Ar *= -ai; -// } -// if (ctx->Ai) -// { -// *ctx->Ai *= ai; -// } -// } -// else -// { -// // General complex coefficient case. -// mfem::HypreParMatrix *aYr, *aYi; -// if (ctx->Ar && ctx->Ai) -// { -// aYr = mfem::Add(ar, *ctx->Ar, -ai, *ctx->Ai); -// aYi = mfem::Add(ai, *ctx->Ar, ar, *ctx->Ai); -// ctx->Ar.reset(aYr); -// ctx->Ai.reset(aYi); -// } -// else if (!ctx->Ar) -// { -// ctx->Ar = std::make_unique(*ctx->Ai); -// *ctx->Ar *= -ai; -// *ctx->Ai *= ar; -// } -// else // !ctx->Ai -// { -// ctx->Ai = std::make_unique(*ctx->Ar); -// *ctx->Ar *= ar; -// *ctx->Ai *= ai; -// } -// } -// #else -// if (ctx->Ar) -// { -// *ctx->Ar *= a; -// } -// #endif -// } -// PetscFunctionReturn(0); -// } - -// PetscErrorCode __mat_shell_conj(Mat Y) -// { -// palace::petsc::PetscMatShellCtx *ctx; -// PetscFunctionBeginUser; - -// PetscCall(MatShellGetContext(Y, (void **)&ctx)); -// MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -// #if defined(PETSC_USE_COMPLEX) -// if (ctx->Ai) -// { -// *ctx->Ai *= -1.0; -// } -// #endif -// PetscFunctionReturn(0); -// } - -// PetscErrorCode __mat_shell_axpy(Mat Y, PetscScalar a, Mat X, MatStructure str) -// { -// palace::petsc::PetscMatShellCtx *ctxY, *ctxX; -// #if defined(PETSC_USE_COMPLEX) -// HYPRE_Real ar, ai; -// #endif -// PetscFunctionBeginUser; - -// auto Add = [&str](std::unique_ptr &Y, HYPRE_Real a, -// const std::unique_ptr &X) -// { -// if (Y) -// { -// if (str == SAME_NONZERO_PATTERN) -// { -// Y->Add(a, *X); -// } -// else -// { -// Y.reset(mfem::Add(1.0, *Y, a, *X)); -// } -// } -// else -// { -// Y = std::unique_ptr(*X); -// *Y *= a; -// } -// }; -// PetscCall(MatShellGetContext(Y, (void **)&ctxY)); -// PetscCall(MatShellGetContext(X, (void **)&ctxX)); -// MFEM_VERIFY(ctxY && ctxX, "Invalid PETSc shell matrix contexts!"); -// #if defined(PETSC_USE_COMPLEX) -// 
ar = PetscRealPart(a); -// ai = PetscImaginaryPart(a); -// if (std::abs(ar) > 0.0) -// { -// if (ctxX->Ar) -// { -// Add(ctxY->Ar, ar, ctxX->Ar); -// } -// if (ctxX->Ai) -// { -// Add(ctxY->Ai, ar, ctxX->Ai); -// } -// } -// else if (std::abs(ai) > 0.0) -// { -// if (ctxX->Ai) -// { -// Add(ctxY->Ar, -ai, ctxX->Ai); -// } -// if (ctxX->Ar) -// { -// Add(ctxY->Ai, ai, ctxX->Ar); -// } -// } -// #else -// if (std::abs(a) > 0.0 && ctxX->Ar) -// { -// Add(ctxY->Ar, a, ctxX->Ar); -// } -// #endif -// PetscFunctionReturn(0); -// } - -// PetscErrorCode __mat_shell_norm(Mat A, NormType type, PetscReal *norm) -// { -// palace::petsc::PetscMatShellCtx *ctx; -// PetscFunctionBeginUser; - -// PetscCall(MatShellGetContext(A, (void **)&ctx)); -// MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -// switch (type) -// { -// case NORM_FROBENIUS: -// #if defined(PETSC_USE_COMPLEX) -// *norm = std::hypot((ctx->Ar) ? hypre_ParCSRMatrixFnorm(*ctx->Ar) : 0.0, -// (ctx->Ai) ? hypre_ParCSRMatrixFnorm(*ctx->Ai) : 0.0); -// #else -// *norm = (ctx->Ar) ? hypre_ParCSRMatrixFnorm(*ctx->Ar) : 0.0; -// #endif -// break; -// case NORM_INFINITY: // Max absolute row sum -// #if defined(PETSC_USE_COMPLEX) -// if (!ctx->Ar && !ctx->Ai) -// { -// *norm = 0.0; -// } -// else if (ctx->Ar && !ctx->Ai) -// { -// hypre_ParCSRMatrixInfNorm(*ctx->Ar, norm); -// } -// else if (ctx->Ai && !ctx->Ar) -// { -// hypre_ParCSRMatrixInfNorm(*ctx->Ai, norm); -// } -// else -// { -// // Need to consider both real and imaginary parts of the matrix. 
-// hypre::hypreParCSRInfNorm(*ctx->Ar, *ctx->Ai, norm); -// } -// #else -// if (ctx->Ar) -// { -// hypre_ParCSRMatrixInfNorm(*ctx->Ar, norm); -// } -// else -// { -// *norm = 0.0; -// } -// #endif -// break; -// case NORM_1: // Max absolute column sum (not supported yet) -// MFEM_ABORT("Unsupported matrix norm type!"); -// } -// PetscFunctionReturn(0); -// } - -PetscErrorCode __mat_shell_real_part(Mat Y) -{ - palace::petsc::PetscMatShellCtx *ctx; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(Y, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -#if defined(PETSC_USE_COMPLEX) - ctx->Ai.reset(); -#endif - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_imag_part(Mat Y) -{ - palace::petsc::PetscMatShellCtx *ctx; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(Y, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -#if defined(PETSC_USE_COMPLEX) - ctx->Ar = std::move(ctx->Ai); -#endif - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_convert_hypreParCSR_AIJ(hypre_ParCSRMatrix *hA, Mat *pA) -{ - // Same as PETSc's MatConvert_HYPRE_AIJ function with mtype == MATAIJ, reuse == - // MAT_INITIAL_MATRIX, and sameint = true. Slightly modified to allow for using real - // Hypre matrices (double type) to construct a PETSc matrix with general complex entires - // (if PETSC_USE_COMPLEX is enabled). See also MFEM's MatConvert_hypreParCSR_AIJ which is - // a copy of the PETSc version. 
- hypre_CSRMatrix *hdiag, *hoffd; - MPI_Comm comm; - HYPRE_Int dnnz, onnz, m, n; - PetscScalar *da, *oa, *aptr; - PetscInt *dii, *djj, *oii, *ojj, *iptr; - PetscInt i; - PetscMPIInt size; - PetscBool sameint = (PetscBool)(sizeof(PetscInt) == sizeof(HYPRE_Int)); - PetscFunctionBeginUser; - - comm = hypre_ParCSRMatrixComm(hA); - MPI_Comm_size(comm, &size); - hdiag = hypre_ParCSRMatrixDiag(hA); - hoffd = hypre_ParCSRMatrixOffd(hA); - m = hypre_CSRMatrixNumRows(hdiag); - n = hypre_CSRMatrixNumCols(hdiag); - dnnz = hypre_CSRMatrixNumNonzeros(hdiag); - onnz = hypre_CSRMatrixNumNonzeros(hoffd); - PetscCall(PetscMalloc1(m + 1, &dii)); - PetscCall(PetscMalloc1(dnnz, &djj)); - PetscCall(PetscMalloc1(dnnz, &da)); - // MFEM_VERIFY(sizeof(HYPRE_Int) == sizeof(PetscInt), - // "Index size mismatch inf Hypre-PETSc MatConvert!"); - if (sameint) - { - PetscCall(PetscArraycpy(dii, hypre_CSRMatrixI(hdiag), m + 1)); - PetscCall(PetscArraycpy(djj, hypre_CSRMatrixJ(hdiag), dnnz)); - } - else - { - for (i = 0; i < m + 1; i++) - { - dii[i] = (PetscInt)(hypre_CSRMatrixI(hdiag)[i]); - } - for (i = 0; i < dnnz; i++) - { - djj[i] = (PetscInt)(hypre_CSRMatrixJ(hdiag)[i]); - } - } - // This loop replaces the call to PetscArraycpy to convert HYPRE_Complex to PetscScalar - // values. 
- for (i = 0; i < dnnz; i++) - { - da[i] = (PetscScalar)(hypre_CSRMatrixData(hdiag)[i]); - } - iptr = djj; - aptr = da; - for (i = 0; i < m; i++) - { - PetscInt nc = dii[i + 1] - dii[i]; - PetscCall(PetscSortIntWithScalarArray(nc, iptr, aptr)); - iptr += nc; - aptr += nc; - } - if (size > 1) - { - HYPRE_BigInt *coffd; - PetscCall(PetscMalloc1(m + 1, &oii)); - PetscCall(PetscMalloc1(onnz, &ojj)); - PetscCall(PetscMalloc1(onnz, &oa)); - if (sameint) - { - PetscCall(PetscArraycpy(oii, hypre_CSRMatrixI(hoffd), m + 1)); - } - else - { - for (i = 0; i < m + 1; i++) - { - oii[i] = (PetscInt)(hypre_CSRMatrixI(hoffd)[i]); - } - } - coffd = hypre_ParCSRMatrixColMapOffd(hA); - for (i = 0; i < onnz; i++) - { - ojj[i] = (PetscInt)coffd[hypre_CSRMatrixJ(hoffd)[i]]; - } - for (i = 0; i < onnz; i++) - { - oa[i] = (PetscScalar)(hypre_CSRMatrixData(hoffd)[i]); - } - iptr = ojj; - aptr = oa; - for (i = 0; i < m; i++) - { - PetscInt nc = oii[i + 1] - oii[i]; - PetscCall(PetscSortIntWithScalarArray(nc, iptr, aptr)); - iptr += nc; - aptr += nc; - } - PetscCall(MatCreateMPIAIJWithSplitArrays(comm, m, n, PETSC_DECIDE, PETSC_DECIDE, dii, - djj, da, oii, ojj, oa, pA)); - } - else - { - oii = ojj = nullptr; - oa = nullptr; - PetscCall(MatCreateSeqAIJWithArrays(comm, m, n, dii, djj, da, pA)); - } - // We are responsible to free the CSR arrays. However, since we can take references of a - // PetscParMatrix but we cannot take reference of PETSc arrays, we need to create a - // PetscContainer object to take reference of these arrays in reference objects. 
- void *ptrs[6] = {dii, djj, da, oii, ojj, oa}; - const char *names[6] = {"_csr_dii", "_csr_djj", "_csr_da", - "_csr_oii", "_csr_ojj", "_csr_oa"}; - for (i = 0; i < 6; i++) - { - PetscContainer c; - PetscCall(PetscContainerCreate(comm, &c)); - PetscCall(PetscContainerSetPointer(c, ptrs[i])); - PetscCall(PetscContainerSetUserDestroy(c, __array_container_destroy)); - PetscCall(PetscObjectCompose(reinterpret_cast(*pA), names[i], - reinterpret_cast(c))); - PetscCall(PetscContainerDestroy(&c)); - } - PetscFunctionReturn(0); -} - -PetscErrorCode __array_container_destroy(void *ptr) -{ - PetscFunctionBeginUser; - - PetscCall(PetscFree(ptr)); - PetscFunctionReturn(0); -} diff --git a/palace/linalg/petsc.hpp b/palace/linalg/petsc.hpp index 82e2052f3..b62f5e3fb 100644 --- a/palace/linalg/petsc.hpp +++ b/palace/linalg/petsc.hpp @@ -31,590 +31,6 @@ typedef struct _p_Mat *Mat; // PETSC_COMM_WORLD communicator. #define PalacePetscCall(...) PetscCallAbort(PETSC_COMM_WORLD, __VA_ARGS__) -// namespace palace::petsc -// { - -// // -// // A minimal implementation of MFEM's PETSc wrappers to support PETSc built with complex -// // numbers. -// // - -// class PetscParMatrix; -// class PetscParVector; - -// // Wrappers for PetscInitialize/PetscFinalize. -// void Initialize(int &argc, char **&argv, const char rc_file[], const char help[]); -// void Finalize(); - -// // Wrapper for PETSc's vector scatter class. -// class PetscScatter -// { -// public: -// enum class Type -// { -// TO_ZERO, -// TO_ALL -// }; - -// private: -// // The actual PETSc object. -// VecScatter ctx; - -// public: -// // Creates a scatter context that copies all entries from the parallel vector to either -// // all processes or to the root process. Allocates the -// PetscScatter(Type type, const PetscParVector &x, std::unique_ptr &y); - -// // Calls PETSc's destroy function. -// ~PetscScatter(); - -// // Routines for forward/reverse scattering. 
-// void Forward(const PetscParVector &x, PetscParVector &y); -// void Reverse(const PetscParVector &x, PetscParVector &y); -// }; - -// // Wrapper for PETSc's vector class. -// class PetscParVector -// { -// private: -// // The actual PETSc object. -// Vec x; - -// public: -// // Creates vector compatible with (i.e. in the domain of) A or Aᵀ. -// PetscParVector(const PetscParMatrix &A, bool transpose = false); - -// // Parallel and serial copy constructors from MFEM's Vector object. -// PetscParVector(MPI_Comm comm, const mfem::Vector &y); -// PetscParVector(const mfem::Vector &y); -// #if defined(PETSC_USE_COMPLEX) -// PetscParVector(MPI_Comm comm, const mfem::Vector &yr, const mfem::Vector &yi); -// PetscParVector(const mfem::Vector &yr, const mfem::Vector &yi); -// #endif - -// // Create a parallel or sequential PETSc vector with the provided dimension. -// PetscParVector(MPI_Comm comm, PetscInt n, PetscInt N); -// // PetscParVector(PetscInt n); - -// // Create a parallel or sequential PETSc vector with a data array. -// PetscParVector(MPI_Comm comm, PetscInt n, PetscInt N, PetscScalar *data); -// PetscParVector(PetscInt n, PetscScalar *data); - -// // Copy constructor, calls VecDuplicate. -// PetscParVector(const PetscParVector &y); - -// // Constructor which wraps an existing PETSc Vec object and takes over ownership unless -// // ref is true. -// PetscParVector(Vec y, bool ref); - -// // Calls PETSc's destroy function. -// virtual ~PetscParVector(); - -// // Copy to/from MFEM's Vector type. 
-// void GetToVector(mfem::Vector &v, PetscInt start = -1, PetscInt end = -1) const; -// void SetFromVector(const mfem::Vector &v); -// void AddFromVector(const mfem::Vector &v); -// #if defined(PETSC_USE_COMPLEX) -// void GetToVectors(mfem::Vector &vr, mfem::Vector &vi, PetscInt start = -1, -// PetscInt end = -1) const; -// void SetFromVectors(const mfem::Vector &vr, const mfem::Vector &vi); -// void AddFromVectors(const mfem::Vector &vr, const mfem::Vector &vi); -// #endif - -// // Access the data array of the vector. -// PetscScalar *GetArray(); -// const PetscScalar *GetArrayRead() const; -// void RestoreArray(PetscScalar *data); -// void RestoreArrayRead(const PetscScalar *data) const; - -// // Temporarily replace the data array of the vector. -// void PlaceArray(const PetscScalar *data); -// void ResetArray(); - -// // Copy entries of y to x. -// void Copy(const PetscParVector &y); - -// // Returns the local vector size. -// PetscInt GetSize() const; - -// // Returns the global vector size. -// PetscInt GetGlobalSize() const; - -// // Set the (local) vector dimension to n, copying previous contents to the upper block. -// void Resize(PetscInt n, bool copy = false); - -// // Zero all entries of the vector. -// void SetZero(); - -// // Sets all entries of the vector to random numbers sampled from the range [-1-i, 1+i], -// or -// // [-1, 1]. -// void SetRandom(); -// #if defined(PETSC_USE_COMPLEX) -// void SetRandomReal(); -// #else -// void SetRandomReal() { SetRandom(); } -// #endif -// void SetRandomSign(bool init = false); - -// // Set all entries to s. -// PetscParVector &operator=(PetscScalar s); - -// // Scale all entries by s. -// void Scale(PetscScalar s); - -// // Shift all entries by +s. -// void Shift(PetscScalar s); - -// // Compute pointwise |x|. -// void Abs(); - -// // Compute pointwise sqrt(|x|). -// void SqrtAbs(); - -// // Compute pointwise 1/x. -// void Inv(); - -// // Compute pointwise 1/sqrt(x). 
-// void InvSqrt(); - -// #if defined(PETSC_USE_COMPLEX) -// // Replace entries with complex conjugate. -// void Conj(); - -// // Zero the imaginary part of the vector. -// void GetRealPart(); - -// // Move the imaginary part to the real part of the vector. -// void GetImagPart(); -// #endif - -// // Normalize the vector. -// PetscReal Normalize(); -// PetscReal Normalize(const PetscParMatrix &B, PetscParVector &Bv); - -// // Calculate the vector 2-norm. -// PetscReal Norml2() const; - -// // Calculate the vector infinity-norm. -// PetscReal Normlinf() const; - -// // Zero specified (local) rows of the vector. -// void ZeroRows(const mfem::Array &rows); - -// // Pointwise multiplication x *= y. -// void PointwiseMult(const PetscParVector &y, bool replace_zeros); - -// // In-place addition x += alpha * y. -// void AXPY(PetscScalar alpha, const PetscParVector &y); - -// // In-place addition x = alpha * y + beta * x. -// void AXPBY(PetscScalar alpha, const PetscParVector &y, PetscScalar beta); - -// // In-place addition x = alpha * y + beta * z + gamma * x. -// void AXPBYPCZ(PetscScalar alpha, const PetscParVector &y, PetscScalar beta, -// const PetscParVector &z, PetscScalar gamma); - -// // Vector dot product (yᴴ x) or indefinite dot product (yᵀ x) for complex vectors. -// PetscScalar Dot(const PetscParVector &y) const; -// PetscScalar TransposeDot(const PetscParVector &y) const; - -// // Prints the vector (to stdout if fname is nullptr). -// void Print(const char *fname = nullptr, bool binary = false) const; - -// // Get the associated MPI communicator. -// MPI_Comm GetComm() const; - -// // Typecasting to PETSc's Vec type. -// operator Vec() const { return x; } - -// // Typecasting to PETSc object. -// operator PetscObject() const { return reinterpret_cast(x); } -// }; - -// // Base wrapper for PETSc's matrix class. 
-// class PetscParMatrix -// { -// public: -// enum class NNZStructure -// { -// DIFFERENT, -// SAME, -// SUBSET -// }; - -// #if defined(PETSC_USE_COMPLEX) -// enum class ExtractStructure -// { -// REAL, -// IMAGINARY, -// SUM -// }; -// #endif - -// protected: -// // The actual PETSc object. -// Mat A; - -// // Default constructor for derived classes. -// PetscParMatrix() : A(nullptr) {} - -// public: -// // Copy constructor, calls MatDuplicate. -// PetscParMatrix(const PetscParMatrix &B); - -// // Constructor which wraps an existing PETSc Mat object and takes over ownership unless -// // ref is true. -// PetscParMatrix(Mat B, bool ref); - -// // Calls PETSc's destroy function. -// virtual ~PetscParMatrix(); - -// // Get/set symmetric or Hermitian flags for the matrix. When setting the flags, it is -// // assumed the structure does not change for the lifetime of the matrix(unless -// explicitly -// // set again). -// void SetSymmetric(bool sym = true); -// void SetHermitian(bool herm = true); -// bool GetSymmetric() const; -// bool GetHermitian() const; -// #if defined(PETSC_USE_COMPLEX) -// void SetRealSymmetric(); -// #endif -// void CopySymmetry(const PetscParMatrix &B); - -// // Returns the local number of rows. -// PetscInt GetNumRows() const; -// PetscInt Height() const { return GetNumRows(); } - -// // Returns the local number of columns. -// PetscInt GetNumCols() const; -// PetscInt Width() const { return GetNumCols(); } - -// // Returns the global number of rows. -// PetscInt GetGlobalNumRows() const; - -// // Returns the global number of columns. -// PetscInt GetGlobalNumCols() const; - -// // Returns the number of nonzeros. 
-// virtual PetscInt NNZ() const; -// #if defined(PETSC_USE_COMPLEX) -// virtual PetscInt NNZReal() const -// { -// MFEM_ABORT("NNZReal is not supported for base class PetscParMatrix!"); -// return 0; -// } -// virtual PetscInt NNZImag() const -// { -// MFEM_ABORT("NNZImag is not supported for base class PetscParMatrix!"); -// return 0; -// } -// #endif - -// // Calculate matrix Frobenius and infinity norms. -// PetscReal NormF() const; -// PetscReal NormInf() const; -// #if defined(PETSC_USE_COMPLEX) -// virtual PetscReal NormFReal() const -// { -// MFEM_ABORT("NormFReal is not supported for base class PetscParMatrix!"); -// return 0.0; -// } -// virtual PetscReal NormFImag() const -// { -// MFEM_ABORT("NormFImag is not supported for base class PetscParMatrix!"); -// return 0.0; -// } -// virtual PetscReal NormInfReal() const -// { -// MFEM_ABORT("NormInfReal is not supported for base class PetscParMatrix!"); -// return 0.0; -// } -// virtual PetscReal NormInfImag() const -// { -// MFEM_ABORT("NormInfImag is not supported for base class PetscParMatrix!"); -// return 0.0; -// } -// #endif - -// // Estimate matrix 2-norm (spectral norm) using power iteration. -// PetscReal Norm2(PetscReal tol = PETSC_DEFAULT, PetscInt maxits = PETSC_DEFAULT) const; - -// // Scale all entries by s. -// void Scale(PetscScalar s); - -// #if defined(PETSC_USE_COMPLEX) -// // Replace entries with complex conjugate. -// void Conj(); - -// // Zero the imaginary part of the matrix. -// void GetRealPart(); - -// // Move the imaginary part to the real part of the matrix. -// void GetImagPart(); -// #endif - -// // In-place addition A += alpha * B. -// void AXPY(PetscScalar alpha, const PetscParMatrix &B, NNZStructure struc); - -// // Matrix-vector multiplication. 
-// void Mult(const PetscParVector &x, PetscParVector &y) const; -// void MultAdd(const PetscParVector &x, PetscParVector &y) const; -// void MultTranspose(const PetscParVector &x, PetscParVector &y) const; -// void MultTransposeAdd(const PetscParVector &x, PetscParVector &y) const; -// void MultHermitianTranspose(const PetscParVector &x, PetscParVector &y) const; -// void MultHermitianTransposeAdd(const PetscParVector &x, PetscParVector &y) const; - -// #if defined(PETSC_USE_COMPLEX) -// // Multiplication with a real-valued vector. -// virtual void Mult(const mfem::Vector &x, PetscParVector &y) const; -// virtual void MultTranspose(const mfem::Vector &x, PetscParVector &y) const; -// virtual void MultHermitianTranspose(const mfem::Vector &x, PetscParVector &y) const; -// #endif - -// // Prints the matrix (to stdout if fname is nullptr). -// virtual void Print(const char *fname = nullptr, bool binary = false) const; -// #if defined(PETSC_USE_COMPLEX) -// virtual void PrintReal(const char *fname) const -// { -// MFEM_ABORT("PrintReal is not supported for base class PetscParMatrix!"); -// } -// virtual void PrintImag(const char *fname) const -// { -// MFEM_ABORT("PrintImag is not supported for base class PetscParMatrix!"); -// } -// #endif - -// // Returns a (real) MFEM Operator from the underlying shell matrix data. When complex -// // scalars are used, the parameter controls which part of the matrix to extract. -// #if defined(PETSC_USE_COMPLEX) -// virtual const mfem::Operator *GetOperator(ExtractStructure struc) const -// #else -// virtual const mfem::Operator *GetOperator() const -// #endif -// { -// MFEM_ABORT("GetOperator is not supported for base class PetscParMatrix!"); -// return nullptr; -// } - -// // Test whether or not a shell matrix has a real or imaginary parts. 
-// #if defined(PETSC_USE_COMPLEX) -// virtual bool HasReal() const -// { -// MFEM_ABORT("HasReal is not supported for base class PetscParMatrix!"); -// return false; -// } -// virtual bool HasImag() const -// { -// MFEM_ABORT("HasImag is not supported for base class PetscParMatrix!"); -// return false; -// } -// #endif - -// // Constructs a (real) HypreParMatrix from the PETSc matrix data. When complex scalars -// // are used, the parameter controls which part of the matrix to extract. -// #if defined(PETSC_USE_COMPLEX) -// virtual std::unique_ptr -// GetHypreParMatrix(ExtractStructure struc) const; -// #else -// virtual std::unique_ptr GetHypreParMatrix() const; -// #endif - -// // Create a submatrix on the same number of processors as the original matrix, -// // corresponding to the provided rows and columns which are the selected(local) -// indices. virtual std::unique_ptr GetSubMatrix(const mfem::Array -// &rows, -// const mfem::Array &cols); - -// // Create a sequential gathered matrix corresponding to the parallel matrix. All -// processes -// // on the original communicator must call this function, but if the argument is false, -// no -// // matrix is created (returned pointer is nullptr). -// virtual std::unique_ptr GetSequentialMatrix(bool create); - -// // Get the associated MPI communicator. -// MPI_Comm GetComm() const; - -// // Typecasting to PETSc's Mat type. -// operator Mat() const { return A; } - -// // Typecasting to PETSc object. -// operator PetscObject() const { return reinterpret_cast(A); } -// }; - -// // Context data for PETSc shell matrices. These store complex matrices as -// // Ar + i Ai and perform matrix-vector products. -// struct PetscMatShellCtx -// { -// std::unique_ptr Ar; -// mfem::Vector x, y; -// #if defined(PETSC_USE_COMPLEX) -// std::unique_ptr Ai; -// #endif -// }; - -// // Wrapper for PETSc's MATSHELL matrix class. -// class PetscShellMatrix : public PetscParMatrix -// { -// private: -// // Returns the shell matrix context. 
-// PetscMatShellCtx *GetContext() const; - -// public: -// // Create a PETSc shell matrix wrapping an MFEM Operator. Ownership of the operator is -// // transfered to the PETSc shell. When PETSc is compiled with complex numbers support, -// // the shell matrix wraps the real and imaginary parts to act on complex PETSc Vec -// // objects. -// PetscShellMatrix(MPI_Comm comm, std::unique_ptr &&B); -// #if defined(PETSC_USE_COMPLEX) -// PetscShellMatrix(MPI_Comm comm, std::unique_ptr &&Br, -// std::unique_ptr &&Bi); -// #endif - -// // Returns the number of nonzeros. -// PetscInt NNZ() const override; -// #if defined(PETSC_USE_COMPLEX) -// PetscInt NNZReal() const override; -// PetscInt NNZImag() const override; -// #endif - -// // Calculate matrix Frobenius and infinity norms. -// #if defined(PETSC_USE_COMPLEX) -// PetscReal NormFReal() const override; -// PetscReal NormFImag() const override; -// PetscReal NormInfReal() const override; -// PetscReal NormInfImag() const override; -// #endif - -// #if defined(PETSC_USE_COMPLEX) -// // Multiplication with a real-valued vector. -// void Mult(const mfem::Vector &x, PetscParVector &y) const override; -// void MultTranspose(const mfem::Vector &x, PetscParVector &y) const override; -// void MultHermitianTranspose(const mfem::Vector &x, PetscParVector &y) const override; -// #endif - -// // Prints the locally owned matrix rows in parallel. -// void Print(const char *fname = nullptr, bool binary = false) const override; -// #if defined(PETSC_USE_COMPLEX) -// void PrintReal(const char *fname) const override; -// void PrintImag(const char *fname) const override; -// #endif - -// // Test whether or not a shell matrix has a real or imaginary parts. -// #if defined(PETSC_USE_COMPLEX) -// bool HasReal() const override; -// bool HasImag() const override; -// #endif - -// // Returns a (real) MFEM Operator from the underlying shell matrix data. 
When complex -// // scalars are used, the parameter controls which part of the matrix to extract. -// #if defined(PETSC_USE_COMPLEX) -// const mfem::Operator *GetOperator(ExtractStructure struc) const override; -// #else -// const mfem::Operator *GetOperator() const override; -// #endif - -// // These methods are not supported for MATSHELL. -// #if defined(PETSC_USE_COMPLEX) -// std::unique_ptr -// GetHypreParMatrix(ExtractStructure struc) const override -// #else -// std::unique_ptr GetHypreParMatrix() const override -// #endif -// { -// MFEM_ABORT("GetHypreParMatrix is not supported for PetscShellMatrix!"); -// return {}; -// } -// std::unique_ptr GetSubMatrix(const mfem::Array &, -// const mfem::Array &) override -// { -// MFEM_ABORT("GetSubMatrix is not supported for PetscShellMatrix!"); -// return {}; -// } -// std::unique_ptr GetSequentialMatrix(bool) override -// { -// MFEM_ABORT("GetSequentialMatrix is not supported for PetscShellMatrix!"); -// return {}; -// } -// }; - -// // Wrapper for PETSc's MATIJ matrix class. -// class PetscAijMatrix : public PetscParMatrix -// { -// public: -// // Create a PETSc matrix explicitly converted from an MFEM Operator. -// PetscAijMatrix(const mfem::Operator &B); -// #if defined(PETSC_USE_COMPLEX) -// PetscAijMatrix(const mfem::Operator &Br, const mfem::Operator &Bi); -// #endif -// }; - -// // Wrapper for PETSc's MATDENSE matrix class. -// class PetscDenseMatrix : public PetscParMatrix -// { -// private: -// // Helper method for column orthonormalization. -// PetscReal OrthonormalizeColumnInternal( -// PetscInt j, bool mgs, bool cgs2, -// const std::function &Dot, -// const std::function -// &VecDot, -// const std::function &Normalize); - -// public: -// // Create a parallel or sequential PETSc dense matrix. Option to specify an existing -// data -// // array. 
-// PetscDenseMatrix(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, -// PetscScalar *data); -// PetscDenseMatrix(PetscInt m, PetscInt n, PetscScalar *data); - -// // Set the (local) matrix dimensions to m x n, copying previous contents to the -// upper-left -// // block. -// void Resize(PetscInt m, PetscInt n, bool copy = false); - -// // Access methods for columns of the dense matrix. -// PetscParVector GetColumn(PetscInt j); -// const PetscParVector GetColumnRead(PetscInt j) const; -// void RestoreColumn(PetscInt j, PetscParVector &v); -// void RestoreColumnRead(PetscInt j, const PetscParVector &v) const; - -// // Access the data array of the dense matrix. -// PetscScalar *GetArray(); -// const PetscScalar *GetArrayRead() const; -// void RestoreArray(PetscScalar *data); -// void RestoreArrayRead(const PetscScalar *data) const; - -// // Sets all entries of the matrix to random numbers sampled from the range [-1-i, 1+i], -// or -// // [-1, 1]. -// void SetRandom(PetscInt start = -1, PetscInt end = -1); -// #if defined(PETSC_USE_COMPLEX) -// void SetRandomReal(PetscInt start = -1, PetscInt end = -1); -// #else -// void SetRandomReal(PetscInt start = -1, PetscInt end = -1) { SetRandom(start, end); } -// #endif -// void SetRandomSign(PetscInt start = -1, PetscInt end = -1, bool init = false); - -// // Orthonormalize column j of the matrix against the preceeding columns, using -// classical -// // or modified Gram-Schmidt. -// PetscReal OrthonormalizeColumn(PetscInt j, bool mgs, bool cgs2); -// PetscReal OrthonormalizeColumn(PetscInt j, bool mgs, bool cgs2, const PetscParMatrix -// &B, -// PetscParVector &Bv); - -// // Dense matrix-matrix multiplication. 
-// void MatMult(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const; -// void MatMultTranspose(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const; -// void MatTransposeMult(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const; -// }; - -// } // namespace palace::petsc - #endif #endif // PALACE_LINALG_PETSC_HPP diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp index 31c9a72c5..f483155a1 100644 --- a/palace/linalg/slepc.cpp +++ b/palace/linalg/slepc.cpp @@ -122,8 +122,19 @@ void ConfigurePCShell(ST st, void *ctx, PetscErrorCode (*__pc_apply)(PC, Vec, Ve PalacePetscCall(STGetKSP(st, &ksp)); PalacePetscCall(KSPGetPC(ksp, &pc)); PalacePetscCall(PCSetType(pc, PCSHELL)); - PalacePetscCall(PCShellSetContext(pc, (void *)ctx)); - PalacePetscCall(PCShellSetApply(pc, (PetscErrorCode(*)(PC, Vec, Vec)) & __pc_apply)); + PalacePetscCall(PCShellSetContext(pc, ctx)); + PalacePetscCall(PCShellSetApply(pc, __pc_apply)); +} + +void ConfigureRG(RG rg, PetscReal lr, PetscReal ur, PetscReal li, PetscReal ui, + bool complement = false) +{ + PalacePetscCall(RGSetType(rg, RGINTERVAL)); + PalacePetscCall(RGIntervalSetEndpoints(rg, lr, ur, li, ui)); + if (complement) + { + PalacePetscCall(RGSetComplement(rg, PETSC_TRUE)); + } } } // namespace @@ -155,8 +166,7 @@ PetscReal GetMaxSingularValue(MPI_Comm comm, const ComplexOperator &A, bool herm PetscInt n = A.Height() / 2; PalacePetscCall( MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)&ctx, &A0)); - PalacePetscCall( - MatShellSetOperation(A0, MATOP_MULT, (void (*)(void)) & __mat_apply_shell)); + PalacePetscCall(MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_shell)); if (herm) { EPS eps; @@ -189,10 +199,10 @@ PetscReal GetMaxSingularValue(MPI_Comm comm, const ComplexOperator &A, bool herm else { PalacePetscCall(MatShellSetOperation(A0, MATOP_MULT_TRANSPOSE, - (void (*)(void)) & __mat_apply_transpose_shell)); + (void (*)(void))__mat_apply_transpose_shell)); PalacePetscCall( 
MatShellSetOperation(A0, MATOP_MULT_HERMITIAN_TRANSPOSE, - (void (*)(void)) & __mat_apply_hermitian_transpose_shell)); + (void (*)(void))__mat_apply_hermitian_transpose_shell)); SVD svd; PetscInt num_conv; @@ -319,13 +329,13 @@ void SlepcEigenSolver::Customize() { // Configure the KSP object for non-preconditioned spectral transformations. PetscBool precond; - KSP ksp; ST st = GetST(); - PalacePetscCall(STGetKSP(st, &ksp)); PalacePetscCall( PetscObjectTypeCompare(reinterpret_cast(st), STPRECOND, &precond)); if (!precond) { + KSP ksp; + PalacePetscCall(STGetKSP(st, &ksp)); PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); } @@ -334,57 +344,39 @@ void SlepcEigenSolver::Customize() { if (PetscImaginaryPart(sigma) == 0.0) { - if (PetscRealPart(sigma) > 0.0) + PetscReal sr = PetscRealPart(sigma); + if (sr > 0.0) { - SetRegion(PetscRealPart(sigma) / gamma - PETSC_i * mfem::infinity(), - mfem::infinity() + PETSC_i * mfem::infinity()); + ConfigureRG(GetRG(), sr / gamma, mfem::infinity(), -mfem::infinity(), + mfem::infinity()); } - else if (PetscRealPart(sigma) < 0.0) + else if (sr < 0.0) { - SetRegion(-mfem::infinity() - PETSC_i * mfem::infinity(), - PetscRealPart(sigma) / gamma + PETSC_i * mfem::infinity()); + ConfigureRG(GetRG(), -mfem::infinity(), sr / gamma, -mfem::infinity(), + mfem::infinity()); } } else if (PetscRealPart(sigma) == 0.0) { - if (PetscImaginaryPart(sigma) > 0.0) + PetscReal si = PetscImaginaryPart(sigma); + if (si > 0.0) { - SetRegion(-mfem::infinity() + PETSC_i * PetscImaginaryPart(sigma) / gamma, - mfem::infinity() + PETSC_i * mfem::infinity()); + ConfigureRG(GetRG(), -mfem::infinity(), mfem::infinity(), si / gamma, + mfem::infinity()); } - else if (PetscImaginaryPart(sigma) < 0.0) + else if (si < 0.0) { - SetRegion(-mfem::infinity() - PETSC_i * mfem::infinity(), - PetscImaginaryPart(sigma) / gamma + PETSC_i * mfem::infinity()); + ConfigureRG(GetRG(), -mfem::infinity(), mfem::infinity(), -mfem::infinity(), + si / gamma); } } else { - 
MFEM_ABORT("Shift-and-invert with general complex eigenvalue target " - "is unsupported!"); + MFEM_ABORT("Shift-and-invert with general complex eigenvalue target is unsupported!"); } } } -void SlepcEigenSolver::SetRegion(PetscScalar lower_left, PetscScalar upper_right, - bool complement) -{ - RG rg = GetRG(); - PalacePetscCall(RGSetType(rg, RGINTERVAL)); - PalacePetscCall(RGIntervalSetEndpoints( - rg, PetscRealPart(lower_left), PetscRealPart(upper_right), - PetscImaginaryPart(lower_left), PetscImaginaryPart(upper_right))); - if (complement) - { - PalacePetscCall(RGSetComplement(rg, PETSC_TRUE)); - } -} - -PetscScalar SlepcEigenSolver::GetBackTransform(PetscScalar l) const -{ - return gamma * l; -} - PetscReal SlepcEigenSolver::GetError(int i, EigenvalueSolver::ErrorType type) const { switch (type) @@ -602,7 +594,7 @@ int SlepcEPSSolverBase::Solve() } // Compute and store the eigenpair residuals. - res = std::make_unique(num_conv); + res = std::make_unique(num_conv); for (int i = 0; i < num_conv; i++) { res.get()[i] = GetResidualNorm(i); @@ -614,7 +606,7 @@ PetscScalar SlepcEPSSolverBase::GetEigenvalue(int i) const { PetscScalar l; PalacePetscCall(EPSGetEigenvalue(eps, i, &l, nullptr)); - return GetBackTransform(l); + return l * gamma; } void SlepcEPSSolverBase::GetEigenvector(int i, ComplexVector &x) const @@ -676,10 +668,8 @@ void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperato MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0)); PalacePetscCall( MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); - PalacePetscCall( - MatShellSetOperation(A0, MATOP_MULT, (void (*)(void)) & __mat_apply_EPS_A0)); - PalacePetscCall( - MatShellSetOperation(A1, MATOP_MULT, (void (*)(void)) & __mat_apply_EPS_A1)); + PalacePetscCall(MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_EPS_A0)); + PalacePetscCall(MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_EPS_A1)); 
PalacePetscCall(EPSSetOperators(eps, A0, A1)); if (first && type != ScaleType::NONE) @@ -717,8 +707,7 @@ void SlepcEPSSolver::SetBMat(const Operator &B) PetscInt n = B.Height(); PalacePetscCall( MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B0)); - PalacePetscCall( - MatShellSetOperation(B0, MATOP_MULT, (void (*)(void)) & __mat_apply_EPS_B)); + PalacePetscCall(MatShellSetOperation(B0, MATOP_MULT, (void (*)(void))__mat_apply_EPS_B)); BV bv = GetBV(); PalacePetscCall(BVSetMatrix(bv, B0, PETSC_FALSE)); @@ -774,9 +763,9 @@ void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexO PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); PalacePetscCall( - MatShellSetOperation(A0, MATOP_MULT, (void (*)(void)) & __mat_apply_PEPLinear_L0)); + MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_PEPLinear_L0)); PalacePetscCall( - MatShellSetOperation(A1, MATOP_MULT, (void (*)(void)) & __mat_apply_PEPLinear_L1)); + MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_PEPLinear_L1)); PalacePetscCall(EPSSetOperators(eps, A0, A1)); if (first && type != ScaleType::NONE) @@ -818,7 +807,7 @@ void SlepcPEPLinearSolver::SetBMat(const Operator &B) PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B0)); PalacePetscCall( - MatShellSetOperation(B0, MATOP_MULT, (void (*)(void)) & __mat_apply_PEPLinear_B)); + MatShellSetOperation(B0, MATOP_MULT, (void (*)(void))__mat_apply_PEPLinear_B)); BV bv = GetBV(); PalacePetscCall(BVSetMatrix(bv, B0, PETSC_FALSE)); @@ -857,22 +846,22 @@ void SlepcPEPLinearSolver::GetEigenvector(int i, ComplexVector &x) const v0, "Must call SetOperators before using GetEigenvector for SLEPc eigenvalue solver!"); PalacePetscCall(EPSGetEigenvector(eps, i, v0, nullptr)); - PetscInt n; PalacePetscCall(VecGetLocalSize(v0, &n)); - MFEM_VERIFY(2 * x1.Size() == 2 * n, "Invalid size mismatch for provided 
eigenvector!"); + MFEM_VERIFY(2 * x.Size() == 2 * n, "Invalid size mismatch for provided eigenvector!"); const PetscScalar *pv0; PalacePetscCall(VecGetArrayRead(v0, &pv0)); - x1.Set(pv0, n / 2); + x.Set(pv0, n / 2); PalacePetscCall(VecRestoreArrayRead(v0, &pv0)); + if (opB) { - linalg::Normalize(GetComm(), x1, *opB, y1); + linalg::Normalize(GetComm(), x, *opB, y1); } else { - linalg::Normalize(GetComm(), x1); + linalg::Normalize(GetComm(), x); } } @@ -1106,7 +1095,7 @@ int SlepcPEPSolverBase::Solve() } // Compute and store the eigenpair residuals. - res = std::make_unique(num_conv); + res = std::make_unique(num_conv); for (int i = 0; i < num_conv; i++) { res.get()[i] = GetResidualNorm(i); @@ -1118,7 +1107,7 @@ PetscScalar SlepcPEPSolverBase::GetEigenvalue(int i) const { PetscScalar l; PalacePetscCall(PEPGetEigenpair(pep, i, &l, nullptr, nullptr, nullptr)); - return GetBackTransform(l); + return l * gamma; } void SlepcPEPSolverBase::GetEigenvector(int i, ComplexVector &x) const @@ -1184,12 +1173,9 @@ void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperato MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); PalacePetscCall( MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A2)); - PalacePetscCall( - MatShellSetOperation(A0, MATOP_MULT, (void (*)(void)) & __mat_apply_PEP_A0)); - PalacePetscCall( - MatShellSetOperation(A1, MATOP_MULT, (void (*)(void)) & __mat_apply_PEP_A1)); - PalacePetscCall( - MatShellSetOperation(A2, MATOP_MULT, (void (*)(void)) & __mat_apply_PEP_A2)); + PalacePetscCall(MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A0)); + PalacePetscCall(MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A1)); + PalacePetscCall(MatShellSetOperation(A2, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A2)); Mat A[3] = {A0, A1, A2}; PalacePetscCall(PEPSetOperators(pep, 3, A)); @@ -1229,8 +1215,7 @@ void SlepcPEPSolver::SetBMat(const Operator &B) PetscInt 
n = B.Height(); PalacePetscCall( MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B0)); - PalacePetscCall( - MatShellSetOperation(B0, MATOP_MULT, (void (*)(void)) & __mat_apply_PEP_B)); + PalacePetscCall(MatShellSetOperation(B0, MATOP_MULT, (void (*)(void))__mat_apply_PEP_B)); BV bv = GetBV(); PalacePetscCall(BVSetMatrix(bv, B0, PETSC_FALSE)); @@ -1286,6 +1271,7 @@ PetscErrorCode __mat_apply_EPS_A0(Mat A, Vec x, Vec y) PetscCall(VecRestoreArrayRead(x, &px)); ctx->opK->Mult(ctx->x, ctx->y); + ctx->y *= ctx->delta; PetscScalar *py; PetscCall(VecGetArrayWrite(y, &py)); @@ -1311,6 +1297,7 @@ PetscErrorCode __mat_apply_EPS_A1(Mat A, Vec x, Vec y) PetscCall(VecRestoreArrayRead(x, &px)); ctx->opM->Mult(ctx->x, ctx->y); + ctx->y *= ctx->delta * ctx->gamma; PetscScalar *py; PetscCall(VecGetArrayWrite(y, &py)); @@ -1337,6 +1324,7 @@ PetscErrorCode __mat_apply_EPS_B(Mat A, Vec x, Vec y) ctx->opB->Mult(ctx->x.Real(), ctx->y.Real()); ctx->opB->Mult(ctx->x.Imag(), ctx->y.Imag()); + ctx->y *= ctx->delta * ctx->gamma; PetscScalar *py; PetscCall(VecGetArrayWrite(y, &py)); diff --git a/palace/linalg/slepc.hpp b/palace/linalg/slepc.hpp index 22d4eb521..3e2821961 100644 --- a/palace/linalg/slepc.hpp +++ b/palace/linalg/slepc.hpp @@ -87,7 +87,7 @@ class SlepcEigenSolver : public EigenvalueSolver bool sinvert, region; // Storage for computed residual norms. - std::unique_ptr res; + std::unique_ptr res; // Reference to linear solver used for operator action for M⁻¹ (with no spectral // transformation) or (K - σ M)⁻¹ (generalized EVP with shift-and- invert) or P(σ)⁻¹ @@ -112,14 +112,6 @@ class SlepcEigenSolver : public EigenvalueSolver // Customize object with command line options set. virtual void Customize(); - // Specify rectangular region of the complex plane in which to constrain eigenvalue - // search. 
- void SetRegion(PetscScalar lower_left, PetscScalar upper_right, bool complement = false); - - // Perform the back-transformation from the spectrally transformed eigenvalue back to the - // original problem. - PetscScalar GetBackTransform(PetscScalar l) const; - // Helper routine for computing the eigenpair residual. virtual PetscReal GetResidualNorm(int i) const = 0; diff --git a/palace/linalg/superlu.cpp b/palace/linalg/superlu.cpp index 5c3e235be..63ca6872d 100644 --- a/palace/linalg/superlu.cpp +++ b/palace/linalg/superlu.cpp @@ -91,6 +91,8 @@ void SuperLUSolver::SetOperator(const Operator &op) A = std::make_unique(op); } solver.SetOperator(*A); + height = solver.Height(); + width = solver.Width(); } } // namespace palace diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp index 994bf8bcd..4f40a2841 100644 --- a/palace/models/curlcurloperator.cpp +++ b/palace/models/curlcurloperator.cpp @@ -147,7 +147,7 @@ void CurlCurlOperator::GetStiffnessMatrix(std::vector(std::move(k), nd_fespace_l, nd_fespace_l)); + K.push_back(std::make_unique(std::move(k), nd_fespace_l)); K.back()->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); } print_hdr = false; diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp index 531a9b8d9..ef9a8c289 100644 --- a/palace/models/laplaceoperator.cpp +++ b/palace/models/laplaceoperator.cpp @@ -170,7 +170,7 @@ void LaplaceOperator::GetStiffnessMatrix(std::vector(std::move(k), h1_fespace_l, h1_fespace_l)); + K.push_back(std::make_unique(std::move(k), h1_fespace_l)); K.back()->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); } // Save local (uneliminated) operator after parallel assembly for RHS BC elimination. 
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index 466a0630b..8754c5a5f 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -243,7 +243,7 @@ SpaceOperator::GetSystemMatrix(SpaceOperator::OperatorType type, a->SetAssemblyLevel(assembly_level); a->Assemble(skip_zeros); a->Finalize(skip_zeros); - auto A = std::make_unique(std::move(a), GetNDSpace(), GetNDSpace()); + auto A = std::make_unique(std::move(a), GetNDSpace()); A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); return A; } @@ -313,8 +313,7 @@ SpaceOperator::GetComplexSystemMatrix(SpaceOperator::OperatorType type, double o return {}; } auto A = std::make_unique( - std::make_unique(std::move(ar), std::move(ai)), GetNDSpace(), - GetNDSpace()); + std::make_unique(std::move(ar), std::move(ai)), GetNDSpace()); A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); return A; } @@ -355,7 +354,7 @@ std::unique_ptr SpaceOperator::GetSystemMatrix(double a0, double a1 { sum->AddOperator(M->LocalOperator(), a2); } - auto A = std::make_unique(std::move(sum), GetNDSpace(), GetNDSpace()); + auto A = std::make_unique(std::move(sum), GetNDSpace()); A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), Operator::DiagonalPolicy::DIAG_ONE); return A; } @@ -405,7 +404,7 @@ std::unique_ptr SpaceOperator::GetComplexSystemMatrix( { sum->AddOperator(A2->LocalOperator(), 1.0); } - auto A = std::make_unique(std::move(sum), GetNDSpace(), GetNDSpace()); + auto A = std::make_unique(std::move(sum), GetNDSpace()); A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), Operator::DiagonalPolicy::DIAG_ONE); return A; } @@ -469,7 +468,7 @@ void SpaceOperator::GetPreconditionerMatrix(double a0, double a1, double a2, dou Mpi::GlobalSum(1, &nnz, fespace_l.GetComm()); Mpi::Print(", {:d} NNZ (LOR)\n", nnz); } - B_.push_back(std::make_unique(std::move(b_lor), fespace_l, fespace_l)); + B_.push_back(std::make_unique(std::move(b_lor), fespace_l)); } else { @@ -489,7 +488,7 
@@ void SpaceOperator::GetPreconditionerMatrix(double a0, double a1, double a2, dou Mpi::Print("\n"); } } - B_.push_back(std::make_unique(std::move(b), fespace_l, fespace_l)); + B_.push_back(std::make_unique(std::move(b), fespace_l)); } B_.back()->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); @@ -631,7 +630,7 @@ bool SpaceOperator::GetExcitationVector(Vector &RHS) bool SpaceOperator::GetExcitationVector(double omega, ComplexVector &RHS) { // Frequency domain excitation vector: RHS = iω RHS1 + RHS2(ω). - RHS.SetSize(GetNDSpace().GetTrueVSize()); + RHS.SetSize(2 * GetNDSpace().GetTrueVSize()); RHS = std::complex(0.0, 0.0); bool nnz1 = AddExcitationVector1Internal(RHS.Real()); RHS *= 1i * omega; @@ -646,7 +645,7 @@ bool SpaceOperator::GetExcitationVector1(ComplexVector &RHS1) { // Assemble the frequency domain excitation term with linear frequency dependence // (coefficient iω, see GetExcitationVector above, is accounted for later). - RHS1.SetSize(GetNDSpace().GetTrueVSize()); + RHS1.SetSize(2 * GetNDSpace().GetTrueVSize()); RHS1 = std::complex(0.0, 0.0); bool nnz1 = AddExcitationVector1Internal(RHS1.Real()); RHS1.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); @@ -656,7 +655,7 @@ bool SpaceOperator::GetExcitationVector1(ComplexVector &RHS1) bool SpaceOperator::GetExcitationVector2(double omega, ComplexVector &RHS2) { - RHS2.SetSize(GetNDSpace().GetTrueVSize()); + RHS2.SetSize(2 * GetNDSpace().GetTrueVSize()); RHS2 = std::complex(0.0, 0.0); bool nnz2 = AddExcitationVector2Internal(omega, RHS2); RHS2.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); @@ -691,7 +690,7 @@ bool SpaceOperator::AddExcitationVector2Internal(double omega, ComplexVector &RH { // Assemble the contribution of wave ports to the frequency domain excitation term at the // specified frequency. 
- MFEM_VERIFY(RHS2.Size() == GetNDSpace().GetTrueVSize(), + MFEM_VERIFY(RHS2.Size() == 2 * GetNDSpace().GetTrueVSize(), "Invalid T-vector size for AddExcitationVector2Internal!"); SumVectorCoefficient fbr(GetNDSpace().GetParMesh()->SpaceDimension()), fbi(GetNDSpace().GetParMesh()->SpaceDimension()); @@ -714,7 +713,7 @@ bool SpaceOperator::AddExcitationVector2Internal(double omega, ComplexVector &RH void SpaceOperator::GetConstantInitialVector(ComplexVector &v) { - v.SetSize(GetNDSpace().GetTrueVSize()); + v.SetSize(2 * GetNDSpace().GetTrueVSize()); v = std::complex(1.0, 0.0); v.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); v.SyncAlias(); @@ -722,7 +721,7 @@ void SpaceOperator::GetConstantInitialVector(ComplexVector &v) void SpaceOperator::GetRandomInitialVector(ComplexVector &v) { - v.SetSize(GetNDSpace().GetTrueVSize()); + v.SetSize(2 * GetNDSpace().GetTrueVSize()); linalg::SetRandom(GetNDSpace().GetComm(), v); v.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); v.Imag().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp index c93bf0090..ec7de000c 100644 --- a/palace/models/waveportoperator.cpp +++ b/palace/models/waveportoperator.cpp @@ -98,7 +98,7 @@ std::unique_ptr GetBtt(const MaterialOperator &mat_op, btt->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); btt->Assemble(skip_zeros); btt->Finalize(skip_zeros); - return std::make_unique(std::move(btt), nd_fespace, nd_fespace); + return std::make_unique(std::move(btt), nd_fespace); } std::unique_ptr GetBtn(const MaterialOperator &mat_op, @@ -115,7 +115,7 @@ std::unique_ptr GetBtn(const MaterialOperator &mat_op, btn->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); btn->Assemble(skip_zeros); btn->Finalize(skip_zeros); - return std::make_unique(std::move(btn), h1_fespace, nd_fespace); + return std::make_unique(std::move(btn), h1_fespace, nd_fespace, false); } std::array, 3> GetBnn(const MaterialOperator &mat_op, @@ -144,9 +144,8 @@ 
std::array, 3> GetBnn(const MaterialOperator &mat_o // Contribution for loss tangent: ε => ε * (1 - i tan(δ)). if (!mat_op.HasLossTangent()) { - return {std::make_unique(std::move(bnn1), h1_fespace, h1_fespace), - std::make_unique(std::move(bnn2r), h1_fespace, h1_fespace), - nullptr}; + return {std::make_unique(std::move(bnn1), h1_fespace), + std::make_unique(std::move(bnn2r), h1_fespace), nullptr}; } constexpr MaterialPropertyType MatTypeEpsImag = MaterialPropertyType::PERMITTIVITY_IMAG; NormalProjectedCoefficient negepstandelta_func( @@ -157,9 +156,9 @@ std::array, 3> GetBnn(const MaterialOperator &mat_o bnn2i->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); bnn2i->Assemble(skip_zeros); bnn2i->Finalize(skip_zeros); - return {std::make_unique(std::move(bnn1), h1_fespace, h1_fespace), - std::make_unique(std::move(bnn2r), h1_fespace, h1_fespace), - std::make_unique(std::move(bnn2i), h1_fespace, h1_fespace)}; + return {std::make_unique(std::move(bnn1), h1_fespace), + std::make_unique(std::move(bnn2r), h1_fespace), + std::make_unique(std::move(bnn2i), h1_fespace)}; } std::array, 3> GetAtt(const MaterialOperator &mat_op, @@ -188,9 +187,8 @@ std::array, 3> GetAtt(const MaterialOperator &mat_o // Contribution for loss tangent: ε => ε * (1 - i tan(δ)). 
if (!mat_op.HasLossTangent()) { - return {std::make_unique(std::move(att1), nd_fespace, nd_fespace), - std::make_unique(std::move(att2r), nd_fespace, nd_fespace), - nullptr}; + return {std::make_unique(std::move(att1), nd_fespace), + std::make_unique(std::move(att2r), nd_fespace), nullptr}; } constexpr MaterialPropertyType MatTypeEpsImag = MaterialPropertyType::PERMITTIVITY_IMAG; MaterialPropertyCoefficient negepstandelta_func(mat_op); @@ -200,9 +198,9 @@ std::array, 3> GetAtt(const MaterialOperator &mat_o att2i->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); att2i->Assemble(skip_zeros); att2i->Finalize(skip_zeros); - return {std::make_unique(std::move(att1), nd_fespace, nd_fespace), - std::make_unique(std::move(att2r), nd_fespace, nd_fespace), - std::make_unique(std::move(att2i), nd_fespace, nd_fespace)}; + return {std::make_unique(std::move(att1), nd_fespace), + std::make_unique(std::move(att2r), nd_fespace), + std::make_unique(std::move(att2i), nd_fespace)}; } std::array, 6> From a51b25c050cad6fcec740b888b6f80abffca759c Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Thu, 18 May 2023 17:31:27 -0700 Subject: [PATCH 10/41] Debugging: Wave ports and some attempts at performance improvements at least until SubMesh is supported for ND spaces --- palace/fem/lumpedelement.hpp | 2 +- palace/linalg/strumpack.cpp | 1 + palace/linalg/superlu.cpp | 2 +- palace/linalg/superlu.hpp | 2 + palace/models/curlcurloperator.cpp | 5 +- palace/models/spaceoperator.cpp | 8 +- palace/models/surfacepostoperator.cpp | 2 +- palace/models/waveportoperator.cpp | 281 +++++++++++++++----------- palace/models/waveportoperator.hpp | 3 +- 9 files changed, 179 insertions(+), 127 deletions(-) diff --git a/palace/fem/lumpedelement.hpp b/palace/fem/lumpedelement.hpp index a6bcb7ca2..4c27e38f8 100644 --- a/palace/fem/lumpedelement.hpp +++ b/palace/fem/lumpedelement.hpp @@ -29,7 +29,7 @@ class LumpedElementData double GetArea(mfem::ParFiniteElementSpace &fespace) { mfem::ParGridFunction 
ones(&fespace); - ones.mfem::Vector::operator=(1.0); + ones = 1.0; mfem::ParLinearForm s(&fespace); mfem::ConstantCoefficient one_func(1.0); s.AddBoundaryIntegrator(new BoundaryLFIntegrator(one_func), attr_marker); diff --git a/palace/linalg/strumpack.cpp b/palace/linalg/strumpack.cpp index 18e5946bc..4ffeca81e 100644 --- a/palace/linalg/strumpack.cpp +++ b/palace/linalg/strumpack.cpp @@ -72,6 +72,7 @@ StrumpackSolverBase::StrumpackSolverBase( { // Use default } + // this->SetMatching(strumpack::MatchingJob::NONE); this->SetReorderingReuse(true); // Repeated calls use same sparsity pattern // Configure compression. diff --git a/palace/linalg/superlu.cpp b/palace/linalg/superlu.cpp index 63ca6872d..d4b803d09 100644 --- a/palace/linalg/superlu.cpp +++ b/palace/linalg/superlu.cpp @@ -67,7 +67,7 @@ SuperLUSolver::SuperLUSolver(MPI_Comm comm, config::LinearSolverData::SymFactTyp { // Use default } - solver.SetRowPermutation(mfem::superlu::NOROWPERM); + // solver.SetRowPermutation(mfem::superlu::NOROWPERM); solver.SetIterativeRefine(mfem::superlu::NOREFINE); solver.SetSymmetricPattern(true); // Always symmetric sparsity pattern } diff --git a/palace/linalg/superlu.hpp b/palace/linalg/superlu.hpp index 74e857423..51febe601 100644 --- a/palace/linalg/superlu.hpp +++ b/palace/linalg/superlu.hpp @@ -35,6 +35,8 @@ class SuperLUSolver : public mfem::Solver { } + mfem::SuperLUSolver &GetSolver() { return solver; } + void SetOperator(const Operator &op) override; void Mult(const Vector &x, Vector &y) const override { solver.Mult(x, y); } diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp index 4f40a2841..5733f3514 100644 --- a/palace/models/curlcurloperator.cpp +++ b/palace/models/curlcurloperator.cpp @@ -117,8 +117,9 @@ void CurlCurlOperator::GetStiffnessMatrix(std::vector &attr_marker, const mfem::Array &dbc_marker, mfem::Array &nd_dbc_tdof_list, - mfem::Array &h1_dbc_tdof_list) + mfem::Array &h1_dbc_tdof_list, + HYPRE_BigInt 
attr_tdof_sizes[2]) { // Mark all ND and H1 dofs which are not on the port, and then mark PEC boundaries on // the port as well. - mfem::Array nd_tdof_list, h1_tdof_list; - nd_fespace.GetEssentialTrueDofs(attr_marker, nd_tdof_list); - h1_fespace.GetEssentialTrueDofs(attr_marker, h1_tdof_list); + mfem::Array nd_attr_tdof_list, h1_attr_tdof_list; + nd_fespace.GetEssentialTrueDofs(attr_marker, nd_attr_tdof_list); + h1_fespace.GetEssentialTrueDofs(attr_marker, h1_attr_tdof_list); nd_fespace.GetEssentialTrueDofs(dbc_marker, nd_dbc_tdof_list); h1_fespace.GetEssentialTrueDofs(dbc_marker, h1_dbc_tdof_list); + attr_tdof_sizes[0] = nd_attr_tdof_list.Size(); + attr_tdof_sizes[1] = h1_attr_tdof_list.Size(); + Mpi::GlobalSum(2, attr_tdof_sizes, nd_fespace.GetComm()); mfem::Array nd_dbc_tdof_marker(nd_fespace.GetTrueVSize()), h1_dbc_tdof_marker(h1_fespace.GetTrueVSize()); nd_dbc_tdof_marker = 1; h1_dbc_tdof_marker = 1; - for (auto tdof : nd_tdof_list) + for (auto tdof : nd_attr_tdof_list) { nd_dbc_tdof_marker[tdof] = 0; } @@ -54,7 +58,7 @@ void GetEssentialTrueDofs(mfem::ParFiniteElementSpace &nd_fespace, { nd_dbc_tdof_marker[tdof] = 1; } - for (auto tdof : h1_tdof_list) + for (auto tdof : h1_attr_tdof_list) { h1_dbc_tdof_marker[tdof] = 0; } @@ -210,26 +214,9 @@ GetSystemMatrices(std::unique_ptr Btt, std::unique_ptr std::unique_ptr Att2r, std::unique_ptr Att2i, mfem::Array &nd_dbc_tdof_list, mfem::Array &h1_dbc_tdof_list) { - // Construct the 2x2 block matrices for the eigenvalue problem. We pre-compute the - // eigenvalue problem matrices such that: - // A = A₁ - ω² A₂, B = A + 1/Θ² B₃ - ω²/Θ² B₄. 
- Btt->SetEssentialTrueDofs(nd_dbc_tdof_list, Operator::DIAG_ZERO); - Btn->SetEssentialTrueDofs(&h1_dbc_tdof_list, &nd_dbc_tdof_list, Operator::DIAG_ZERO); - - Bnn1->SetEssentialTrueDofs(h1_dbc_tdof_list, Operator::DIAG_ZERO); - Bnn2r->SetEssentialTrueDofs(h1_dbc_tdof_list, Operator::DIAG_ZERO); - if (Bnn2i) - { - Bnn2i->SetEssentialTrueDofs(h1_dbc_tdof_list, Operator::DIAG_ZERO); - } - - Att1->SetEssentialTrueDofs(nd_dbc_tdof_list, Operator::DIAG_ONE); - Att2r->SetEssentialTrueDofs(nd_dbc_tdof_list, Operator::DIAG_ZERO); - if (Att2i) - { - Att2i->SetEssentialTrueDofs(nd_dbc_tdof_list, Operator::DIAG_ZERO); - } - + // Construct the 2x2 block matrices for the eigenvalue problem A e = λ B e. We pre-compute + // the matrices such that: + // A = A₁ - ω² A₂, B = A₁ - ω² A₂ + 1/Θ² B₃ - ω²/Θ² B₄. std::unique_ptr BtnT(Btn->ParallelAssemble().Transpose()); mfem::Array2D blocks(2, 2); @@ -254,18 +241,14 @@ GetSystemMatrices(std::unique_ptr Btt, std::unique_ptr A2i.reset(mfem::HypreParMatrixFromBlocks(blocks)); } - auto &Inn = Bnn1->ParallelAssemble(); - Inn *= 0.0; - Inn.EliminateZeroRows(); // Sets diagonal entries to 1 + auto &Znn = Bnn1->ParallelAssemble(); + Znn *= 0.0; blocks = nullptr; blocks(0, 0) = &Att1->ParallelAssemble(); - blocks(1, 1) = &Inn; + blocks(1, 1) = &Znn; std::unique_ptr B3(mfem::HypreParMatrixFromBlocks(blocks)); - auto &Znn = Inn; - Znn *= 0.0; - blocks(0, 0) = &Att2r->ParallelAssemble(); blocks(1, 1) = &Znn; std::unique_ptr B4r(mfem::HypreParMatrixFromBlocks(blocks)); @@ -277,6 +260,42 @@ GetSystemMatrices(std::unique_ptr Btt, std::unique_ptr B4i.reset(mfem::HypreParMatrixFromBlocks(blocks)); } + // Eliminate boundary tdofs not associated with this wave port or constrained by Dirichlet + // BCs. It is not guaranteed that any HypreParMatrix has a full diagonal in its sparsity + // pattern, so we add a zero diagonal before elimination to guarantee this for A1 and B3. 
+ mfem::Array dbc_tdof_list; + int nd_tdof_offset = Btt->Height(); + dbc_tdof_list.Reserve(nd_dbc_tdof_list.Size() + h1_dbc_tdof_list.Size()); + for (auto tdof : nd_dbc_tdof_list) + { + dbc_tdof_list.Append(tdof); + } + for (auto tdof : h1_dbc_tdof_list) + { + dbc_tdof_list.Append(tdof + nd_tdof_offset); + } + + mfem::Vector d(B3->Height()); + d = 0.0; + mfem::SparseMatrix diag(d); + mfem::HypreParMatrix Diag(B3->GetComm(), B3->GetGlobalNumRows(), B3->GetRowStarts(), + &diag); + A1.reset(mfem::Add(1.0, *A1, 1.0, Diag)); + B3.reset(mfem::Add(1.0, *B3, 1.0, Diag)); + + A1->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO); + A2r->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO); + if (A2i) + { + A2i->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO); + } + B3->EliminateBC(dbc_tdof_list, Operator::DIAG_ONE); + B4r->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO); + if (B4i) + { + B4i->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO); + } + return {std::move(A1), std::move(A2r), std::move(A2i), std::move(B3), std::move(B4r), std::move(B4i)}; } @@ -356,30 +375,30 @@ class BdrHVectorCoefficient : public mfem::VectorCoefficient // Compute Re/Im{-1/i (ikₙ Eₜ + ∇ₜ Eₙ)}. T.SetIntPoint(&ip); - if (imaginary) + mfem::Vector U; + if (!imaginary) { - gridfunc_t.imag().GetVectorValue(T, ip, V); - V *= -kn.real(); + gridfunc_t.real().GetVectorValue(T, ip, U); + U *= -kn.real(); - mfem::Vector Vn; - gridfunc_n.real().GetGradient(T, Vn); - V += Vn; + mfem::Vector dU; + gridfunc_n.imag().GetGradient(T, dU); + U -= dU; } else { - gridfunc_t.real().GetVectorValue(T, ip, V); - V *= -kn.real(); + gridfunc_t.imag().GetVectorValue(T, ip, U); + U *= -kn.real(); - mfem::Vector Vn; - gridfunc_n.imag().GetGradient(T, Vn); - V -= Vn; + mfem::Vector dU; + gridfunc_n.real().GetGradient(T, dU); + U += dU; } // Scale by 1/(ωμ) with μ evaluated in the neighboring element. 
- mfem::Vector t(V.Size()); + V.SetSize(U.Size()); + mat_op.GetInvPermeability(mesh.GetAttribute(iel1)).Mult(U, V); V *= (1.0 / omega); - mat_op.GetInvPermeability(mesh.GetAttribute(iel1)).Mult(V, t); - V = std::move(t); } void SetFrequency(double w, std::complex k) @@ -418,46 +437,61 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera double c_min = mfem::infinity(); for (auto attr : nd_fespace.GetParMesh()->attributes) { - double s = mat_op.GetLightSpeedMin(attr); - if (s < c_min) - { - c_min = s; - } + c_min = std::min(c_min, mat_op.GetLightSpeedMin(attr)); } - MFEM_VERIFY(c_min > 0.0, "Invalid material speed of light detected in WavePortOperator!"); + MFEM_VERIFY(c_min > 0.0 && c_min < mfem::infinity(), + "Invalid material speed of light detected in WavePortOperator!"); mu_eps_max = 1.0 / (c_min * c_min); // Pre-compute problem matrices such that: - // A = A₁ - ω² A₂, B = A + 1/Θ² B₃ - ω²/Θ² B₄. + // A = A₁ - ω² A₂, B = A₁ - 1 / (μₘ εₘ) B₄ - ω² A₂ + 1/Θ² B₃ . 
mfem::Array nd_dbc_tdof_list, h1_dbc_tdof_list; GetEssentialTrueDofs(nd_fespace, h1_fespace, attr_marker, dbc_marker, nd_dbc_tdof_list, - h1_dbc_tdof_list); - attr_tdof_sizes[0] = nd_fespace.GetTrueVSize() - nd_dbc_tdof_list.Size(); - attr_tdof_sizes[1] = h1_fespace.GetTrueVSize() - h1_dbc_tdof_list.Size(); - Mpi::GlobalSum(2, attr_tdof_sizes, nd_fespace.GetComm()); + h1_dbc_tdof_list, attr_tdof_sizes); { auto Btt = GetBtt(mat_op, nd_fespace, attr_marker); auto Btn = GetBtn(mat_op, nd_fespace, h1_fespace, attr_marker); auto [Bnn1, Bnn2r, Bnn2i] = GetBnn(mat_op, h1_fespace, attr_marker); auto [Att1, Att2r, Att2i] = GetAtt(mat_op, nd_fespace, attr_marker); + std::unique_ptr A1, B4r, B4i; std::tie(A1, A2r, A2i, B3, B4r, B4i) = GetSystemMatrices(std::move(Btt), std::move(Btn), std::move(Bnn1), std::move(Bnn2r), std::move(Bnn2i), std::move(Att1), std::move(Att2r), std::move(Att2i), nd_dbc_tdof_list, h1_dbc_tdof_list); - } - // Allocate storage for the eigenvalue problem operators. We have sparsity(A2) ⊆ - // sparsity(A1), sparsity(B3) = sparsity(B4) ⊆ sparsity(A1) - { + // Allocate storage for the eigenvalue problem operators. We have sparsity(A2) = + // sparsity(B3) = sparsity(B4) ⊆ sparsity(A1). Precompute the frequency independent + // contributions to A and B. 
P = std::make_unique(*A1); - *P *= 0.0; - A = std::make_unique( - std::make_unique(*P), - std::make_unique(*P)); - B = std::make_unique( - std::make_unique(*P), - std::make_unique(*P)); + if (A2i) + { + A = std::make_unique( + std::make_unique(*A1), + std::make_unique(*A2i)); + B = std::make_unique( + std::make_unique(*A1), + std::make_unique(*A2i)); + + auto &Br = *static_cast(&B->Real()); + Br.Add(-1.0 / mu_eps_max, *B4r); + + auto &Ai = *static_cast(&A->Imag()); + auto &Bi = *static_cast(&B->Imag()); + Ai *= 0.0; + Bi *= 0.0; + Bi.Add(-1.0 / mu_eps_max, *B4i); + } + else + { + A = std::make_unique( + std::make_unique(*A1), nullptr); + B = std::make_unique( + std::make_unique(*A1), nullptr); + + auto &Br = *static_cast(&B->Real()); + Br.Add(-1.0 / mu_eps_max, *B4r); + } } // Create vector for initial space for eigenvalue solves (for nullspace of [Aₜₜ 0] @@ -473,8 +507,17 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera { // Define the linear solver to be used for solving systems associated with the // generalized eigenvalue problem. 
- constexpr int print = 0; - config::LinearSolverData::Type pc_type = config::LinearSolverData::Type::DEFAULT; + constexpr int ksp_print = 0; + constexpr double ksp_tol = 1.0e-8; + constexpr double ksp_max_it = 100; + auto gmres = std::make_unique(nd_fespace.GetComm()); + gmres->iterative_mode = false; + gmres->SetRelTol(ksp_tol); + gmres->SetMaxIter(ksp_max_it); + gmres->SetKDim(ksp_max_it); + gmres->SetPrintLevel(ksp_print); + + config::LinearSolverData::Type pc_type; #if defined(MFEM_USE_SUPERLU) pc_type = config::LinearSolverData::Type::SUPERLU; #elif defined(MFEM_USE_STRUMPACK) @@ -488,28 +531,37 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera if (pc_type == config::LinearSolverData::Type::SUPERLU) { #if defined(MFEM_USE_SUPERLU) - pc = std::make_unique(nd_fespace.GetComm(), 0, false, print); + auto slu = + std::make_unique(nd_fespace.GetComm(), 0, false, ksp_print - 1); + slu->GetSolver().SetColumnPermutation(mfem::superlu::NATURAL); + pc = std::move(slu); #endif } - if (pc_type == config::LinearSolverData::Type::STRUMPACK) + else if (pc_type == config::LinearSolverData::Type::STRUMPACK) { #if defined(MFEM_USE_STRUMPACK) - pc = std::make_unique( - nd_fespace.GetComm(), 0, strumpack::CompressionType::NONE, 0.0, 0, 0, print); + auto strumpack = std::make_unique(nd_fespace.GetComm(), 0, + strumpack::CompressionType::NONE, + 0.0, 0, 0, ksp_print - 1); + strumpack->SetReorderingStrategy(strumpack::ReorderingStrategy::NATURAL); + pc = std::move(strumpack); #endif } else // config::LinearSolverData::Type::MUMPS { #if defined(MFEM_USE_MUMPS) - pc = std::make_unique( - nd_fespace.GetComm(), mfem::MUMPSSolver::SYMMETRIC_INDEFINITE, 0, 0.0, print); + auto mumps = std::make_unique(nd_fespace.GetComm(), + mfem::MUMPSSolver::SYMMETRIC_INDEFINITE, 0, + 0.0, ksp_print - 1); + mumps->SetReorderingStrategy(mfem::MUMPSSolver::AMD); + pc = std::move(mumps); #endif } - ksp = std::make_unique( - std::make_unique(nd_fespace.GetComm()), 
std::move(pc)); + ksp = std::make_unique(std::move(gmres), std::move(pc)); // Define the eigenvalue solver. - config::EigenSolverData::Type type = config::EigenSolverData::Type::DEFAULT; + constexpr int print = 0; + config::EigenSolverData::Type type; #if defined(PALACE_WITH_SLEPC) type = config::EigenSolverData::Type::SLEPC; #elif defined(PALACE_WITH_ARPACK) @@ -547,6 +599,14 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera E0n = std::make_unique(&h1_fespace); nxH0r_func = std::make_unique(*E0t, *E0n, mat_op, false); nxH0i_func = std::make_unique(*E0t, *E0n, mat_op, true); + sr = std::make_unique(&nd_fespace); + si = std::make_unique(&nd_fespace); + sr->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(*nxH0r_func), attr_marker); + si->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(*nxH0i_func), attr_marker); + sr->UseFastAssembly(false); + si->UseFastAssembly(false); + ones = std::make_unique(&nd_fespace); + *ones = 1.0; } // namespace palace void WavePortData::Initialize(double omega) @@ -560,37 +620,24 @@ void WavePortData::Initialize(double omega) // the desired wave port mode. double theta2 = mu_eps_max * omega * omega; { - auto &Ar = dynamic_cast(A->Real()); - auto &Ai = dynamic_cast(A->Imag()); - auto &Br = dynamic_cast(B->Real()); - auto &Bi = dynamic_cast(B->Imag()); + *P *= 0.0; - Ar *= 0.0; - Ar.Add(1.0, *A1); - Ar.Add(-omega * omega, *A2r); + auto &Ar = *static_cast(&A->Real()); + auto &Br = *static_cast(&B->Real()); + Ar.Add(-omega * omega + omega0 * omega0, *A2r); + Br.Add(-omega * omega + omega0 * omega0, *A2r); + Br.Add(1.0 / theta2 - (omega0 == 0.0 ? 
0.0 : 1.0 / (mu_eps_max * omega0 * omega0)), + *B3); + P->Add(1.0, Br); if (A2i) { - Ai *= 0.0; - Ai.Add(-omega * omega, *A2i); + auto &Ai = *static_cast(&A->Imag()); + auto &Bi = *static_cast(&B->Imag()); + Ai.Add(-omega * omega + omega0 * omega0, *A2i); + Bi.Add(-omega * omega + omega0 * omega0, *A2i); + P->Add(1.0, Bi); } - - Br *= 0.0; - Br.Add(1.0, Ar); - Br.Add(1.0 / theta2, *B3); - Br.Add(-omega * omega / theta2, *B4r); - - if (B4i) - { - // When B4i is nonzero, so is A2i. - Bi *= 0.0; - Bi.Add(1.0, Ai); - Bi.Add(-omega * omega / theta2, *B4i); - } - - *P *= 0.0; - P->Add(1.0, Br); - P->Add(1.0, Bi); } // Configure and solve the eigenvalue problem for the desired boundary mode. @@ -600,6 +647,9 @@ void WavePortData::Initialize(double omega) int num_conv = eigen->Solve(); MFEM_VERIFY(num_conv >= mode_idx, "Wave port eigensolver did not converge!"); std::complex lambda = eigen->GetEigenvalue(mode_idx - 1); + // Mpi::Print(" ... Wave port eigensolver error = {} (bkwd), {} (abs)\n", + // eigen->GetError(mode_idx - 1, EigenvalueSolver::ErrorType::BACKWARD), + // eigen->GetError(mode_idx - 1, EigenvalueSolver::ErrorType::ABSOLUTE)); // Extract the eigenmode solution and postprocess. The extracted eigenvalue is λ = // Θ² / (Θ² - kₙ²). @@ -608,8 +658,8 @@ void WavePortData::Initialize(double omega) << "(λ = " << lambda << ")!"); kn0 = std::sqrt(theta2 - theta2 / lambda); omega0 = omega; - dynamic_cast(*nxH0r_func).SetFrequency(omega0, kn0); - dynamic_cast(*nxH0i_func).SetFrequency(omega0, kn0); + static_cast(nxH0r_func.get())->SetFrequency(omega0, kn0); + static_cast(nxH0i_func.get())->SetFrequency(omega0, kn0); // Separate the computed field out into eₜ and eₙ and and transform back to true electric // field variables: Eₜ = eₜ/kₙ and Eₙ = ieₙ. @@ -639,17 +689,14 @@ void WavePortData::Initialize(double omega) // make results for the same port consistent between frequencies/meshes. 
{ // |E x H⋆| ⋅ n = |E ⋅ (-n x H⋆)| - sr = std::make_unique(E0t->ParFESpace()); - si = std::make_unique(E0t->ParFESpace()); - sr->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(*nxH0r_func), attr_marker); - si->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(*nxH0i_func), attr_marker); - sr->UseFastAssembly(false); - si->UseFastAssembly(false); + *sr = 0.0; + *si = 0.0; sr->Assemble(); si->Assemble(); + double sign = ((*sr)(*ones) > 0.0) ? 1.0 : -1.0; std::complex s0(-(*sr)(E0t->real()) - (*si)(E0t->imag()), -(*sr)(E0t->imag()) + (*si)(E0t->real())); - double scale = std::copysign(1.0 / std::sqrt(std::abs(s0)), s0.real()); + double scale = sign / std::sqrt(std::abs(s0)); E0t->real() *= scale; // This updates the n x H coefficients depending on Et, En too E0t->imag() *= scale; E0n->real() *= scale; @@ -885,8 +932,8 @@ void WavePortOperator::Initialize(double omega) if (first) { Mpi::Print(" Number of global unknowns for port {:d}:\n" - " ND: {:d}, H1: {:d}\n", - data.GlobalTrueNDSize(), data.GlobalTrueH1Size()); + " H1: {:d}, ND: {:d}\n", + idx, data.GlobalTrueH1Size(), data.GlobalTrueNDSize()); } double k0 = 1.0 / iodata.DimensionalizeValue(IoData::ValueType::LENGTH, 1.0); Mpi::Print(" Port {:d}, mode {:d}: kₙ = {:.3e}{:+.3e}i m⁻¹\n", idx, diff --git a/palace/models/waveportoperator.hpp b/palace/models/waveportoperator.hpp index 3c8247f2d..f95891730 100644 --- a/palace/models/waveportoperator.hpp +++ b/palace/models/waveportoperator.hpp @@ -44,7 +44,7 @@ class WavePortData // Operator storage for repeated boundary mode eigenvalue problem solves. double mu_eps_max; HYPRE_BigInt attr_tdof_sizes[2]; - std::unique_ptr A1, A2r, A2i, B3, B4r, B4i, P; + std::unique_ptr A2r, A2i, B3, P; std::unique_ptr A, B; ComplexVector v0, e0, e0t, e0n; @@ -62,6 +62,7 @@ class WavePortData // postprocessing integrated quantities on the port. 
std::unique_ptr nxH0r_func, nxH0i_func; std::unique_ptr sr, si; + std::unique_ptr ones; public: WavePortData(const config::WavePortData &data, const MaterialOperator &mat_op, From be95ec5b5f34de77f862ed0bc3c74b19fd321c78 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Mon, 22 May 2023 20:30:13 -0700 Subject: [PATCH 11/41] Minor cleanup to simplify custom coefficients with partial template specialization --- palace/fem/coefficient.hpp | 100 +++++++++------------------------ palace/fem/integrator.hpp | 2 +- palace/models/postoperator.cpp | 18 ++---- 3 files changed, 33 insertions(+), 87 deletions(-) diff --git a/palace/fem/coefficient.hpp b/palace/fem/coefficient.hpp index 18b53de55..c5626267f 100644 --- a/palace/fem/coefficient.hpp +++ b/palace/fem/coefficient.hpp @@ -309,7 +309,7 @@ inline double DielectricInterfaceCoefficient::Eval( // Substrate-air interface: 0.5 * t * (ϵ_SA * |E_t|² + 1 / ϵ_MS * |E_n|²) . double Vn = V * nor; - add(V, -Vn, nor, V); + mfem::Vector::add(V, -Vn, nor, V); return 0.5 * ts * (epsilon * (V * V) + (Vn * Vn) / epsilon); } @@ -454,35 +454,21 @@ inline void MaterialPropertyCoefficient +template class EnergyDensityCoefficient : public mfem::Coefficient, public BdrGridFunctionCoefficient { private: - typedef - typename std::conditional::type - GridFunctionType; const GridFunctionType &U; const MaterialOperator &mat_op; mutable mfem::Vector V; + const mfem::DenseMatrix &GetMaterialProperty(int attr) const; + double GetLocalEnergyDensity(mfem::ElementTransformation &T, - const mfem::IntegrationPoint &ip, int attr) - { - MFEM_ABORT( - "EnergyDensityCoefficient::GetLocalEnergyDensity() is not implemented for this " - "value type!"); - return 0.0; - } + const mfem::IntegrationPoint &ip, int attr); public: EnergyDensityCoefficient(const GridFunctionType &gf, const MaterialOperator &op, @@ -522,75 +508,41 @@ class EnergyDensityCoefficient : public mfem::Coefficient, public BdrGridFunctio } }; -template <> -inline double 
-EnergyDensityCoefficient:: - GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, - int attr) +template +inline const mfem::DenseMatrix & +EnergyDensityCoefficient::GetMaterialProperty(int attr) const { - // Only the real part of the permittivity contributes to the energy (imaginary part - // cancels out in the inner product due to symmetry). - U.real().GetVectorValue(T, ip, V); - double res = mat_op.GetPermittivityReal(attr).InnerProduct(V, V); - U.imag().GetVectorValue(T, ip, V); - res += mat_op.GetPermittivityReal(attr).InnerProduct(V, V); - return 0.5 * res; + return mat_op.GetPermittivityReal(attr); } -template <> -inline double -EnergyDensityCoefficient:: - GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, - int attr) +template +inline const mfem::DenseMatrix & +EnergyDensityCoefficient::GetMaterialProperty(int attr) const { - U.GetVectorValue(T, ip, V); - return 0.5 * mat_op.GetPermittivityReal(attr).InnerProduct(V, V); + return mat_op.GetInvPermeability(attr); } -template <> -inline double EnergyDensityCoefficient:: - GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, - int attr) -{ - U.real().GetVectorValue(T, ip, V); - double res = mat_op.GetPermittivityImag(attr).InnerProduct(V, V); - U.imag().GetVectorValue(T, ip, V); - res += mat_op.GetPermittivityImag(attr).InnerProduct(V, V); - return -0.5 * res; -} - -template <> -inline double -EnergyDensityCoefficient:: - GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, - int attr) -{ - U.GetVectorValue(T, ip, V); - return -0.5 * mat_op.GetPermittivityImag(attr).InnerProduct(V, V); -} - -template <> -inline double -EnergyDensityCoefficient:: - GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, - int attr) +template +inline double EnergyDensityCoefficient::GetLocalEnergyDensity( + mfem::ElementTransformation &T, const 
mfem::IntegrationPoint &ip, int attr) { + // Only the real part of the permittivity contributes to the energy (imaginary part + // cancels out in the inner product due to symmetry). U.real().GetVectorValue(T, ip, V); - double res = mat_op.GetInvPermeability(attr).InnerProduct(V, V); + double res = GetMaterialProperty(attr).InnerProduct(V, V); U.imag().GetVectorValue(T, ip, V); - res += mat_op.GetInvPermeability(attr).InnerProduct(V, V); + res += GetMaterialProperty(attr).InnerProduct(V, V); return 0.5 * res; } -template <> -inline double -EnergyDensityCoefficient:: - GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, - int attr) +template +inline double EnergyDensityCoefficient::GetLocalEnergyDensity( + mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, int attr) { U.GetVectorValue(T, ip, V); - return 0.5 * mat_op.GetInvPermeability(attr).InnerProduct(V, V); + return 0.5 * GetMaterialProperty(attr).InnerProduct(V, V); } // Returns the local field evaluated on a boundary element. 
For internal boundary elements, diff --git a/palace/fem/integrator.hpp b/palace/fem/integrator.hpp index 78b87c9ca..546c71f84 100644 --- a/palace/fem/integrator.hpp +++ b/palace/fem/integrator.hpp @@ -104,7 +104,7 @@ class BoundaryLFIntegrator : public mfem::LinearFormIntegrator, fe.CalcShape(ip, shape); double val = ip.weight * Tr.Weight() * Q.Eval(Tr, ip); - add(elvect, val, shape, elvect); + mfem::Vector::add(elvect, val, shape, elvect); } } }; diff --git a/palace/models/postoperator.cpp b/palace/models/postoperator.cpp index 8e1cba350..8c9f87797 100644 --- a/palace/models/postoperator.cpp +++ b/palace/models/postoperator.cpp @@ -73,20 +73,16 @@ PostOperator::PostOperator(const IoData &iodata, SpaceOperator &spaceop, Bsi = std::make_unique(B->imag(), mat_op, local_to_shared); Jsi = std::make_unique(B->imag(), mat_op, local_to_shared); Qsi = std::make_unique(E->imag(), mat_op, local_to_shared); - Ue = std::make_unique>( + Ue = std::make_unique>( *E, mat_op, local_to_shared); - Um = std::make_unique>( + Um = std::make_unique>( *B, mat_op, local_to_shared); } else { - Ue = std::make_unique>( + Ue = std::make_unique>( E->real(), mat_op, local_to_shared); - Um = std::make_unique>( + Um = std::make_unique>( B->real(), mat_op, local_to_shared); } @@ -122,8 +118,7 @@ PostOperator::PostOperator(const IoData &iodata, LaplaceOperator &laplaceop, // etc.), since only V and E fields are supplied. Esr = std::make_unique(E->real(), mat_op, local_to_shared); Vs = std::make_unique(*V, mat_op, local_to_shared); - Ue = std::make_unique< - EnergyDensityCoefficient>( + Ue = std::make_unique>( E->real(), mat_op, local_to_shared); Qsr = std::make_unique(E->real(), mat_op, local_to_shared); @@ -150,8 +145,7 @@ PostOperator::PostOperator(const IoData &iodata, CurlCurlOperator &curlcurlop, // etc.), since only the B field is supplied. 
Bsr = std::make_unique(B->real(), mat_op, local_to_shared); As = std::make_unique(*A, mat_op, local_to_shared); - Um = std::make_unique< - EnergyDensityCoefficient>( + Um = std::make_unique>( B->real(), mat_op, local_to_shared); Jsr = std::make_unique(B->real(), mat_op, local_to_shared); From 25b779c3d17e419443d5e8d7b6196673f8ef2bc0 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Mon, 22 May 2023 20:50:51 -0700 Subject: [PATCH 12/41] WIP: Add CG, GMRES, and FGMRES solver implementations for both real- and complex-valued operators, as part of a new Solver class --- palace/linalg/CMakeLists.txt | 4 +- palace/linalg/complex.cpp | 963 ----------------------------------- palace/linalg/complex.hpp | 519 ------------------- palace/linalg/iterative.cpp | 634 +++++++++++++++++++++++ palace/linalg/iterative.hpp | 193 +++++++ palace/linalg/orthog.hpp | 63 +++ palace/linalg/solver.cpp | 41 ++ palace/linalg/solver.hpp | 81 +++ 8 files changed, 1015 insertions(+), 1483 deletions(-) delete mode 100644 palace/linalg/complex.cpp delete mode 100644 palace/linalg/complex.hpp create mode 100644 palace/linalg/iterative.cpp create mode 100644 palace/linalg/iterative.hpp create mode 100644 palace/linalg/orthog.hpp create mode 100644 palace/linalg/solver.cpp create mode 100644 palace/linalg/solver.hpp diff --git a/palace/linalg/CMakeLists.txt b/palace/linalg/CMakeLists.txt index 195d2d2cc..8a4647129 100644 --- a/palace/linalg/CMakeLists.txt +++ b/palace/linalg/CMakeLists.txt @@ -11,16 +11,18 @@ target_sources(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/ams.cpp ${CMAKE_CURRENT_SOURCE_DIR}/arpack.cpp ${CMAKE_CURRENT_SOURCE_DIR}/chebyshev.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/complex.cpp ${CMAKE_CURRENT_SOURCE_DIR}/curlcurl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/distrelaxation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/divfree.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gmg.cpp ${CMAKE_CURRENT_SOURCE_DIR}/jacobi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ksp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/iterative.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/mumps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/operator.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/rap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/slepc.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/solver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/strumpack.cpp ${CMAKE_CURRENT_SOURCE_DIR}/superlu.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vector.cpp diff --git a/palace/linalg/complex.cpp b/palace/linalg/complex.cpp deleted file mode 100644 index 073e7cc78..000000000 --- a/palace/linalg/complex.cpp +++ /dev/null @@ -1,963 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#include "complex.hpp" - -#include - -namespace palace -{ - -ComplexVector::ComplexVector(int n) : Vector(n) -{ - xr_.MakeRef(*this, 0, n / 2); - xi_.MakeRef(*this, n / 2, n / 2); -} - -ComplexVector::ComplexVector(const ComplexVector &x) : Vector(x.Size()) -{ - xr_.MakeRef(*this, 0, x.Size() / 2); - xi_.MakeRef(*this, x.Size() / 2, x.Size() / 2); - Set(x.Real(), x.Imag()); -} - -ComplexVector::ComplexVector(const Vector &xr, const Vector &xi) : Vector(2 * xr.Size()) -{ - MFEM_VERIFY(xr.Size() == xi.Size(), - "Mismatch in dimension of real and imaginary matrix parts in ComplexVector!"); - xr_.MakeRef(*this, 0, xr.Size()); - xi_.MakeRef(*this, xr.Size(), xr.Size()); - Set(xr, xi); -} - -ComplexVector::ComplexVector(const std::complex *px, int n) : Vector(2 * n) -{ - xr_.MakeRef(*this, 0, n); - xi_.MakeRef(*this, n, n); - Set(px, n); -} - -void ComplexVector::SetSize(int n) -{ - Vector::SetSize(n); - xr_.MakeRef(*this, 0, n / 2); - xi_.MakeRef(*this, n / 2, n / 2); -} - -ComplexVector &ComplexVector::operator=(const ComplexVector &y) -{ - Set(y.Real(), y.Imag()); - return *this; -} - -void ComplexVector::Set(const Vector &yr, const Vector &yi) -{ - MFEM_VERIFY(yr.Size() == yi.Size() && 2 * yr.Size() == Size(), - "Mismatch in dimension of real and imaginary matrix parts in ComplexVector!"); - Real() = yr; - Imag() = yi; - RestoreReal(); - RestoreImag(); -} - -void 
ComplexVector::Set(const std::complex *py, int n) -{ - MFEM_VERIFY(2 * n == Size(), - "Mismatch in dimension for array of std::complex in ComplexVector!"); - Vector y(reinterpret_cast(const_cast *>(py)), 2 * n); - const int N = n; - const auto *Y = y.Read(); - auto *XR = Real().Write(); - auto *XI = Imag().Write(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - XR[i] = Y[2 * i]; - XI[i] = Y[2 * i + 1]; - }); - RestoreReal(); - RestoreImag(); -} - -void ComplexVector::Get(std::complex *py, int n) const -{ - MFEM_VERIFY(2 * n == Size(), - "Mismatch in dimension for array of std::complex in ComplexVector!"); - Vector y(reinterpret_cast(py), 2 * n); - const int N = n; - const auto *XR = Real().Read(); - const auto *XI = Imag().Read(); - auto *Y = y.Write(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - Y[2 * i] = XR[i]; - Y[2 * i + 1] = XI[i]; - }); - y.HostReadWrite(); -} - -void ComplexVector::Conj() -{ - Imag() *= -1.0; - RestoreImag(); -} - -ComplexVector &ComplexVector::operator=(std::complex s) -{ - Real() = s.real(); - Imag() = s.imag(); - RestoreReal(); - RestoreImag(); - return *this; -} - -ComplexVector &ComplexVector::operator*=(std::complex s) -{ - if (s.imag() == 0.0) - { - Vector::operator*=(s.real()); - } - else - { - const int N = Size() / 2; - const double sr = s.real(); - const double si = s.imag(); - auto *XR = Real().ReadWrite(); - auto *XI = Imag().ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const double t = si * XR[i] + sr * XI[i]; - XR[i] = sr * XR[i] - si * XI[i]; - XI[i] = t; - }); - RestoreReal(); - RestoreImag(); - } - return *this; -} - -std::complex ComplexVector::Dot(const ComplexVector &y) const -{ - return {mfem::InnerProduct(Real(), y.Real()) + mfem::InnerProduct(Imag(), y.Imag()), - mfem::InnerProduct(Imag(), y.Real()) - mfem::InnerProduct(Real(), y.Imag())}; -} - -std::complex ComplexVector::TransposeDot(const ComplexVector &y) const -{ - return {mfem::InnerProduct(Real(), y.Real()) - 
mfem::InnerProduct(Imag(), y.Imag()), - mfem::InnerProduct(Imag(), y.Real()) + mfem::InnerProduct(Real(), y.Imag())}; -} - -void ComplexVector::AXPY(std::complex alpha, const ComplexVector &y) -{ - const int N = Size() / 2; - const double ar = alpha.real(); - const double ai = alpha.imag(); - const auto *YR = y.Real().Read(); - const auto *YI = y.Imag().Read(); - auto *XR = Real().ReadWrite(); - auto *XI = Imag().ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - XR[i] += ar * YR[i] - ai * YI[i]; - XI[i] += ai * YR[i] + ar * YI[i]; - }); - RestoreReal(); - RestoreImag(); -} - -void ComplexVector::AXPBY(std::complex alpha, const ComplexVector &y, - std::complex beta) -{ - const int N = Size() / 2; - const double ar = alpha.real(); - const double ai = alpha.imag(); - const auto *YR = y.Real().Read(); - const auto *YI = y.Imag().Read(); - if (beta != 0.0) - { - const double br = beta.real(); - const double bi = beta.imag(); - auto *XR = Real().ReadWrite(); - auto *XI = Imag().ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const double t = bi * XR[i] + br * XI[i]; - XR[i] = ar * YR[i] - ai * YI[i] + br * XR[i] - bi * XI[i]; - XI[i] = ai * YR[i] + ar * YI[i] + t; - }); - } - else - { - auto *XR = Real().Write(); - auto *XI = Imag().Write(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - XR[i] = ar * YR[i] - ai * YI[i]; - XI[i] = ai * YR[i] + ar * YI[i]; - }); - } - RestoreReal(); - RestoreImag(); -} - -void ComplexVector::AXPBYPCZ(std::complex alpha, const ComplexVector &y, - std::complex beta, const ComplexVector &z, - std::complex gamma) -{ - const int N = Size() / 2; - const double ar = alpha.real(); - const double ai = alpha.imag(); - const double br = beta.real(); - const double bi = beta.imag(); - const auto *YR = y.Real().Read(); - const auto *YI = y.Imag().Read(); - const auto *ZR = z.Real().Read(); - const auto *ZI = z.Imag().Read(); - if (gamma != 0.0) - { - const double gr = gamma.real(); - const double gi = 
gamma.imag(); - auto *XR = Real().ReadWrite(); - auto *XI = Imag().ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const double t = gi * XR[i] + gr * XI[i]; - XR[i] = ar * YR[i] - ai * YI[i] + br * ZR[i] - bi * ZI[i] + gr * XR[i] - - gi * XI[i]; - XI[i] = ai * YR[i] + ar * YI[i] + bi * ZR[i] + br * ZI[i] + t; - }); - } - else - { - auto *XR = Real().Write(); - auto *XI = Imag().Write(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - XR[i] = ar * YR[i] - ai * YI[i] + br * ZR[i] - bi * ZI[i]; - XI[i] = ai * YR[i] + ar * YI[i] + bi * ZR[i] + br * ZI[i]; - }); - } - RestoreReal(); - RestoreImag(); -} - -void ComplexOperator::Mult(const Vector &x, Vector &y) const -{ - MFEM_ASSERT(x.Size() == width && y.Size() == height, - "Incompatible dimensions for ComplexOperator::Mult!"); - Vector xr, xi, yr, yi; - xr.MakeRef(const_cast(x), 0, width / 2); - xi.MakeRef(const_cast(x), width / 2, width / 2); - yr.MakeRef(y, 0, height / 2); - yi.MakeRef(y, height / 2, height / 2); - Mult(xr, xi, yr, yi); - yr.SyncAliasMemory(y); - yi.SyncAliasMemory(y); -} - -void ComplexOperator::MultTranspose(const Vector &x, Vector &y) const -{ - MFEM_ASSERT(x.Size() == height && y.Size() == width, - "Incompatible dimensions for ComplexOperator::MultTranspose!"); - Vector xr, xi, yr, yi; - xr.MakeRef(const_cast(x), 0, height / 2); - xi.MakeRef(const_cast(x), height / 2, height / 2); - yr.MakeRef(y, 0, width / 2); - yi.MakeRef(y, width / 2, width / 2); - MultTranspose(xr, xi, yr, yi); - yr.SyncAliasMemory(y); - yi.SyncAliasMemory(y); -} - -void ComplexOperator::MultHermitianTranspose(const Vector &x, Vector &y) const -{ - MFEM_ASSERT(x.Size() == height && y.Size() == width, - "Incompatible dimensions for ComplexOperator::MultHermitianTranspose!"); - Vector xr, xi, yr, yi; - xr.MakeRef(const_cast(x), 0, height / 2); - xi.MakeRef(const_cast(x), height / 2, height / 2); - yr.MakeRef(y, 0, width / 2); - yi.MakeRef(y, width / 2, width / 2); - MultHermitianTranspose(xr, xi, yr, 
yi); - yr.SyncAliasMemory(y); - yi.SyncAliasMemory(y); -} - -void ComplexOperator::AddMult(const Vector &x, Vector &y, const double a) const -{ - MFEM_ASSERT(x.Size() == width && y.Size() == height, - "Incompatible dimensions for ComplexOperator::AddMult!"); - Vector xr, xi, yr, yi; - xr.MakeRef(const_cast(x), 0, width / 2); - xi.MakeRef(const_cast(x), width / 2, width / 2); - yr.MakeRef(y, 0, height / 2); - yi.MakeRef(y, height / 2, height / 2); - AddMult(xr, xi, yr, yi, a); - yr.SyncAliasMemory(y); - yi.SyncAliasMemory(y); -} - -void ComplexOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) const -{ - MFEM_ASSERT(x.Size() == height && y.Size() == width, - "Incompatible dimensions for ComplexOperator::AddMultTranspose!"); - Vector xr, xi, yr, yi; - xr.MakeRef(const_cast(x), 0, height / 2); - xi.MakeRef(const_cast(x), height / 2, height / 2); - yr.MakeRef(y, 0, width / 2); - yi.MakeRef(y, width / 2, width / 2); - AddMultTranspose(xr, xi, yr, yi, a); - yr.SyncAliasMemory(y); - yi.SyncAliasMemory(y); -} - -void ComplexOperator::AddMultHermitianTranspose(const Vector &x, Vector &y, - const double a) const -{ - MFEM_ASSERT(x.Size() == height && y.Size() == width, - "Incompatible dimensions for ComplexOperator::AddMultHermitianTranspose!"); - Vector xr, xi, yr, yi; - xr.MakeRef(const_cast(x), 0, height / 2); - xi.MakeRef(const_cast(x), height / 2, height / 2); - yr.MakeRef(y, 0, width / 2); - yi.MakeRef(y, width / 2, width / 2); - AddMultHermitianTranspose(xr, xi, yr, yi, a); - yr.SyncAliasMemory(y); - yi.SyncAliasMemory(y); -} - -ComplexParOperator::ComplexParOperator(std::unique_ptr &&A, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, - bool test_restrict) - : ComplexOperator(2 * test_fespace.GetTrueVSize(), 2 * trial_fespace.GetTrueVSize()), - A_(std::move(A)), trial_fespace_(trial_fespace), test_fespace_(test_fespace), - use_R_(test_restrict), trial_dbc_tdof_list_(nullptr), 
test_dbc_tdof_list_(nullptr), - diag_policy_(DiagonalPolicy::DIAG_ONE) -{ - MFEM_VERIFY(A_, "Cannot construct ComplexParOperator from an empty matrix!"); - lxr_.SetSize(A_->Width() / 2); - lxi_.SetSize(A_->Width() / 2); - lyr_.SetSize(A_->Height() / 2); - lyi_.SetSize(A_->Height() / 2); - txr_.SetSize(width / 2); - txi_.SetSize(width / 2); - if (height != width) - { - tyr_.SetSize(height / 2); - tyi_.SetSize(height / 2); - } - else - { - tyr_.MakeRef(txr_, 0, height / 2); - tyi_.MakeRef(txi_, 0, height / 2); - } -} - -void ComplexParOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a, bool zero_real, - bool zero_imag) const -{ - MFEM_ASSERT(xr.Size() == width / 2 && xi.Size() == width / 2 && yr.Size() == height / 2 && - yi.Size() == height / 2, - "Incompatible dimensions for ComplexParOperator::AddMult!"); - if (trial_dbc_tdof_list_) - { - txr_ = xr; - txi_ = xi; - txr_.SetSubVector(*trial_dbc_tdof_list_, 0.0); - txi_.SetSubVector(*trial_dbc_tdof_list_, 0.0); - } - if (!zero_real) - { - trial_fespace_.GetProlongationMatrix()->Mult(trial_dbc_tdof_list_ ? txr_ : xr, lxr_); - } - if (!zero_imag) - { - trial_fespace_.GetProlongationMatrix()->Mult(trial_dbc_tdof_list_ ? txi_ : xi, lxi_); - } - - // Apply the operator on the L-vector. 
- lyr_ = 0.0; - lyi_ = 0.0; - A_->AddMult(lxr_, lxi_, lyr_, lyi_, a, zero_real, zero_imag); - - if (test_dbc_tdof_list_) - { - if (!use_R_) - { - test_fespace_.GetProlongationMatrix()->MultTranspose(lyr_, tyr_); - test_fespace_.GetProlongationMatrix()->MultTranspose(lyi_, tyi_); - } - else - { - test_fespace_.GetRestrictionMatrix()->Mult(lyr_, tyr_); - test_fespace_.GetRestrictionMatrix()->Mult(lyi_, tyi_); - } - { - if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) - { - const int N = test_dbc_tdof_list_->Size(); - const auto *idx = test_dbc_tdof_list_->Read(); - const auto *XR = xr.Read(); - const auto *XI = xi.Read(); - auto *TYR = tyr_.ReadWrite(); - auto *TYI = tyi_.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - TYR[id] = XR[id]; - TYI[id] = XI[id]; - }); - } - else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) - { - tyr_.SetSubVector(*test_dbc_tdof_list_, 0.0); - tyi_.SetSubVector(*test_dbc_tdof_list_, 0.0); - } - else - { - MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); - } - } - yr += tyr_; - yi += tyi_; - } - else - { - if (!use_R_) - { - test_fespace_.GetProlongationMatrix()->MultTranspose(lyr_, yr); - test_fespace_.GetProlongationMatrix()->MultTranspose(lyi_, yi); - } - else - { - test_fespace_.GetRestrictionMatrix()->Mult(lyr_, yr); - test_fespace_.GetRestrictionMatrix()->Mult(lyi_, yi); - } - } -} - -void ComplexParOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, const std::complex a, - bool zero_real, bool zero_imag) const -{ - MFEM_ASSERT(xr.Size() == height / 2 && xi.Size() == height / 2 && - yr.Size() == width / 2 && yi.Size() == width / 2, - "Incompatible dimensions for ComplexParOperator::AddMultTranspose!"); - if (test_dbc_tdof_list_) - { - tyr_ = xr; - tyi_ = xi; - tyr_.SetSubVector(*test_dbc_tdof_list_, 0.0); - tyi_.SetSubVector(*test_dbc_tdof_list_, 0.0); - } - if (!use_R_) - { - if (!zero_real) - { - 
test_fespace_.GetProlongationMatrix()->Mult(test_dbc_tdof_list_ ? tyr_ : xr, lyr_); - } - if (!zero_imag) - { - test_fespace_.GetProlongationMatrix()->Mult(test_dbc_tdof_list_ ? tyi_ : xi, lyi_); - } - } - else - { - if (!zero_real) - { - test_fespace_.GetRestrictionMatrix()->MultTranspose(test_dbc_tdof_list_ ? tyr_ : xr, - lyr_); - } - if (!zero_imag) - { - test_fespace_.GetRestrictionMatrix()->MultTranspose(test_dbc_tdof_list_ ? tyi_ : xi, - lyi_); - } - } - - // Apply the operator on the L-vector. - lxr_ = 0.0; - lxi_ = 0.0; - A_->AddMultTranspose(lyr_, lyi_, lxr_, lxi_, a, zero_real, zero_imag); - - if (trial_dbc_tdof_list_) - { - trial_fespace_.GetProlongationMatrix()->MultTranspose(lxr_, txr_); - trial_fespace_.GetProlongationMatrix()->MultTranspose(lxi_, txi_); - { - if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) - { - const int N = trial_dbc_tdof_list_->Size(); - const auto *idx = trial_dbc_tdof_list_->Read(); - const auto *XR = xr.Read(); - const auto *XI = xi.Read(); - auto *TXR = txr_.ReadWrite(); - auto *TXI = txi_.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - TXR[id] = XR[id]; - TXI[id] = XI[id]; - }); - } - else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) - { - txr_.SetSubVector(*trial_dbc_tdof_list_, 0.0); - txi_.SetSubVector(*trial_dbc_tdof_list_, 0.0); - } - else - { - MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); - } - } - yr += txr_; - yi += txi_; - } - else - { - trial_fespace_.GetProlongationMatrix()->AddMultTranspose(lxr_, yr); - trial_fespace_.GetProlongationMatrix()->AddMultTranspose(lxi_, yi); - } -} - -void ComplexParOperator::AddMultHermitianTranspose(const Vector &xr, const Vector &xi, - Vector &yr, Vector &yi, - const std::complex a, - bool zero_real, bool zero_imag) const -{ - MFEM_ASSERT(xr.Size() == height / 2 && xi.Size() == height / 2 && - yr.Size() == width / 2 && yi.Size() == width / 2, - "Incompatible dimensions for 
ComplexParOperator::AddMultHermitianTranspose!"); - if (test_dbc_tdof_list_) - { - tyr_ = xr; - tyi_ = xi; - tyr_.SetSubVector(*test_dbc_tdof_list_, 0.0); - tyi_.SetSubVector(*test_dbc_tdof_list_, 0.0); - } - if (!use_R_) - { - if (!zero_real) - { - test_fespace_.GetProlongationMatrix()->Mult(test_dbc_tdof_list_ ? tyr_ : xr, lyr_); - } - if (!zero_imag) - { - test_fespace_.GetProlongationMatrix()->Mult(test_dbc_tdof_list_ ? tyi_ : xi, lyi_); - } - } - else - { - if (!zero_real) - { - test_fespace_.GetRestrictionMatrix()->MultTranspose(test_dbc_tdof_list_ ? tyr_ : xr, - lyr_); - } - if (!zero_imag) - { - test_fespace_.GetRestrictionMatrix()->MultTranspose(test_dbc_tdof_list_ ? tyi_ : xi, - lyi_); - } - } - - // Apply the operator on the L-vector. - lxr_ = 0.0; - lxi_ = 0.0; - A_->AddMultHermitianTranspose(lyr_, lyi_, lxr_, lxi_, a, zero_real, zero_imag); - - if (trial_dbc_tdof_list_) - { - trial_fespace_.GetProlongationMatrix()->MultTranspose(lxr_, txr_); - trial_fespace_.GetProlongationMatrix()->MultTranspose(lxi_, txi_); - { - if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) - { - const int N = trial_dbc_tdof_list_->Size(); - const auto *idx = trial_dbc_tdof_list_->Read(); - const auto *XR = xr.Read(); - const auto *XI = xi.Read(); - auto *TXR = txr_.ReadWrite(); - auto *TXI = txi_.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - TXR[id] = XR[id]; - TXI[id] = XI[id]; - }); - } - else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) - { - txr_.SetSubVector(*trial_dbc_tdof_list_, 0.0); - txi_.SetSubVector(*trial_dbc_tdof_list_, 0.0); - } - else - { - MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); - } - } - yr += txr_; - yi += txi_; - } - else - { - trial_fespace_.GetProlongationMatrix()->AddMultTranspose(lxr_, yr); - trial_fespace_.GetProlongationMatrix()->AddMultTranspose(lxi_, yi); - } -} - -ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&Ar, - 
std::unique_ptr &&Ai) - : ComplexOperator(2 * (Ar ? Ar->Height() : Ai->Height()), - 2 * (Ar ? Ar->Width() : Ai->Width())), - Ar_(std::move(Ar)), Ai_(std::move(Ai)) -{ - MFEM_VERIFY(Ar_ || Ai_, "Cannot construct ComplexWrapperOperator from an empty matrix!"); - MFEM_VERIFY((!Ar_ || !Ai_) || - (Ar_->Height() == Ai_->Height() && Ar_->Width() == Ai_->Width()), - "Mismatch in dimension of real and imaginary matrix parts!"); - txr_.SetSize(width / 2); - txi_.SetSize(width / 2); - if (height != width) - { - tyr_.SetSize(height / 2); - tyi_.SetSize(height / 2); - } - else - { - tyr_.MakeRef(txr_, 0, height / 2); - tyi_.MakeRef(txi_, 0, height / 2); - } -} - -void ComplexWrapperOperator::Mult(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, bool zero_real, bool zero_imag) const -{ - if (Ar_) - { - if (!zero_real) - { - Ar_->Mult(xr, yr); - } - if (!zero_imag) - { - Ar_->Mult(xi, yi); - } - } - else - { - yr = 0.0; - yi = 0.0; - } - if (Ai_) - { - if (!zero_imag) - { - Ai_->AddMult(xi, yr, -1.0); - } - if (!zero_real) - { - Ai_->AddMult(xr, yi, 1.0); - } - } -} - -void ComplexWrapperOperator::MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, bool zero_real, bool zero_imag) const -{ - if (Ar_) - { - if (!zero_real) - { - Ar_->MultTranspose(xr, yr); - } - if (!zero_imag) - { - Ar_->MultTranspose(xi, yi); - } - } - else - { - yr = 0.0; - yi = 0.0; - } - if (Ai_) - { - if (!zero_imag) - { - Ai_->AddMultTranspose(xi, yr, -1.0); - } - if (!zero_real) - { - Ai_->AddMultTranspose(xr, yi, 1.0); - } - } -} - -void ComplexWrapperOperator::MultHermitianTranspose(const Vector &xr, const Vector &xi, - Vector &yr, Vector &yi, bool zero_real, - bool zero_imag) const -{ - if (Ar_) - { - if (!zero_real) - { - Ar_->MultTranspose(xr, yr); - } - if (!zero_imag) - { - Ar_->MultTranspose(xi, yi); - } - } - else - { - yr = 0.0; - yi = 0.0; - } - if (Ai_) - { - if (!zero_imag) - { - Ai_->AddMultTranspose(xi, yr, 1.0); - } - if (!zero_real) - { - 
Ai_->AddMultTranspose(xr, yi, -1.0); - } - } -} - -void ComplexWrapperOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, const std::complex a, - bool zero_real, bool zero_imag) const -{ - if (a.real() != 0.0 && a.imag() != 0.0) - { - Mult(xr, xi, tyr_, tyi_, zero_real, zero_imag); - const int N = height / 2; - const double ar = a.real(); - const double ai = a.imag(); - const auto *TYR = tyr_.Read(); - const auto *TYI = tyi_.Read(); - auto *YR = yr.ReadWrite(); - auto *YI = yi.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - YR[i] += ar * TYR[i] - ai * TYI[i]; - YI[i] += ai * TYR[i] + ar * TYI[i]; - }); - } - else if (a.real() != 0.0) - { - if (Ar_) - { - if (!zero_real) - { - Ar_->AddMult(xr, yr, a.real()); - } - if (!zero_imag) - { - Ar_->AddMult(xi, yi, a.real()); - } - } - if (Ai_) - { - if (!zero_imag) - { - Ai_->AddMult(xi, yr, -a.real()); - } - if (!zero_real) - { - Ai_->AddMult(xr, yi, a.real()); - } - } - } - else if (a.imag() != 0.0) - { - if (Ar_) - { - if (!zero_real) - { - Ar_->AddMult(xr, yi, a.imag()); - } - if (!zero_imag) - { - Ar_->AddMult(xi, yr, -a.imag()); - } - } - if (Ai_) - { - if (!zero_imag) - { - Ai_->AddMult(xi, yi, -a.imag()); - } - if (!zero_real) - { - Ai_->AddMult(xr, yr, -a.imag()); - } - } - } -} - -void ComplexWrapperOperator::AddMultTranspose(const Vector &xr, const Vector &xi, - Vector &yr, Vector &yi, - const std::complex a, bool zero_real, - bool zero_imag) const -{ - if (a.real() != 0.0 && a.imag() != 0.0) - { - MultTranspose(xr, xi, txr_, txi_, zero_real, zero_imag); - const int N = width; - const double ar = a.real(); - const double ai = a.imag(); - const auto *TXR = txr_.Read(); - const auto *TXI = txi_.Read(); - auto *YR = yr.ReadWrite(); - auto *YI = yi.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - YR[i] += ar * TXR[i] - ai * TXI[i]; - YI[i] += ai * TXR[i] + ar * TXI[i]; - }); - } - else if (a.real() != 0.0) - { - if (Ar_) - { - if (!zero_real) - { - 
Ar_->AddMultTranspose(xr, yr, a.real()); - } - if (!zero_imag) - { - Ar_->AddMultTranspose(xi, yi, a.real()); - } - } - if (Ai_) - { - if (!zero_imag) - { - Ai_->AddMultTranspose(xi, yr, -a.real()); - } - if (!zero_real) - { - Ai_->AddMultTranspose(xr, yi, a.real()); - } - } - } - else if (a.imag() != 0.0) - { - if (Ar_) - { - if (!zero_real) - { - Ar_->AddMultTranspose(xr, yi, a.imag()); - } - if (!zero_imag) - { - Ar_->AddMultTranspose(xi, yr, -a.imag()); - } - } - if (Ai_) - { - if (!zero_imag) - { - Ai_->AddMultTranspose(xi, yi, -a.imag()); - } - if (!zero_real) - { - Ai_->AddMultTranspose(xr, yr, -a.imag()); - } - } - } -} - -void ComplexWrapperOperator::AddMultHermitianTranspose(const Vector &xr, const Vector &xi, - Vector &yr, Vector &yi, - const std::complex a, - bool zero_real, bool zero_imag) const -{ - if (a.real() != 0.0 && a.imag() != 0.0) - { - MultHermitianTranspose(xr, xi, txr_, txi_, zero_real, zero_imag); - const int N = width; - const double ar = a.real(); - const double ai = a.imag(); - const auto *TXR = txr_.Read(); - const auto *TXI = txi_.Read(); - auto *YR = yr.ReadWrite(); - auto *YI = yi.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - YR[i] += ar * TXR[i] - ai * TXI[i]; - YI[i] += ai * TXR[i] + ar * TXI[i]; - }); - } - else if (a.real() != 0.0) - { - if (Ar_) - { - if (!zero_real) - { - Ar_->AddMultTranspose(xr, yr, a.real()); - } - if (!zero_imag) - { - Ar_->AddMultTranspose(xi, yi, a.real()); - } - } - if (Ai_) - { - if (!zero_imag) - { - Ai_->AddMultTranspose(xi, yr, a.real()); - } - if (!zero_real) - { - Ai_->AddMultTranspose(xr, yi, -a.real()); - } - } - } - else if (a.imag() != 0.0) - { - if (Ar_) - { - if (!zero_real) - { - Ar_->AddMultTranspose(xr, yi, a.imag()); - } - if (!zero_imag) - { - Ar_->AddMultTranspose(xi, yr, -a.imag()); - } - } - if (Ai_) - { - if (!zero_imag) - { - Ai_->AddMultTranspose(xi, yi, a.imag()); - } - if (!zero_real) - { - Ai_->AddMultTranspose(xr, yr, a.imag()); - } - } - } -} - -} // 
namespace palace diff --git a/palace/linalg/complex.hpp b/palace/linalg/complex.hpp deleted file mode 100644 index 386bb651a..000000000 --- a/palace/linalg/complex.hpp +++ /dev/null @@ -1,519 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LINALG_COMPLEX_HPP -#define PALACE_LINALG_COMPLEX_HPP - -#include -#include -#include -#include -#include -#include "linalg/operator.hpp" -#include "linalg/vector.hpp" - -namespace palace -{ - -// -// Vector and operator classes for complex-valued linear algebra. -// - -// A complex-valued vector represented as two real vectors, one for each component. The -// value returned by the vector size is twice the actual complex-valued size. -class ComplexVector : public Vector -{ -private: - Vector xr_, xi_; - -public: - // Create a vector with the given size. The provided size should be the real-valued size, - // twice the actual complex-valued size, in order to agree with ComplexOperator::Height(). - ComplexVector(int n = 0); - - // Copy constructor. - ComplexVector(const ComplexVector &x); - - // Copy constructor from separately provided real and imaginary parts. - ComplexVector(const Vector &xr, const Vector &xi); - - // Copy constructor from an array of complex values. The size provided should be the - // length of the array x, which is half the resulting real-valued vector size. - ComplexVector(const std::complex *px, int n); - - // Set the size of the vector. The provided size should be the real-valued size, twice the - // actual complex-valued size, in order to agree with ComplexOperator::Height(). See the - // notes for Vector::SetSize for behavior in the cases where n is less than or greater - // than Size() or Capacity(). - void SetSize(int n); - - // Get const access to the real and imaginary vector parts. Assumes that these are - // synchronized following a Sync() call. 
- const Vector &Real() const { return xr_; } - const Vector &Imag() const { return xi_; } - - // Get access to the real and imaginary vector parts with required synchronization with - // the underlying storage. - Vector &Real() - { - xr_.SyncMemory(*this); - return xr_; - } - Vector &Imag() - { - xi_.SyncMemory(*this); - return xi_; - } - void RestoreReal() { xr_.SyncAliasMemory(*this); } - void RestoreImag() { xi_.SyncAliasMemory(*this); } - - // Copy assignment operator. This should probably not be used to modify the size of the - // vector. - ComplexVector &operator=(const ComplexVector &y); - - // Copy assignment from separately provided real and imaginary parts. - void Set(const Vector &yr, const Vector &yi); - - // Copy assignment from an array of complex values. The size provided should be the length - // of the array x, which is half the real-valued vector size. - void Set(const std::complex *py, int n); - - // Copy the vector into an array of complex values. The size provided should be the length - // of the array y, which is half the real-valued vector size. - void Get(std::complex *py, int n) const; - - // Replace entries with complex conjugate. - void Conj(); - - // Set all entries equal to s. - ComplexVector &operator=(std::complex s); - - // Scale all entries by s. - ComplexVector &operator*=(std::complex s); - - // Vector dot product (yᴴ x) or indefinite dot product (yᵀ x) for complex vectors. - std::complex Dot(const ComplexVector &y) const; - std::complex TransposeDot(const ComplexVector &y) const; - - // In-place addition x += alpha * y. - void AXPY(std::complex alpha, const ComplexVector &y); - - // In-place addition x = alpha * y + beta * x. - void AXPBY(std::complex alpha, const ComplexVector &y, std::complex beta); - - // In-place addition x = alpha * y + beta * z + gamma * x. 
- void AXPBYPCZ(std::complex alpha, const ComplexVector &y, - std::complex beta, const ComplexVector &z, - std::complex gamma); - - // Update the memory location of the real and imaginary parts to match the underlying - // storage, or vice versa. - void Sync() - { - xr_.SyncMemory(*this); - xi_.SyncMemory(*this); - } - void SyncAlias() - { - xr_.SyncAliasMemory(*this); - xi_.SyncAliasMemory(*this); - } -}; - -// Abstract base class for complex-valued operators. The values returned by the operator -// height and width are twice the actual complex-valued size. -class ComplexOperator : public Operator -{ -protected: - // The sizes provided by derived class constructors should already be twice the actual - // complex-valued size. - ComplexOperator(int s) : Operator(s) {} - ComplexOperator(int h, int w) : Operator(h, w) {} - -public: - // Test whether or not the operator is purely real or imaginary. - virtual bool IsReal() const = 0; - virtual bool IsImag() const = 0; - - // Get access to the real and imaginary operator parts. 
- virtual const Operator &Real() const - { - MFEM_ABORT("Real() is not implemented for base class ComplexOperator!"); - return *this; - } - virtual Operator &Real() - { - MFEM_ABORT("Real() is not implemented for base class ComplexOperator!"); - return *this; - } - virtual const Operator &Imag() const - { - MFEM_ABORT("Imag() is not implemented for base class ComplexOperator!"); - return *this; - } - virtual Operator &Imag() - { - MFEM_ABORT("Imag() is not implemented for base class ComplexOperator!"); - return *this; - } - - void Mult(const Vector &x, Vector &y) const override; - - void Mult(const ComplexVector &x, ComplexVector &y) const - { - Mult(x.Real(), x.Imag(), y.Real(), y.Imag()); - } - - virtual void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const = 0; - - void MultTranspose(const Vector &x, Vector &y) const override; - - void MultTranspose(const ComplexVector &x, ComplexVector &y) const - { - MultTranspose(x.Real(), x.Imag(), y.Real(), y.Imag()); - } - - virtual void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const = 0; - - void MultHermitianTranspose(const Vector &x, Vector &y) const; - - void MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const - { - MultHermitianTranspose(x.Real(), x.Imag(), y.Real(), y.Imag()); - } - - virtual void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, bool zero_real = false, - bool zero_imag = false) const = 0; - - void AddMult(const Vector &x, Vector &y, const double a = 1.0) const override; - - void AddMult(const ComplexVector &x, ComplexVector &y, - const std::complex a = 1.0) const - { - AddMult(x.Real(), x.Imag(), y.Real(), y.Imag(), a); - } - - virtual void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const = 
0; - - void AddMultTranspose(const Vector &x, Vector &y, const double a = 1.0) const override; - - void AddMultTranspose(const ComplexVector &x, ComplexVector &y, - const std::complex a = 1.0) const - { - AddMultTranspose(x.Real(), x.Imag(), y.Real(), y.Imag(), a); - } - - virtual void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const = 0; - - void AddMultHermitianTranspose(const Vector &x, Vector &y, const double a = 1.0) const; - - void AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y, - const std::complex a = 1.0) const - { - AddMultHermitianTranspose(x.Real(), x.Imag(), y.Real(), y.Imag(), a); - } - - virtual void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, const std::complex a = 1.0, - bool zero_real = false, - bool zero_imag = false) const = 0; -}; - -// A parallel complex-valued operator represented by RᵀAP for complex-valued A, constructed -// through the actions of Rᵀ, A, and P with possible eliminated essential BC. -class ComplexParOperator : public ComplexOperator -{ -private: - std::unique_ptr A_; - const mfem::ParFiniteElementSpace &trial_fespace_, &test_fespace_; - const bool use_R_; - - // Lists of constrained essential boundary true dofs for elimination. - mutable const mfem::Array *trial_dbc_tdof_list_, *test_dbc_tdof_list_; - - // Diagonal policy for constrained true dofs. - DiagonalPolicy diag_policy_; - - // Temporary storage for operator application. - mutable Vector lxr_, lxi_, lyr_, lyi_, txr_, txi_, tyr_, tyi_; - -public: - // Construct the complex-valued parallel operator, inheriting ownership of the local - // operator. 
- ComplexParOperator(std::unique_ptr &&A, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); - ComplexParOperator(std::unique_ptr &&A, - const mfem::ParFiniteElementSpace &fespace) - : ComplexParOperator(std::move(A), fespace, fespace, false) - { - } - - // Get access to the underlying local (L-vector) operator. - const ComplexOperator &LocalOperator() const - { - MFEM_ASSERT(A_, "No local matrix available for ComplexParOperator::LocalOperator!"); - return *A_; - } - - // Set essential boundary condition true dofs for square operators. - void SetEssentialTrueDofs(const mfem::Array &dbc_tdof_list, - DiagonalPolicy diag_policy) - { - MFEM_VERIFY(height == width, "Set essential true dofs for both test and trial spaces " - "for rectangular ComplexParOperator!"); - trial_dbc_tdof_list_ = &dbc_tdof_list; - test_dbc_tdof_list_ = &dbc_tdof_list; - diag_policy_ = diag_policy; - } - - // Set essential boundary condition true dofs for rectangular operators. - void SetEssentialTrueDofs(const mfem::Array *trial_dbc_tdof_list, - const mfem::Array *test_dbc_tdof_list, - DiagonalPolicy diag_policy) - { - MFEM_VERIFY(diag_policy == DiagonalPolicy::DIAG_ZERO, - "Essential boundary condition true dof elimination for rectangular " - "ComplexParOperator only supports DiagonalPolicy::DIAG_ZERO!"); - trial_dbc_tdof_list_ = trial_dbc_tdof_list; - test_dbc_tdof_list_ = test_dbc_tdof_list; - diag_policy_ = diag_policy; - } - - // Get the essential boundary condition true dofs associated with the operator. May be - // nullptr. - const mfem::Array *GetEssentialTrueDofs() const - { - MFEM_VERIFY(trial_dbc_tdof_list_ == test_dbc_tdof_list_ && height == width, - "GetEssentialTrueDofs should only be used for square ComplexParOperator!"); - return trial_dbc_tdof_list_; - } - - // Get access to the finite element spaces associated with the operator. 
- const mfem::ParFiniteElementSpace &GetFESpace() const - { - MFEM_VERIFY(&trial_fespace_ == &test_fespace_ && height == width, - "GetFESpace should only be used for square ParOperator!"); - return trial_fespace_; - } - - // Get the associated MPI communicator. - MPI_Comm GetComm() const { return trial_fespace_.GetComm(); } - - bool IsReal() const override { return A_->IsReal(); } - bool IsImag() const override { return A_->IsImag(); } - - using ComplexOperator::AddMult; - using ComplexOperator::AddMultHermitianTranspose; - using ComplexOperator::AddMultTranspose; - using ComplexOperator::Mult; - using ComplexOperator::MultHermitianTranspose; - using ComplexOperator::MultTranspose; - - void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override - { - yr = 0.0; - yi = 0.0; - AddMult(xr, xi, yr, yi, 1.0, zero_real, zero_imag); - } - - void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override - { - yr = 0.0; - yi = 0.0; - AddMultTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); - } - - void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override - { - yr = 0.0; - yi = 0.0; - AddMultHermitianTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); - } - - void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; - - void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; - - void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; -}; - -// A complex-valued 
operator represented using a block 2x2 equivalent-real formulation. -class ComplexWrapperOperator : public ComplexOperator -{ -private: - std::unique_ptr Ar_, Ai_; - - // Temporary storage for operator application. - mutable Vector txr_, txi_, tyr_, tyi_; - -public: - // Construct a complex operator which inherits ownershipt of the input real and imaginary - // parts. - ComplexWrapperOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai); - - bool IsReal() const override { return Ai_ == nullptr; } - bool IsImag() const override { return Ar_ == nullptr; } - - const Operator &Real() const override { return *Ar_; } - Operator &Real() override { return *Ar_; } - const Operator &Imag() const override { return *Ai_; } - Operator &Imag() override { return *Ai_; } - - using ComplexOperator::AddMult; - using ComplexOperator::AddMultHermitianTranspose; - using ComplexOperator::AddMultTranspose; - using ComplexOperator::Mult; - using ComplexOperator::MultHermitianTranspose; - using ComplexOperator::MultTranspose; - - void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override; - - void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override; - - void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, - bool zero_imag = false) const override; - - void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; - - void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; - - void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) 
const override; -}; - -// Wrap a sequence of operators of the same dimensions and optional coefficients. -class ComplexSumOperator : public ComplexOperator -{ -private: - std::vector>> ops_; - -public: - ComplexSumOperator(int s) : ComplexOperator(s) {} - ComplexSumOperator(int h, int w) : ComplexOperator(h, w) {} - ComplexSumOperator(const ComplexOperator &op, std::complex c = 1.0) - : ComplexOperator(op.Height(), op.Width()) - { - AddOperator(op, c); - } - - void AddOperator(const ComplexOperator &op, std::complex c = 1.0) - { - MFEM_VERIFY(op.Height() == height && op.Width() == width, - "Invalid Operator dimensions for ComplexSumOperator!"); - ops_.emplace_back(&op, c); - } - - bool IsReal() const override - { - for (const auto &[op, c] : ops_) - { - if (!op->IsReal()) - { - return false; - } - } - return true; - } - - bool IsImag() const override - { - for (const auto &[op, c] : ops_) - { - if (!op->IsImag()) - { - return false; - } - } - return true; - } - - using ComplexOperator::AddMult; - using ComplexOperator::AddMultHermitianTranspose; - using ComplexOperator::AddMultTranspose; - using ComplexOperator::Mult; - using ComplexOperator::MultHermitianTranspose; - using ComplexOperator::MultTranspose; - - void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override - { - yr = 0.0; - yi = 0.0; - AddMult(xr, xi, yr, yi, 1.0, zero_real, zero_imag); - } - - void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override - { - yr = 0.0; - yi = 0.0; - AddMultTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); - } - - void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override - { - yr = 0.0; - yi = 0.0; - AddMultHermitianTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); - } - - void AddMult(const Vector &xr, const 
namespace
{

// Sanity checks (through MFEM_ASSERT) on an inner product computed by an iterative solver.
// The real-valued overload requires the value to be finite and non-negative; the
// complex-valued overload requires both parts to be finite and the real part to be
// non-negative.
// NOTE(review): msg has to be a std::string already. Call sites which compose the message
// inline with operator<< (as CgSolver::Mult in this file does) do not produce one and need
// to format the text before the call -- confirm and fix at the call sites.
template <typename T>
inline void CheckDot([[maybe_unused]] T dot, [[maybe_unused]] const std::string &msg)
{
  MFEM_ASSERT(std::isfinite(dot) && dot >= 0.0, msg);
}

template <typename T>
inline void CheckDot([[maybe_unused]] std::complex<T> dot,
                     [[maybe_unused]] const std::string &msg)
{
  MFEM_ASSERT(std::isfinite(dot.real()) && std::isfinite(dot.imag()) && dot.real() >= 0.0,
              msg);
}

// Smallest positive scaling value used by the plane rotation routines below:
// max(radix^(min_exponent - 1), radix^(1 - max_exponent)) for the floating point type T.
// Note that std::pow is not required to be constexpr by the C++ standard, so the result is
// not guaranteed to be usable in a constant expression on every toolchain.
template <typename T>
inline constexpr T SafeMin()
{
  // Originally part of LAPACK.
  // LAPACK is free software: you can redistribute it and/or modify it under
  // the terms of the BSD 3-Clause license.
  //
  // Copyright (c) 2021-2023, University of Colorado Denver. All rights reserved.
  // Copyright (c) 2017-2021, University of Tennessee. All rights reserved.
  //
  // Original author: Weslley S Pereira, University of Colorado Denver, USA
  constexpr int fradix = std::numeric_limits<T>::radix;
  constexpr int expm = std::numeric_limits<T>::min_exponent;
  constexpr int expM = std::numeric_limits<T>::max_exponent;
  return std::max(std::pow(fradix, T(expm - 1)), std::pow(fradix, T(1 - expM)));
}

// Largest scaling value used by the plane rotation routines below:
// min(radix^(1 - min_exponent), radix^(max_exponent - 1)) for the floating point type T.
// The same remark on std::pow and constexpr as for SafeMin applies.
template <typename T>
inline constexpr T SafeMax()
{
  // Originally part of LAPACK.
  // LAPACK is free software: you can redistribute it and/or modify it under
  // the terms of the BSD 3-Clause license.
  //
  // Copyright (c) 2021-2023, University of Colorado Denver. All rights reserved.
  // Copyright (c) 2017-2021, University of Tennessee. All rights reserved.
  //
  // Original author: Weslley S Pereira, University of Colorado Denver, USA
  constexpr int fradix = std::numeric_limits<T>::radix;
  constexpr int expm = std::numeric_limits<T>::min_exponent;
  constexpr int expM = std::numeric_limits<T>::max_exponent;
  return std::min(std::pow(fradix, T(1 - expm)), std::pow(fradix, T(expM - 1)));
}

// Generates a real plane (Givens) rotation so that:
//                [  cs  sn ] . [ dx ] = [ r ]
//                [ -sn  cs ]   [ dy ]   [ 0 ]
// where cs² + sn² = 1. Only cs and sn are returned. See LAPACK's s/dlartg.
template <typename T>
inline void GeneratePlaneRotation(const T dx, const T dy, T &cs, T &sn)
{
  // Trivial cases: nothing to eliminate, or a pure swap (up to sign).
  if (dy == 0.0)
  {
    cs = 1.0;
    sn = 0.0;
    return;
  }
  if (dx == 0.0)
  {
    cs = 0.0;
    sn = std::copysign(1.0, dy);
    return;
  }
  const T root_min = std::sqrt(SafeMin<T>());
  const T root_max = std::sqrt(SafeMax<T>() / 2);
  T dx1 = std::abs(dx);
  T dy1 = std::abs(dy);
  if (dx1 > root_min && dx1 < root_max && dy1 > root_min && dy1 < root_max)
  {
    // Both magnitudes are in the range where the squares can be formed directly.
    T d = std::sqrt(dx * dx + dy * dy);
    cs = dx1 / d;
    sn = dy / std::copysign(d, dx);
  }
  else
  {
    // Scale by the larger magnitude (clamped to [SafeMin, SafeMax]) before squaring.
    T u = std::min(SafeMax<T>(), std::max(SafeMin<T>(), std::max(dx1, dy1)));
    T dxs = dx / u;
    T dys = dy / u;
    T d = std::sqrt(dxs * dxs + dys * dys);
    cs = std::abs(dxs) / d;
    sn = dys / std::copysign(d, dx);
  }
}

template <typename T>
inline void GeneratePlaneRotation(const std::complex<T> dx, const std::complex<T> dy, T &cs,
                                  std::complex<T> &sn)
{
  // Generates a plane rotation so that:
  //                [  cs        sn ] . [ dx ] = [ r ]
  //                [ -conj(sn)  cs ]   [ dy ]   [ 0 ]
  // where cs is real and cs² + |sn|² = 1. See LAPACK's c/zlartg.
  // The comparisons against zero use T(0) so that the std::complex<T> comparison operators
  // resolve for every floating point type T (not only double).
  if (dy == T(0))
  {
    cs = 1.0;
    sn = 0.0;
    return;
  }
  if (dx == T(0))
  {
    // sn = conj(dy) / |dy|, with |dy| evaluated without overflow or underflow.
    cs = 0.0;
    if (dy.real() == 0.0)
    {
      sn = std::conj(dy) / std::abs(dy.imag());
    }
    else if (dy.imag() == 0.0)
    {
      sn = std::conj(dy) / std::abs(dy.real());
    }
    else
    {
      const T root_min = std::sqrt(SafeMin<T>());
      const T root_max = std::sqrt(SafeMax<T>() / 2);
      T dy1 = std::max(std::abs(dy.real()), std::abs(dy.imag()));
      if (dy1 > root_min && dy1 < root_max)
      {
        sn = std::conj(dy) / std::sqrt(dy.real() * dy.real() + dy.imag() * dy.imag());
      }
      else
      {
        T u = std::min(SafeMax<T>(), std::max(SafeMin<T>(), dy1));
        std::complex<T> dys = dy / u;
        sn = std::conj(dys) / std::sqrt(dys.real() * dys.real() + dys.imag() * dys.imag());
      }
    }
    return;
  }
  const T root_min = std::sqrt(SafeMin<T>());
  const T root_max = std::sqrt(SafeMax<T>() / 4);
  T dx1 = std::max(std::abs(dx.real()), std::abs(dx.imag()));
  T dy1 = std::max(std::abs(dy.real()), std::abs(dy.imag()));
  if (dx1 > root_min && dx1 < root_max && dy1 > root_min && dy1 < root_max)
  {
    // Unscaled algorithm: the squared magnitudes can be formed directly.
    T dx2 = dx.real() * dx.real() + dx.imag() * dx.imag();
    T dy2 = dy.real() * dy.real() + dy.imag() * dy.imag();
    T dz2 = dx2 + dy2;
    if (dx2 >= dz2 * SafeMin<T>())
    {
      cs = std::sqrt(dx2 / dz2);
      if (dx2 > root_min && dz2 < root_max * 2)
      {
        sn = std::conj(dy) * (dx / std::sqrt(dx2 * dz2));
      }
      else
      {
        sn = std::conj(dy) * ((dx / cs) / dz2);
      }
    }
    else
    {
      T d = std::sqrt(dx2 * dz2);
      cs = dx2 / d;
      sn = std::conj(dy) * (dx / d);
    }
  }
  else
  {
    // Scaled algorithm: dy is scaled by u, dx by u or (when dx is much smaller than dy) by
    // its own scale v, with the ratio w = v / u folded back into cs at the end.
    T u = std::min(SafeMax<T>(), std::max(SafeMin<T>(), std::max(dx1, dy1))), w;
    std::complex<T> dys = dy / u, dxs;
    T dy2 = dys.real() * dys.real() + dys.imag() * dys.imag(), dx2, dz2;
    if (dx1 / u < root_min)
    {
      T v = std::min(SafeMax<T>(), std::max(SafeMin<T>(), dx1));
      w = v / u;
      dxs = dx / v;
      dx2 = dxs.real() * dxs.real() + dxs.imag() * dxs.imag();
      dz2 = dx2 * w * w + dy2;
    }
    else
    {
      w = 1.0;
      dxs = dx / u;
      dx2 = dxs.real() * dxs.real() + dxs.imag() * dxs.imag();
      dz2 = dx2 + dy2;
    }
    if (dx2 >= dz2 * SafeMin<T>())
    {
      cs = std::sqrt(dx2 / dz2);
      if (dx2 > root_min && dz2 < root_max * 2)
      {
        sn = std::conj(dys) * (dxs / std::sqrt(dx2 * dz2));
      }
      else
      {
        sn = std::conj(dys) * ((dxs / cs) / dz2);
      }
    }
    else
    {
      T d = std::sqrt(dx2 * dz2);
      cs = dx2 / d;
      sn = std::conj(dys) * (dxs / d);
    }
    cs *= w;
  }
}

// Applies the real plane rotation (cs, sn) in place:
//                dx <- cs dx + sn dy,  dy <- -sn dx + cs dy.
template <typename T>
inline void ApplyPlaneRotation(T &dx, T &dy, const T cs, const T sn)
{
  T t = cs * dx + sn * dy;
  dy = -sn * dx + cs * dy;
  dx = t;
}

// Applies the complex plane rotation (cs, sn), cs real, in place:
//                dx <- cs dx + sn dy,  dy <- -conj(sn) dx + cs dy.
template <typename T>
inline void ApplyPlaneRotation(std::complex<T> &dx, std::complex<T> &dy, const T cs,
                               const std::complex<T> sn)
{
  std::complex<T> t = cs * dx + sn * dy;
  dy = -std::conj(sn) * dx + cs * dy;
  dx = t;
}

}  // namespace
+ } + } + int_width = 3; + tab_width = 0; + + rel_tol = abs_tol = 0.0; + max_it = 100; + + converged = false; + initial_res = final_res = 0.0; + final_it = 0; +} + +template +void CgSolver::Mult(const VecType &b, VecType &x) const +{ + // Set up workspace. + ScalarType beta, beta_prev, alpha, denom; + RealType res, eps; + MFEM_VERIFY(A, "Operator must be set for CgSolver::Mult!"); + MFEM_ASSERT(A->Width() == x.Size() && A->Height() == b.Size(), + "Size mismatch for CgSolver::Mult!"); + r.SetSize(A->Height()); + z.SetSize(A->Height()); + p.SetSize(A->Height()); + + // Initialize. + if (initial_guess) + { + A->Mult(x, r); + linalg::AXPBY(1.0, b, -1.0, r); + } + else + { + r = b; + x = 0.0; + } + if (B) + { + B->Mult(r, z); + } + else + { + z = r; + } + beta = linalg::Dot(comm, z, r); + CheckDot(beta, "PCG preconditioner is not positive definite: (Br, r) = " << beta << "!"); + res = initial_res = std::sqrt(std::abs(beta)); + eps = std::max(rel_tol * res, abs_tol); + converged = (res < eps); + + // Begin iterations. 
+ int it = 0; + if (print_opts.iterations) + { + Mpi::Print(comm, "{}Residual norms for PCG solve\n", + std::string(tab_width + int_width - 1, ' ')); + } + for (; it < max_it && !converged; it++) + { + if (print_opts.iterations) + { + Mpi::Print(comm, "{}{:{}d} iteration, residual (B r, r) = {:.6e}\n", + std::string(tab_width, ' '), it, int_width, beta); + } + if (!it) + { + p = z; + } + else + { + linalg::AXPBY(1.0, z, beta / beta_prev, p); + } + + A->Mult(p, z); + denom = linalg::Dot(comm, z, p); + CheckDot(denom, "PCG operator is not positive definite: (Ap, p) = " << denom << "!"); + alpha = beta / denom; + + x.Add(alpha, p); + r.Add(-alpha, z); + + beta_prev = beta; + if (B) + { + B->Mult(r, z); + } + else + { + z = r; + } + beta = linalg::Dot(comm, z, r); + CheckDot(beta, + "PCG preconditioner is not positive definite: (Br, r) = " << beta << "!"); + res = std::sqrt(std::abs(beta)); + converged = (res < eps); + } + if (print_opts.iterations) + { + Mpi::Print(comm, "{}{:{}d} iteration, residual (B r, r) = {:.6e}\n", + std::string(tab_width, ' '), it, int_width, beta); + } + if (print_opts.summary || (print_opts.warnings && !converged)) + { + Mpi::Print(comm, "{}PCG solver {} with {:d} iteration{}", std::string(tab_width, ' '), + converged ? "converged" : "did NOT converge", it, (it == 1) ? "" : "s"); + if (it > 0) + { + Mpi::Print(comm, " (avg. 
reduction factor: {:.6e})\n", + std::pow(res / initial_res, 1.0 / it)); + } + else + { + Mpi::Print(comm, "\n"); + } + } + final_res = res; + final_it = it; +} + +template +void GmresSolver::Initialize() const +{ + if (!V.empty()) + { + MFEM_ASSERT(V.Size() == max_dim + 1 && V[0].Size() == A->Height(), + "Repeated solves with GmresSolver should not modify the operator size or " + "restart dimension!"); + return; + } + if (max_dim < 0) + { + max_dim = max_it; + } + V.resize(max_dim + 1); + for (int j = 0; j < std::min(5, max_dim + 1); j++) + { + V[j].SetSize(A->Height()); + } + if (flexible) + { + Z.resize(max_dim + 1); + for (int j = 0; j < std::min(5, max_dim + 1); j++) + { + Z[j].SetSize(A->Height()); + } + } + else + { + r.SetSize(A->Height()); + } + H.resize((max_dim + 1) * max_dim); + s.resize(max_dim + 1); + cs.resize(max_dim + 1); + sn.resize(max_dim + 1); +} + +template +void GmresSolver::Mult(const VecType &x, VecType &y) const +{ + // Set up workspace. + RealType beta = 0.0, true_beta, eps; + MFEM_VERIFY(A, "Operator must be set for GmresSolver::Mult!"); + MFEM_ASSERT(A->Width() == x.Size() && A->Height() == b.Size(), + "Size mismatch for GmresSolver::Mult!"); + Initialize(); + + // Begin iterations. + converged = false; + int it = 0, restart = 0; + if (print_opts.iterations) + { + Mpi::Print(comm, "{}Residual norms for {}GMRES solve\n", flexible ? "F" : "", + std::string(tab_width + int_width - 1, ' ')); + } + for (; it < max_it && !converged; restart++) + { + // Initialize. 
+ if (B && pc_side == PrecSide::LEFT) + { + if (initial_guess || restart > 0) + { + A->Mult(x, V[0]); + linalg::AXPBY(1.0, b, -1.0, V[0]); + B->Mult(V[0], r); + } + else + { + B->Mult(b, r); + x = 0.0; + } + } + else // !B || pc_side == PrecSide::RIGHT + { + if (initial_guess || restart > 0) + { + A->Mult(x, r); + linalg::AXPBY(1.0, b, -1.0, r); + } + else + { + r = b; + x = 0.0; + } + } + true_beta = linalg::Norml2(comm, r); + CheckDot(true_beta, "GMRES residual norm is not valid: ||Br|| = " << true_beta << "!"); + if (it == 0) + { + initial_res = true_beta; + eps = std::max(rel_tol * true_beta, abs_tol); + } + else if (beta > 0.0 && std::abs(beta - true_beta) > 0.1 * initial_res && + print_opts.warnings) + { + Mpi::Print( + comm, + "{}{}GMRES residual at restart ({:.6e}) is far from the residual norm estimate " + "from the recursion formula ({.6e}) (initial residual = {:.6e})\n", + std::string(tab_width, ' '), flexible ? "F" : "", true_beta, beta, initial_res); + } + beta = true_beta; + if (beta < eps) + { + converged = true; + break; + } + + V[0] = 0.0; + V[0].Add(1.0 / beta, r); + std::fill(s.begin(), s.end(), 0.0); + s[0] = beta; + + int j = 0; + for (; j < max_dim && it < max_it; j++, it++) + { + if (print_opts.iterations) + { + Mpi::Print(comm, "{}{:{}d} iteration ({:d} restarts), residual {:.6e}\n", it, + std::string(tab_width, ' '), int_width, restart, beta); + } + VecType &w = V[j + 1]; + if (w.Size() == 0) + { + // Add storage for basis vectors in increments. 
+ for (int k = j + 1; k < std::min(j + 11, max_dim + 1); k++) + { + V[k].SetSize(A->Height()); + } + if (flexible) + { + for (int k = j + 1; k < std::min(j + 11, max_dim + 1); k++) + { + Z[k].SetSize(A->Height()); + } + } + } + if (B && pc_side == PrecSide::LEFT) + { + A->Mult(V[j], r); + B->Mult(r, w); + } + else if (B && pc_side == PrecSide::RIGHT) + { + if (!flexible) + { + B->Mult(V[j], r); + A->Mult(r, w); + } + else + { + B->Mult(V[j], Z[j]); + A->Mult(Z[j], w); + } + } + else + { + A->Mult(V[j], w); + } + + ScalarType *Hj = H.data() + j * (max_dim + 1); + switch (orthog_type) + { + case OrthogType::MGS: + linalg::OrthogonalizeColumnMGS(comm, V, w, Hj, j + 1); + break; + case OrthogType::CGS: + linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1); + break; + case OrthogType::CGS2: + linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1, true); + break; + } + Hj[j + 1] = linalg::Norml2(comm, w); + w *= 1.0 / Hj[j + 1]; + + for (int k = 0; k < j; k++) + { + ApplyPlaneRotation(Hj[k], Hj[k + 1], cs[k], sn[k]); + } + GeneratePlaneRotation(Hj[j], Hj[j + 1], cs[j], sn[j]); + ApplyPlaneRotation(Hj[j], Hj[j + 1], cs[j], sn[j]); + ApplyPlaneRotation(s[j], s[j + 1], cs[j], sn[j]); + + beta = std::abs(s[j + 1]); + CheckDot(beta, "GMRES residual norm is not valid: ||Br|| = " << beta << "!"); + if (beta < eps) + { + converged = true; + break; + } + } + + // Reconstruct the solution (for restart or due to convergence or maximum iterations). 
+ for (int i = j; i >= 0; i--) + { + ScalarType *Hi = H.data() + i * (max_dim + 1); + s[i] /= Hi[i]; + for (int k = 0; k < i; k++) + { + s[k] -= Hi[k] * s[i]; + } + } + if (!B || pc_side == PrecSide::LEFT) + { + for (int k = 0; k <= j; k++) + { + x.Add(s[k], V[k]); + } + } + else // B && pc_side == PrecSide::RIGHT + { + if (!flexible) + { + r = 0.0; + for (int k = 0; k <= j; k++) + { + r.Add(s[k], V[k]); + } + B->Mult(r, V[0]); + x += V[0]; + } + else + { + for (int k = 0; k <= j; k++) + { + x.Add(s[k], Z[k]); + } + } + } + } + if (print_opts.iterations) + { + Mpi::Print(comm, "{}{:{}d} iteration ({:d} restarts), residual {:.6e}\n", it, int_width, + std::string(tab_width, ' '), restart, beta); + } + if (print_opts.summary || (print_opts.warnings && !converged)) + { + Mpi::Print(comm, "{}{}GMRES solver {} with {:d} iteration{}", flexible ? "F" : "", + std::string(tab_width, ' '), converged ? "converged" : "did NOT converge", + it, (it == 1) ? "" : "s"); + if (it > 0) + { + Mpi::Print(comm, " (avg. reduction factor: {:.6e})\n", + std::pow(beta / initial_res, 1.0 / it)); + } + else + { + Mpi::Print(comm, "\n"); + } + } + final_res = beta; + final_it = it; +} + +template class IterativeSolver; +template class IterativeSolver; +template class CgSolver; +template class CgSolver; +template class GmresSolver; +template class GmresSolver; +template class FgmresSolver; +template class FgmresSolver; + +} // namespace palace diff --git a/palace/linalg/iterative.hpp b/palace/linalg/iterative.hpp new file mode 100644 index 000000000..48beac434 --- /dev/null +++ b/palace/linalg/iterative.hpp @@ -0,0 +1,193 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LINALG_ITERATIVE_HPP
+#define PALACE_LINALG_ITERATIVE_HPP
+
+#include <cmath>
+#include <complex>
+#include <type_traits>
+#include <vector>
+#include "linalg/operator.hpp"
+#include "linalg/solver.hpp"
+#include "linalg/vector.hpp"
+
+namespace palace
+{
+
+//
+// Iterative solvers based on Krylov subspace methods with optional preconditioning, for
+// real- or complex-valued systems.
+//
+
+// Base class for iterative solvers based on Krylov subspace methods with optional
+// preconditioning.
+template <typename OperType>
+class IterativeSolver : public Solver<OperType>
+{
+protected:
+  typedef double RealType;
+  typedef typename std::conditional<std::is_same<OperType, ComplexOperator>::value,
+                                    std::complex<RealType>, RealType>::type ScalarType;
+
+  // MPI communicator associated with the solver.
+  MPI_Comm comm;
+
+  // Control level of printing during solves.
+  mfem::IterativeSolver::PrintLevel print_opts;
+  int int_width, tab_width;
+
+  // Relative and absolute tolerances.
+  double rel_tol, abs_tol;
+
+  // Limit for the number of solver iterations.
+  int max_it;
+
+  // Operator and (optional) preconditioner associated with the iterative solver (not
+  // owned).
+  const OperType *A;
+  const Solver<OperType> *B;
+
+  // Variables set during solve to capture solve statistics.
+  mutable bool converged;
+  mutable double initial_res, final_res;
+  mutable int final_it;
+
+public:
+  IterativeSolver(MPI_Comm comm, int print);
+
+  // Set an indentation for all log printing.
+  void SetTabWidth(int width) { tab_width = width; }
+
+  // Set the relative convergence tolerance.
+  void SetTol(double tol) { SetRelTol(tol); }
+  void SetRelTol(double tol) { rel_tol = tol; }
+
+  // Set the absolute convergence tolerance.
+  void SetAbsTol(double tol) { abs_tol = tol; }
+
+  // Set the maximum number of iterations. Also sizes the iteration counter column used in
+  // log printing to the number of decimal digits of the limit.
+  void SetMaxIter(int its)
+  {
+    max_it = its;
+    // The logarithm is undefined for a non-positive count, so use a single column there.
+    int_width = (its > 0) ? 1 + static_cast<int>(std::log10(its)) : 1;
+  }
+
+  // Set the operator for the solver.
+  void SetOperator(const OperType &op) override { A = &op; }
+
+  // Set the preconditioner for the solver.
+  void SetPreconditioner(const Solver<OperType> &pc) { B = &pc; }
+
+  // Returns if the previous solve converged or not.
+  bool GetConverged() const { return converged; }
+
+  // Returns the initial (absolute) residual for the previous solve.
+  double GetInitialRes() const { return initial_res; }
+
+  // Returns the final (absolute) residual for the previous solve, which may be an estimate
+  // to the true residual.
+  double GetFinalRes() const { return final_res; }
+
+  // Returns the number of iterations for the previous solve.
+  int GetNumIterations() const { return final_it; }
+
+  // Get the associated MPI communicator.
+  MPI_Comm GetComm() const { return comm; }
+};
+
+// Preconditioned Conjugate Gradient (CG) method for SPD linear systems.
+template <typename OperType>
+class CgSolver : public IterativeSolver<OperType>
+{
+protected:
+  typedef typename Solver<OperType>::VecType VecType;
+  typedef typename IterativeSolver<OperType>::RealType RealType;
+  typedef typename IterativeSolver<OperType>::ScalarType ScalarType;
+
+  // Members of a dependent base class are not found by unqualified name lookup inside a
+  // template, so bring the ones used by the solve into scope explicitly.
+  using IterativeSolver<OperType>::initial_guess;
+  using IterativeSolver<OperType>::comm;
+  using IterativeSolver<OperType>::print_opts;
+  using IterativeSolver<OperType>::int_width;
+  using IterativeSolver<OperType>::tab_width;
+  using IterativeSolver<OperType>::rel_tol;
+  using IterativeSolver<OperType>::abs_tol;
+  using IterativeSolver<OperType>::max_it;
+  using IterativeSolver<OperType>::A;
+  using IterativeSolver<OperType>::B;
+  using IterativeSolver<OperType>::converged;
+  using IterativeSolver<OperType>::initial_res;
+  using IterativeSolver<OperType>::final_res;
+  using IterativeSolver<OperType>::final_it;
+
+  // Temporary workspace for solve.
+  mutable VecType r, z, p;
+
+public:
+  CgSolver(MPI_Comm comm, int print) : IterativeSolver<OperType>(comm, print) {}
+
+  void Mult(const VecType &b, VecType &x) const override;
+};
+
+// Preconditioned Generalized Minimum Residual Method (GMRES) for general nonsymmetric
+// linear systems.
+template <typename OperType>
+class GmresSolver : public IterativeSolver<OperType>
+{
+public:
+  enum class OrthogType
+  {
+    MGS,
+    CGS,
+    CGS2
+  };
+
+  enum class PrecSide
+  {
+    LEFT,
+    RIGHT
+  };
+
+protected:
+  typedef typename Solver<OperType>::VecType VecType;
+  typedef typename IterativeSolver<OperType>::RealType RealType;
+  typedef typename IterativeSolver<OperType>::ScalarType ScalarType;
+
+  // Members of a dependent base class are not found by unqualified name lookup inside a
+  // template, so bring the ones used by the solve into scope explicitly.
+  using IterativeSolver<OperType>::initial_guess;
+  using IterativeSolver<OperType>::comm;
+  using IterativeSolver<OperType>::print_opts;
+  using IterativeSolver<OperType>::int_width;
+  using IterativeSolver<OperType>::tab_width;
+  using IterativeSolver<OperType>::rel_tol;
+  using IterativeSolver<OperType>::abs_tol;
+  using IterativeSolver<OperType>::max_it;
+  using IterativeSolver<OperType>::A;
+  using IterativeSolver<OperType>::B;
+  using IterativeSolver<OperType>::converged;
+  using IterativeSolver<OperType>::initial_res;
+  using IterativeSolver<OperType>::final_res;
+  using IterativeSolver<OperType>::final_it;
+
+  // Maximum subspace dimension for restarted GMRES.
+  mutable int max_dim;
+
+  // Orthogonalization method for orthonormalizing a newly computed vector against a basis
+  // at each iteration.
+  OrthogType orthog_type;
+
+  // Use left or right preconditioning.
+  PrecSide pc_side;
+
+  // Flag for flexible GMRES which stores and makes use of the preconditioned vectors.
+  const bool flexible;
+
+  // Temporary workspace for solve.
+  mutable std::vector<VecType> V, Z;
+  mutable VecType r;
+  mutable std::vector<ScalarType> H;
+  mutable std::vector<ScalarType> s, sn;
+  mutable std::vector<RealType> cs;
+
+  // Allocate storage for solve.
+  void Initialize() const;
+
+  GmresSolver(MPI_Comm comm, int print, bool fgmres)
+    : IterativeSolver<OperType>(comm, print), max_dim(-1), orthog_type(OrthogType::MGS),
+      pc_side(fgmres ? PrecSide::RIGHT : PrecSide::LEFT), flexible(fgmres)
+  {
+  }
+
+public:
+  GmresSolver(MPI_Comm comm, int print) : GmresSolver(comm, print, false) {}
+
+  // Set the dimension for restart.
+  void SetRestartDim(int dim) { max_dim = dim; }
+
+  // Set the orthogonalization method.
+  void SetOrthogonalization(OrthogType type) { orthog_type = type; }
+
+  // Set the side for preconditioning.
+  virtual void SetPrecSide(PrecSide side) { pc_side = side; }
+
+  void Mult(const VecType &b, VecType &x) const override;
+};
+
+// Preconditioned Flexible Generalized Minimum Residual Method (FGMRES) for general
+// nonsymmetric linear systems with a non-constant preconditioner.
+template <typename OperType>
+class FgmresSolver : public GmresSolver<OperType>
+{
+public:
+  // The enum lives in a dependent base class, so it must be named explicitly.
+  typedef typename GmresSolver<OperType>::PrecSide PrecSide;
+
+  FgmresSolver(MPI_Comm comm, int print) : GmresSolver<OperType>(comm, print, true) {}
+
+  // Only validates the request: the side is fixed to RIGHT at construction.
+  void SetPrecSide(PrecSide side) override
+  {
+    MFEM_VERIFY(side == PrecSide::RIGHT,
+                "FGMRES solver only supports right preconditioning!");
+  }
+};
+
+}  // namespace palace
+
+#endif  // PALACE_LINALG_ITERATIVE_HPP
diff --git a/palace/linalg/orthog.hpp b/palace/linalg/orthog.hpp
new file mode 100644
index 000000000..32c8e7220
--- /dev/null
+++ b/palace/linalg/orthog.hpp
@@ -0,0 +1,63 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LINALG_ORTHOG_HPP
+#define PALACE_LINALG_ORTHOG_HPP
+
+#include <vector>
+#include "linalg/vector.hpp"
+#include "utils/communication.hpp"
+
+namespace palace::linalg
+{
+
+//
+// Orthogonalization functions for orthogonalizing a vector against a number of basis
+// vectors using modified or classical Gram-Schmidt.
+//
+
+// Modified Gram-Schmidt: orthogonalize w against the first m vectors of V one at a time,
+// storing the projection coefficients in H[0..m-1]. Uses one global reduction per vector.
+template <typename VecType, typename ScalarType>
+inline void OrthogonalizeColumnMGS(MPI_Comm comm, const std::vector<VecType> &V, VecType &w,
+                                   ScalarType *H, int m)
+{
+  MFEM_ASSERT(m >= 0 && static_cast<std::size_t>(m) <= V.size(),
+              "Out of bounds number of columns for MGS orthogonalization!");
+  for (int j = 0; j < m; j++)
+  {
+    H[j] = linalg::Dot(comm, w, V[j]);  // Global inner product
+    w.Add(-H[j], V[j]);
+  }
+}
+
+// Classical Gram-Schmidt: orthogonalize w against the first m vectors of V using a single
+// global reduction for all coefficients, stored in H[0..m-1]. With refine = true a second
+// pass is performed and its coefficients are accumulated into H (CGS2).
+template <typename VecType, typename ScalarType>
+inline void OrthogonalizeColumnCGS(MPI_Comm comm, const std::vector<VecType> &V, VecType &w,
+                                   ScalarType *H, int m, bool refine = false)
+{
+  MFEM_ASSERT(m >= 0 && static_cast<std::size_t>(m) <= V.size(),
+              "Out of bounds number of columns for CGS orthogonalization!");
+  for (int j = 0; j < m; j++)
+  {
+    H[j] = w * V[j];  // Local inner product
+  }
+  Mpi::GlobalSum(m, H, comm);
+  for (int j = 0; j < m; j++)
+  {
+    w.Add(-H[j], V[j]);
+  }
+  if (refine)
+  {
+    std::vector<ScalarType> dH(m);
+    for (int j = 0; j < m; j++)
+    {
+      dH[j] = w * V[j];  // Local inner product
+    }
+    Mpi::GlobalSum(m, dH.data(), comm);
+    for (int j = 0; j < m; j++)
+    {
+      H[j] += dH[j];
+      w.Add(-dH[j], V[j]);
+    }
+  }
+}
+
+}  // namespace palace::linalg
+
+#endif  // PALACE_LINALG_ORTHOG_HPP
diff --git a/palace/linalg/solver.cpp b/palace/linalg/solver.cpp
new file mode 100644
index 000000000..f402e43f8
--- /dev/null
+++ b/palace/linalg/solver.cpp
@@ -0,0 +1,41 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "solver.hpp"
+
+namespace palace
+{
+
+template <>
+void WrapperSolver<Operator>::SetOperator(const Operator &op)
+{
+  pc->SetOperator(op);
+}
+
+template <>
+void WrapperSolver<ComplexOperator>::SetOperator(const ComplexOperator &op)
+{
+  MFEM_VERIFY(op.IsReal() && op.HasReal(),
+              "WrapperSolver::SetOperator assumes an operator which is purely real!");
+  pc->SetOperator(*op.Real());
+}
+
+template <>
+void WrapperSolver<Operator>::Mult(const Vector &x, Vector &y) const
+{
+  pc->Mult(x, y);
+}
+
+template <>
+void WrapperSolver<ComplexOperator>::Mult(const ComplexVector &x, ComplexVector &y) const
+{
+  mfem::Array<const Vector *> X(2);
+  mfem::Array<Vector *> Y(2);
+  X[0] = &x.Real();
+  X[1] = &x.Imag();
+  Y[0] = &y.Real();
+  Y[1] = &y.Imag();
+  pc->ArrayMult(X, Y);
+}
+
+}  // namespace palace
diff --git a/palace/linalg/solver.hpp b/palace/linalg/solver.hpp
new file mode 100644
index 000000000..22841295a
--- /dev/null
+++ b/palace/linalg/solver.hpp
@@ -0,0 +1,86 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LINALG_SOLVER_HPP
+#define PALACE_LINALG_SOLVER_HPP
+
+#include <memory>
+#include <type_traits>
+#include "linalg/operator.hpp"
+#include "linalg/vector.hpp"
+
+namespace palace
+{
+
+//
+// The base Solver class is a templated version of mfem::Solver for operation with
+// real- or complex-valued operators.
+//
+
+// Abstract base class for real-valued or complex-valued solvers.
+template <typename OperType>
+class Solver
+{
+  static_assert(std::is_same<OperType, Operator>::value ||
+                    std::is_same<OperType, ComplexOperator>::value,
+                "Solver can only be defined for OperType = Operator or ComplexOperator!");
+
+protected:
+  typedef typename std::conditional<std::is_same<OperType, ComplexOperator>::value,
+                                    ComplexVector, Vector>::type VecType;
+
+  // Whether or not to use the second argument of Mult() as an initial guess.
+  bool initial_guess;
+
+public:
+  Solver(bool initial_guess = false) : initial_guess(initial_guess) {}
+
+  // Virtual destructor so derived solvers can be destroyed through a base class pointer.
+  virtual ~Solver() = default;
+
+  // Configure whether or not to use an initial guess when applying the solver.
+  virtual void SetInitialGuess(bool guess) { initial_guess = guess; }
+
+  // Set the operator associated with the solver, or update it if called repeatedly.
+  virtual void SetOperator(const OperType &op) = 0;
+
+  // Apply the solver.
+  virtual void Mult(const VecType &x, VecType &y) const = 0;
+
+  // Apply the solver for the transpose problem.
+  virtual void MultTranspose(const VecType &x, VecType &y) const
+  {
+    MFEM_ABORT("MultTranspose() is not implemented for base class Solver!");
+  }
+};
+
+// This solver wraps a real-valued mfem::Solver for application to complex-valued problems
+// as a preconditioner inside of a Solver<OperType>.
+template <typename OperType>
+class WrapperSolver : public Solver<OperType>
+{
+private:
+  typedef typename Solver<OperType>::VecType VecType;
+
+  std::unique_ptr<mfem::Solver> pc;
+
+public:
+  WrapperSolver(std::unique_ptr<mfem::Solver> &&pc)
+    : Solver<OperType>(pc->iterative_mode), pc(std::move(pc))
+  {
+  }
+
+  void SetInitialGuess(bool guess) override
+  {
+    Solver<OperType>::SetInitialGuess(guess);
+    pc->iterative_mode = guess;
+  }
+
+  void SetOperator(const OperType &op) override;
+
+  void Mult(const VecType &x, VecType &y) const override;
+};
+
+}  // namespace palace
+
+#endif  // PALACE_LINALG_SOLVER_HPP
From be224816601863eca1eefa945b2ba09df23186cb Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg
Date: Wed, 24 May 2023 12:06:24 -0700
Subject: [PATCH 13/41] WIP: Refactor ParOperator/ComplexParOperator classes and other linear algebra and operator wrappers for complex-valued linear systems

---
 palace/linalg/operator.cpp           | 809 ++++++++++++++++-----------
 palace/linalg/operator.hpp           | 396 ++++++++-----
 palace/linalg/rap.cpp                | 742 ++++++++++++++++++++++++
 palace/linalg/rap.hpp                | 227 ++++++++
 palace/linalg/vector.cpp             | 430 ++++++++++++--
 palace/linalg/vector.hpp             | 157 +++++-
 palace/models/domainpostoperator.hpp |   3 +-
 palace/models/postoperator.hpp       |   1 +
 8 files changed, 2245 insertions(+), 520 deletions(-)
 create mode 100644 palace/linalg/rap.cpp
 create mode 100644 palace/linalg/rap.hpp

diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp
index 
3b83397a0..17af1dc69 100644 --- a/palace/linalg/operator.cpp +++ b/palace/linalg/operator.cpp @@ -4,484 +4,647 @@ #include "operator.hpp" #include -#include "linalg/complex.hpp" #include "linalg/slepc.hpp" -#include "linalg/vector.hpp" #include "utils/communication.hpp" namespace palace { -ParOperator::ParOperator(std::unique_ptr &&A, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, - bool test_restrict) - : Operator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), A_(std::move(A)), - trial_fespace_(trial_fespace), test_fespace_(test_fespace), use_R_(test_restrict), - trial_dbc_tdof_list_(nullptr), test_dbc_tdof_list_(nullptr), - diag_policy_(DiagonalPolicy::DIAG_ONE), RAP_(nullptr), save_A_(false) +bool ComplexOperator::IsReal() const { - MFEM_VERIFY(A_, "Cannot construct ParOperator from an empty matrix!"); - lx_.SetSize(A_->Width()); - ly_.SetSize(A_->Height()); - tx_.SetSize(width); - if (height != width) - { - ty_.SetSize(height); - } - else - { - ty_.MakeRef(tx_, 0, height); - } + MFEM_ABORT("IsReal() is not implemented for base class ComplexOperator!"); + return false; } -void ParOperator::EliminateRHS(const Vector &x, Vector &b) const +bool ComplexOperator::IsImag() const { - if (!trial_dbc_tdof_list_ || !test_dbc_tdof_list_) - { - return; - } + MFEM_ABORT("IsImag() is not implemented for base class ComplexOperator!"); + return false; +} - MFEM_VERIFY(A_, "No local matrix available for ParOperator::EliminateRHS!"); - tx_ = 0.0; - { - const int N = trial_dbc_tdof_list_->Size(); - const auto *idx = trial_dbc_tdof_list_->Read(); - const auto *X = x.Read(); - auto *TX = tx_.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - TX[id] = X[id]; - }); - } +bool ComplexOperator::HasReal() const +{ + MFEM_ABORT("HasReal() is not implemented for base class ComplexOperator!"); + return false; +} + +bool ComplexOperator::HasImag() const +{ + MFEM_ABORT("HasImag() is 
not implemented for base class ComplexOperator!"); + return false; +} - // Apply the unconstrained operator. - trial_fespace_.GetProlongationMatrix()->Mult(tx_, lx_); - A_->Mult(lx_, ly_); - if (!use_R_) +const Operator *ComplexOperator::Real() const +{ + MFEM_ABORT("Real() is not implemented for base class ComplexOperator!"); + return nullptr; +} + +Operator *ComplexOperator::Real() +{ + MFEM_ABORT("Real() is not implemented for base class ComplexOperator!"); + return nullptr; +} + +const Operator *ComplexOperator::Imag() const +{ + MFEM_ABORT("Imag() is not implemented for base class ComplexOperator!"); + return nullptr; +} + +Operator *ComplexOperator::Imag() +{ + MFEM_ABORT("Imag() is not implemented for base class ComplexOperator!"); + return nullptr; +} + +void ComplexOperator::MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, bool zero_real, bool zero_imag) const +{ + MFEM_ABORT("Base class ComplexOperator does not implement MultTranspose!"); +} + +void ComplexOperator::MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, bool zero_real, + bool zero_imag) const +{ + MFEM_ABORT("Base class ComplexOperator does not implement MultHermitianTranspose!"); +} + +void ComplexOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a, bool zero_real, + bool zero_imag) const +{ + MFEM_ABORT("Base class ComplexOperator does not implement AddMult!"); +} + +void ComplexOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, const std::complex a, + bool zero_real, bool zero_imag) const +{ + MFEM_ABORT("Base class ComplexOperator does not implement AddMultTranspose!"); +} + +void ComplexOperator::AddMultHermitianTranspose(const Vector &xr, const Vector &xi, + Vector &yr, Vector &yi, + const std::complex a, + bool zero_real, bool zero_imag) const +{ + MFEM_ABORT("Base class ComplexOperator does not implement AddMultHermitianTranspose!"); +} + 
+ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&data_Ar, + std::unique_ptr &&data_Ai, + Operator *Ar, Operator *Ai) + : ComplexOperator(Ar ? Ar->Height() : (Ai ? Ai->Height() : 0), + Ar ? Ar->Width() : (Ai ? Ai->Width() : 0)), + data_Ar(std::move(data_Ar)), data_Ai(std::move(data_Ai)), + Ar(this->data_Ar ? this->data_Ar.get() : Ar), + Ai(this->data_Ai ? this->data_Ai.get() : Ai) +{ + MFEM_VERIFY(Ar || Ai, "Cannot construct ComplexWrapperOperator from an empty matrix!"); + MFEM_VERIFY((!Ar || !Ai) || (Ar->Height() == Ai->Height() && Ar->Width() == Ai->Width()), + "Mismatch in dimension of real and imaginary matrix parts!"); +} + +ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&Ar, + std::unique_ptr &&Ai) + : ComplexWrapperOperator(std::move(Ar), std::move(Ai), nullptr, nullptr) +{ +} + +ComplexWrapperOperator::ComplexWrapperOperator(Operator *Ar, Operator *Ai) + : ComplexWrapperOperator(nullptr, nullptr, Ar, Ai) +{ +} + +void ComplexWrapperOperator::Mult(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, bool zero_real, bool zero_imag) const +{ + if (Ar) { - test_fespace_.GetProlongationMatrix()->AddMultTranspose(ly_, b, -1.0); + if (!zero_real) + { + Ar->Mult(xr, yr); + } + if (!zero_imag) + { + Ar->Mult(xi, yi); + } } else { - test_fespace_.GetRestrictionMatrix()->AddMult(ly_, b, -1.0); + yr = 0.0; + yi = 0.0; } - + if (Ai) { - if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) - { - const int N = test_dbc_tdof_list_->Size(); - const auto *idx = test_dbc_tdof_list_->Read(); - const auto *X = x.Read(); - auto *B = b.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - B[id] = X[id]; - }); - } - else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) + if (!zero_imag) { - b.SetSubVector(*test_dbc_tdof_list_, 0.0); + Ai->AddMult(xi, yr, -1.0); } - else + if (!zero_real) { - MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + 
Ai->AddMult(xr, yi, 1.0); } } } -void ParOperator::AssembleDiagonal(Vector &diag) const +void ComplexWrapperOperator::MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, bool zero_real, bool zero_imag) const { - if (RAP_) - { - RAP_->GetDiag(diag); - return; - } - - // For an AMR mesh, a convergent diagonal is assembled with |P|ᵀ dₗ, where |P| has - // entry-wise absolute values of the conforming prolongation operator. - MFEM_VERIFY(&trial_fespace_ == &test_fespace_, - "Diagonal assembly is only available for square ParOperator!"); - if (auto *bfA = dynamic_cast(A_.get())) + if (Ar) { - if (bfA->HasSpMat()) + if (!zero_real) { - bfA->SpMat().GetDiag(ly_); + Ar->MultTranspose(xr, yr); } - else if (bfA->HasExt()) + if (!zero_imag) { - bfA->Ext().AssembleDiagonal(ly_); - } - else - { - MFEM_ABORT("Unable to assemble the local operator diagonal of BilinearForm!"); + Ar->MultTranspose(xi, yi); } } - else if (auto *sA = dynamic_cast(A_.get())) + else { - sA->GetDiag(ly_); + yr = 0.0; + yi = 0.0; } - else + if (Ai) { - MFEM_ABORT("ParOperator::AssembleDiagonal requires A as a BilinearForm or " - "SparseMatrix!"); + if (!zero_imag) + { + Ai->AddMultTranspose(xi, yr, -1.0); + } + if (!zero_real) + { + Ai->AddMultTranspose(xr, yi, 1.0); + } } +} - const Operator *P = test_fespace_.GetProlongationMatrix(); - if (const auto *hP = dynamic_cast(P)) +void ComplexWrapperOperator::MultHermitianTranspose(const Vector &xr, const Vector &xi, + Vector &yr, Vector &yi, bool zero_real, + bool zero_imag) const +{ + if (Ar) { - hP->AbsMultTranspose(1.0, ly_, 0.0, diag); + if (!zero_real) + { + Ar->MultTranspose(xr, yr); + } + if (!zero_imag) + { + Ar->MultTranspose(xi, yi); + } } else { - P->MultTranspose(ly_, diag); + yr = 0.0; + yi = 0.0; } - - if (test_dbc_tdof_list_) + if (Ai) { - if (diag_policy_ == DiagonalPolicy::DIAG_ONE) + if (!zero_imag) { - diag.SetSubVector(*test_dbc_tdof_list_, 1.0); + Ai->AddMultTranspose(xi, yr, 1.0); } - else if (diag_policy_ == 
DiagonalPolicy::DIAG_ZERO) + if (!zero_real) { - diag.SetSubVector(*test_dbc_tdof_list_, 0.0); - } - else - { - MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + Ai->AddMultTranspose(xr, yi, -1.0); } } } -mfem::HypreParMatrix &ParOperator::ParallelAssemble() +void ComplexWrapperOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, const std::complex a, + bool zero_real, bool zero_imag) const { - if (RAP_) - { - return *RAP_; + if (a.real() != 0.0 && a.imag() != 0.0) + { + ty.SetSize(height); + Mult(xr, xi, ty.Real(), ty.Imag(), zero_real, zero_imag); + const int N = height; + const double ar = a.real(); + const double ai = a.imag(); + const auto *TYR = ty.Real().Read(); + const auto *TYI = ty.Imag().Read(); + auto *YR = yr.ReadWrite(); + auto *YI = yi.ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] += ar * TYR[i] - ai * TYI[i]; + YI[i] += ai * TYR[i] + ar * TYI[i]; + }); } - - // XX TODO: For mfem::AssemblyLevel::PARTIAL, we cannot use CeedOperatorFullAssemble for - // a ND space with p > 1. We should throw an error here that the user needs to - // use AssemblyLevel::LEGACY in this case. - - // Build the square or rectangular RᵀAP HypreParMatrix. 
- if (&trial_fespace_ == &test_fespace_) + else if (a.real() != 0.0) { - mfem::SparseMatrix *lA; - bool own_lA = false; - if (auto *bfA = dynamic_cast(A_.get())) + if (Ar) { -#ifdef MFEM_USE_CEED - if (bfA->HasSpMat()) - { - lA = &bfA->SpMat(); - } - else if (bfA->HasExt()) + if (!zero_real) { - lA = mfem::ceed::CeedOperatorFullAssemble(*bfA); - own_lA = true; + Ar->AddMult(xr, yr, a.real()); } - else + if (!zero_imag) { - MFEM_ABORT("Unable to assemble the local operator for parallel assembly of " - "BilinearForm!"); + Ar->AddMult(xi, yi, a.real()); } -#else - MFEM_VERIFY(bfA->HasSpMat(), - "Missing assembled SparseMatrix for parallel assembly of BilinearForm!"); - lA = &bfA->SpMat(); -#endif } - else if (auto *sA = dynamic_cast(A_.get())) + if (Ai) { - lA = sA; + if (!zero_imag) + { + Ai->AddMult(xi, yr, -a.real()); + } + if (!zero_real) + { + Ai->AddMult(xr, yi, a.real()); + } } - else + } + else if (a.imag() != 0.0) + { + if (Ar) { - MFEM_ABORT("ParOperator::ParallelAssemble requires A as a BilinearForm or " - "SparseMatrix!"); - lA = nullptr; + if (!zero_real) + { + Ar->AddMult(xr, yi, a.imag()); + } + if (!zero_imag) + { + Ar->AddMult(xi, yr, -a.imag()); + } } - mfem::HypreParMatrix *hA = - new mfem::HypreParMatrix(trial_fespace_.GetComm(), trial_fespace_.GlobalVSize(), - trial_fespace_.GetDofOffsets(), lA); - const mfem::HypreParMatrix *P = trial_fespace_.Dof_TrueDof_Matrix(); - RAP_ = std::make_unique(hypre_ParCSRMatrixRAP(*P, *hA, *P), true); - delete hA; - if (own_lA) + if (Ai) { - delete lA; + if (!zero_imag) + { + Ai->AddMult(xi, yi, -a.imag()); + } + if (!zero_real) + { + Ai->AddMult(xr, yr, -a.imag()); + } } } - else +} + +void ComplexWrapperOperator::AddMultTranspose(const Vector &xr, const Vector &xi, + Vector &yr, Vector &yi, + const std::complex a, bool zero_real, + bool zero_imag) const +{ + if (a.real() != 0.0 && a.imag() != 0.0) + { + tx.SetSize(width); + MultTranspose(xr, xi, tx.Real(), tx.Imag(), zero_real, zero_imag); + const int N = width; 
+ const double ar = a.real(); + const double ai = a.imag(); + const auto *TXR = tx.Real().Read(); + const auto *TXI = tx.Imag().Read(); + auto *YR = yr.ReadWrite(); + auto *YI = yi.ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] += ar * TXR[i] - ai * TXI[i]; + YI[i] += ai * TXR[i] + ar * TXI[i]; + }); + } + else if (a.real() != 0.0) { - mfem::SparseMatrix *lA; - bool own_lA = false; - if (auto *mbfA = dynamic_cast(A_.get())) + if (Ar) { -#ifdef MFEM_USE_CEED - if (mbfA->HasSpMat()) + if (!zero_real) { - lA = &mbfA->SpMat(); + Ar->AddMultTranspose(xr, yr, a.real()); } - else if (bfA->HasExt()) + if (!zero_imag) { - lA = mfem::ceed::CeedOperatorFullAssemble(*bfA); - own_lA = true; + Ar->AddMultTranspose(xi, yi, a.real()); } - else - { - MFEM_ABORT("Unable to assemble the local operator for parallel assembly of " - "MixedBilinearForm!"); - } -#else - MFEM_VERIFY( - mbfA->HasSpMat(), - "Missing assembled SparseMatrix for parallel assembly of MixedBilinearForm!"); - lA = &mbfA->SpMat(); -#endif } - else if (auto *sA = dynamic_cast(A_.get())) + if (Ai) { - lA = sA; + if (!zero_imag) + { + Ai->AddMultTranspose(xi, yr, -a.real()); + } + if (!zero_real) + { + Ai->AddMultTranspose(xr, yi, a.real()); + } } - else + } + else if (a.imag() != 0.0) + { + if (Ar) { - MFEM_ABORT("ParOperator::ParallelAssemble requires A as a MixedBilinearForm or " - "SparseMatrix!"); - lA = nullptr; + if (!zero_real) + { + Ar->AddMultTranspose(xr, yi, a.imag()); + } + if (!zero_imag) + { + Ar->AddMultTranspose(xi, yr, -a.imag()); + } } - mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( - trial_fespace_.GetComm(), test_fespace_.GlobalVSize(), trial_fespace_.GlobalVSize(), - test_fespace_.GetDofOffsets(), trial_fespace_.GetDofOffsets(), lA); - const mfem::HypreParMatrix *P = trial_fespace_.Dof_TrueDof_Matrix(); - if (!use_R_) + if (Ai) { - const mfem::HypreParMatrix *Rt = test_fespace_.Dof_TrueDof_Matrix(); - RAP_ = - std::make_unique(hypre_ParCSRMatrixRAP(*Rt, *hA, *P), 
true); + if (!zero_imag) + { + Ai->AddMultTranspose(xi, yi, -a.imag()); + } + if (!zero_real) + { + Ai->AddMultTranspose(xr, yr, -a.imag()); + } } - else + } +} + +void ComplexWrapperOperator::AddMultHermitianTranspose(const Vector &xr, const Vector &xi, + Vector &yr, Vector &yi, + const std::complex a, + bool zero_real, bool zero_imag) const +{ + if (a.real() != 0.0 && a.imag() != 0.0) + { + tx.SetSize(width); + MultHermitianTranspose(xr, xi, tx.Real(), tx.Imag(), zero_real, zero_imag); + const int N = width; + const double ar = a.real(); + const double ai = a.imag(); + const auto *TXR = tx.Real().Read(); + const auto *TXI = tx.Imag().Read(); + auto *YR = yr.ReadWrite(); + auto *YI = yi.ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] += ar * TXR[i] - ai * TXI[i]; + YI[i] += ai * TXR[i] + ar * TXI[i]; + }); + } + else if (a.real() != 0.0) + { + if (Ar) { - mfem::SparseMatrix *sRt = mfem::Transpose(*test_fespace_.GetRestrictionMatrix()); - mfem::HypreParMatrix *hRt = new mfem::HypreParMatrix( - test_fespace_.GetComm(), test_fespace_.GlobalVSize(), - test_fespace_.GlobalTrueVSize(), test_fespace_.GetDofOffsets(), - test_fespace_.GetTrueDofOffsets(), sRt); - RAP_ = std::make_unique(hypre_ParCSRMatrixRAP(*hRt, *hA, *P), - true); - delete sRt; - delete hRt; + if (!zero_real) + { + Ar->AddMultTranspose(xr, yr, a.real()); + } + if (!zero_imag) + { + Ar->AddMultTranspose(xi, yi, a.real()); + } } - delete hA; - if (own_lA) + if (Ai) { - delete lA; + if (!zero_imag) + { + Ai->AddMultTranspose(xi, yr, a.real()); + } + if (!zero_real) + { + Ai->AddMultTranspose(xr, yi, -a.real()); + } } } - hypre_ParCSRMatrixSetNumNonzeros(*RAP_); - - // Delete the original local operator. - if (!save_A_) + else if (a.imag() != 0.0) { - A_.reset(); - } - - // Eliminate boundary conditions on the assembled matrix. - if (test_dbc_tdof_list_ || trial_dbc_tdof_list_) - { - if (test_dbc_tdof_list_ == trial_dbc_tdof_list_) + if (Ar) { - // Elimination for a square operator. 
- MFEM_VERIFY( - &trial_fespace_ == &test_fespace_, - "Only square ParOperator should have same trial and test eliminated tdofs!"); - RAP_->EliminateBC(*trial_dbc_tdof_list_, diag_policy_); + if (!zero_real) + { + Ar->AddMultTranspose(xr, yi, a.imag()); + } + if (!zero_imag) + { + Ar->AddMultTranspose(xi, yr, -a.imag()); + } } - else + if (Ai) { - // Rectangular elimination sets all eliminated rows/columns to zero. - if (test_dbc_tdof_list_) + if (!zero_imag) { - RAP_->EliminateRows(*test_dbc_tdof_list_); + Ai->AddMultTranspose(xi, yi, a.imag()); } - if (trial_dbc_tdof_list_) + if (!zero_real) { - mfem::HypreParMatrix *RAPe = RAP_->EliminateCols(*trial_dbc_tdof_list_); - delete RAPe; + Ai->AddMultTranspose(xr, yr, a.imag()); } } } - return *RAP_; } -void ParOperator::Mult(const Vector &x, Vector &y) const +SumOperator::SumOperator(const Operator &op, double c) : Operator(op.Height(), op.Width()) { - if (RAP_) + AddOperator(op, c); +} + +void SumOperator::AddOperator(const Operator &op, double c) +{ + MFEM_VERIFY(op.Height() == height && op.Width() == width, + "Invalid Operator dimensions for SumOperator!"); + ops.emplace_back(&op, c); +} + +void SumOperator::Mult(const Vector &x, Vector &y) const +{ + if (ops.size() == 1 && ops[0].second == 1.0) { - RAP_->Mult(x, y); - return; + return ops[0].first->Mult(x, y); } y = 0.0; AddMult(x, y); } -void ParOperator::MultTranspose(const Vector &x, Vector &y) const +void SumOperator::MultTranspose(const Vector &x, Vector &y) const { - if (RAP_) + if (ops.size() == 1 && ops[0].second == 1.0) { - RAP_->MultTranspose(x, y); - return; + return ops[0].first->MultTranspose(x, y); } y = 0.0; AddMultTranspose(x, y); } -void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const +void SumOperator::AddMult(const Vector &x, Vector &y, const double a) const { - if (RAP_) + for (const auto &[op, c] : ops) { - RAP_->AddMult(x, y, a); - return; + op->AddMult(x, y, a * c); } - MFEM_ASSERT(x.Size() == width && y.Size() == 
height, - "Incompatible dimensions for ParOperator::AddMult!"); - if (trial_dbc_tdof_list_) +} + +void SumOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) const +{ + for (const auto &[op, c] : ops) { - tx_ = x; - tx_.SetSubVector(*trial_dbc_tdof_list_, 0.0); + op->AddMultTranspose(x, y, a * c); } - trial_fespace_.GetProlongationMatrix()->Mult(trial_dbc_tdof_list_ ? tx_ : x, lx_); +} + +ComplexSumOperator::ComplexSumOperator(const ComplexOperator &op, std::complex c) + : ComplexOperator(op.Height(), op.Width()) +{ + AddOperator(op, c); +} - // Apply the operator on the L-vector. - A_->Mult(lx_, ly_); +void ComplexSumOperator::AddOperator(const ComplexOperator &op, std::complex c) +{ + MFEM_VERIFY(op.Height() == height && op.Width() == width, + "Invalid Operator dimensions for ComplexSumOperator!"); + ops.emplace_back(&op, c); +} - if (test_dbc_tdof_list_) +bool ComplexSumOperator::IsReal() const +{ + for (const auto &[op, c] : ops) { - if (!use_R_) - { - test_fespace_.GetProlongationMatrix()->MultTranspose(ly_, ty_); - } - else + if (!op->IsReal()) { - test_fespace_.GetRestrictionMatrix()->Mult(ly_, ty_); + return false; } - if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) - { - const int N = test_dbc_tdof_list_->Size(); - const auto *idx = test_dbc_tdof_list_->Read(); - const auto *X = x.Read(); - auto *TY = ty_.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - TY[id] = X[id]; - }); - } - else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) - { - ty_.SetSubVector(*test_dbc_tdof_list_, 0.0); - } - else - { - MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); - } - y.Add(a, ty_); } - else + return true; +} + +bool ComplexSumOperator::IsImag() const +{ + for (const auto &[op, c] : ops) { - if (!use_R_) - { - test_fespace_.GetProlongationMatrix()->AddMultTranspose(ly_, y, a); - } - else + if (!op->IsImag()) { - 
test_fespace_.GetRestrictionMatrix()->AddMult(ly_, y, a); + return false; } } + return true; } -void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) const +void ComplexSumOperator::Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real, bool zero_imag) const { - if (RAP_) + if (ops.size() == 1 && ops[0].second == 1.0) { - RAP_->AddMultTranspose(x, y, a); - return; + return ops[0].first->Mult(xr, xi, yr, yi, zero_real, zero_imag); } - MFEM_ASSERT(x.Size() == height && y.Size() == width, - "Incompatible dimensions for ParOperator::AddMultTranspose!"); - if (test_dbc_tdof_list_) + yr = 0.0; + yi = 0.0; + AddMult(xr, xi, yr, yi, 1.0, zero_real, zero_imag); +} + +void ComplexSumOperator::MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, bool zero_real, bool zero_imag) const +{ + if (ops.size() == 1 && ops[0].second == 1.0) { - ty_ = x; - ty_.SetSubVector(*test_dbc_tdof_list_, 0.0); + return ops[0].first->MultTranspose(xr, xi, yr, yi, zero_real, zero_imag); } - if (!use_R_) + yr = 0.0; + yi = 0.0; + AddMultTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); +} + +void ComplexSumOperator::MultHermitianTranspose(const Vector &xr, const Vector &xi, + Vector &yr, Vector &yi, bool zero_real, + bool zero_imag) const +{ + if (ops.size() == 1 && ops[0].second == 1.0) { - test_fespace_.GetProlongationMatrix()->Mult(test_dbc_tdof_list_ ? ty_ : x, ly_); + return ops[0].first->MultHermitianTranspose(xr, xi, yr, yi, zero_real, zero_imag); } - else + yr = 0.0; + yi = 0.0; + AddMultHermitianTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); +} + +void ComplexSumOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a, bool zero_real, + bool zero_imag) const +{ + for (const auto &[op, c] : ops) { - test_fespace_.GetRestrictionMatrix()->MultTranspose(test_dbc_tdof_list_ ? 
ty_ : x, ly_); + op->AddMult(xr, xi, yr, yi, a * c, zero_real, zero_imag); } +} - // Apply the operator on the L-vector. - A_->MultTranspose(ly_, lx_); - - if (trial_dbc_tdof_list_) +void ComplexSumOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, const std::complex a, + bool zero_real, bool zero_imag) const +{ + for (const auto &[op, c] : ops) { - trial_fespace_.GetProlongationMatrix()->MultTranspose(lx_, tx_); - if (diag_policy_ == DiagonalPolicy::DIAG_ONE && height == width) - { - const int N = trial_dbc_tdof_list_->Size(); - const auto *idx = trial_dbc_tdof_list_->Read(); - const auto *X = x.Read(); - auto *TX = tx_.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const int id = idx[i]; - TX[id] = X[id]; - }); - } - else if (diag_policy_ == DiagonalPolicy::DIAG_ZERO || height != width) - { - tx_.SetSubVector(*test_dbc_tdof_list_, 0.0); - } - else - { - MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); - } - y.Add(a, tx_); + op->AddMultTranspose(xr, xi, yr, yi, a * c, zero_real, zero_imag); } - else +} + +void ComplexSumOperator::AddMultHermitianTranspose(const Vector &xr, const Vector &xi, + Vector &yr, Vector &yi, + const std::complex a, + bool zero_real, bool zero_imag) const +{ + for (const auto &[op, c] : ops) { - trial_fespace_.GetProlongationMatrix()->AddMultTranspose(lx_, y, a); + op->AddMultHermitianTranspose(xr, xi, yr, yi, a * std::conj(c), zero_real, zero_imag); // (c A)ᴴ = conj(c) Aᴴ } } -void DiagonalOperator::Mult(const Vector &x, Vector &y) const +template <> +void DiagonalOperator::Mult(const Vector &x, Vector &y) const { const int N = height; - const auto *D = d_.Read(); + const auto *D = d.Read(); const auto *X = x.Read(); auto *Y = y.Write(); mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { Y[i] = D[i] * X[i]; }); } +template <> +void DiagonalOperator::Mult(const ComplexVector &x, ComplexVector &y) const +{ + const int N = height; + const auto *DR = d.Real().Read(); + const auto *DI = d.Imag().Read(); + const 
auto *XR = x.Real().Read(); + const auto *XI = x.Imag().Read(); + auto *YR = y.Real().Write(); + auto *YI = y.Imag().Write(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] = DR[i] * XR[i] - DI[i] * XI[i]; + YI[i] = DI[i] * XR[i] + DR[i] * XI[i]; + }); +} + +template <> +void DiagonalOperator::MultHermitianTranspose(const ComplexVector &x, + ComplexVector &y) const +{ + const int N = height; + const auto *DR = d.Real().Read(); + const auto *DI = d.Imag().Read(); + const auto *XR = x.Real().Read(); + const auto *XI = x.Imag().Read(); + auto *YR = y.Real().Write(); + auto *YI = y.Imag().Write(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] = DR[i] * XR[i] + DI[i] * XI[i]; + YI[i] = -DI[i] * XR[i] + DR[i] * XI[i]; + }); +} + namespace linalg { double SpectralNorm(MPI_Comm comm, const Operator &A, bool sym, double tol, int max_it) { - // The SumOperator does not take ownership of A and allows the ComplexWrapperOperator - // to own its input. - ComplexWrapperOperator Ar(std::make_unique(A, 1.0), nullptr); + ComplexWrapperOperator Ar(const_cast<Operator *>(&A), nullptr); // Non-owning constructor return SpectralNorm(comm, Ar, sym, tol, max_it); } diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp index 3485c0018..ed8bcd346 100644 --- a/palace/linalg/operator.hpp +++ b/palace/linalg/operator.hpp @@ -4,132 +4,181 @@ #ifndef PALACE_LINALG_OPERATOR_HPP #define PALACE_LINALG_OPERATOR_HPP +#include #include +#include #include #include -#include +#include "linalg/vector.hpp" namespace palace { -class ComplexOperator; - -using Operator = mfem::Operator; -using Vector = mfem::Vector; - // -// Derived operator classes extending mfem::Operator from MFEM. +// Functionality extending mfem::Operator from MFEM. // -// A parallel operator represented by RᵀAP constructed through the actions of Rᵀ, A, and P -// with possible eliminated essential BC. -class ParOperator : public Operator +// Abstract base class for complex-valued operators. 
+class ComplexOperator { -private: - std::unique_ptr A_; - const mfem::ParFiniteElementSpace &trial_fespace_, &test_fespace_; - const bool use_R_; +protected: + // The size of the complex-valued operator. + int height, width; - // Lists of constrained essential boundary true dofs for elimination. - const mfem::Array *trial_dbc_tdof_list_, *test_dbc_tdof_list_; +public: + ComplexOperator(int s) : height(s), width(s) {} + ComplexOperator(int h, int w) : height(h), width(w) {} - // Diagonal policy for constrained true dofs. - DiagonalPolicy diag_policy_; + // Get the height (size of output) of the operator. + int Height() const { return height; } - // Assembled operator as a parallel Hypre matrix. If the save flag is true, calls to - // ParallelAssemble will not delete the local operator. This is useful for later on calls - // to EliminateRHS, for example. - std::unique_ptr RAP_; - bool save_A_; + // Get the width (size of input) of the operator. + int Width() const { return width; } - // Temporary storage for operator application. - mutable Vector lx_, ly_, tx_, ty_; + // Test whether or not the operator is purely real or imaginary. + virtual bool IsReal() const; + virtual bool IsImag() const; -public: - // Construct the parallel operator, inheriting ownership of the local operator. - ParOperator(std::unique_ptr &&A, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); - ParOperator(std::unique_ptr &&A, const mfem::ParFiniteElementSpace &fespace) - : ParOperator(std::move(A), fespace, fespace, false) + // Test whether or not we can access the real and imaginary operator parts. + virtual bool HasReal() const; + virtual bool HasImag() const; + + // Get access to the real and imaginary operator parts. 
+ virtual const Operator *Real() const; + virtual Operator *Real(); + virtual const Operator *Imag() const; + virtual Operator *Imag(); + + virtual void Mult(const ComplexVector &x, ComplexVector &y) const { + Mult(x.Real(), x.Imag(), y.Real(), y.Imag()); } - // Get access to the underlying local (L-vector) operator. - const Operator &LocalOperator() const + virtual void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const = 0; + + virtual void MultTranspose(const ComplexVector &x, ComplexVector &y) const { - MFEM_ASSERT(A_, "No local matrix available for ParOperator::LocalOperator!"); - return *A_; + MultTranspose(x.Real(), x.Imag(), y.Real(), y.Imag()); } - // Set essential boundary condition true dofs for square operators. - void SetEssentialTrueDofs(const mfem::Array &dbc_tdof_list, - DiagonalPolicy diag_policy) + virtual void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const; + + virtual void MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const { - MFEM_VERIFY(height == width, "Set essential true dofs for both test and trial spaces " - "for rectangular ParOperator!"); - trial_dbc_tdof_list_ = &dbc_tdof_list; - test_dbc_tdof_list_ = &dbc_tdof_list; - diag_policy_ = diag_policy; + MultHermitianTranspose(x.Real(), x.Imag(), y.Real(), y.Imag()); } - // Set essential boundary condition true dofs for rectangular operators. 
- void SetEssentialTrueDofs(const mfem::Array *trial_dbc_tdof_list, - const mfem::Array *test_dbc_tdof_list, - DiagonalPolicy diag_policy) + virtual void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, bool zero_real = false, + bool zero_imag = false) const; + + virtual void AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const { - MFEM_VERIFY(diag_policy == DiagonalPolicy::DIAG_ZERO, - "Essential boundary condition true dof elimination for rectangular " - "ParOperator only supports DiagonalPolicy::DIAG_ZERO!"); - trial_dbc_tdof_list_ = trial_dbc_tdof_list; - test_dbc_tdof_list_ = test_dbc_tdof_list; - diag_policy_ = diag_policy; + AddMult(x.Real(), x.Imag(), y.Real(), y.Imag(), a); } - // Get the essential boundary condition true dofs associated with the operator. May be - // nullptr. - const mfem::Array *GetEssentialTrueDofs() const + virtual void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = false) const; + + virtual void AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const { - MFEM_VERIFY(trial_dbc_tdof_list_ == test_dbc_tdof_list_ && height == width, - "GetEssentialTrueDofs should only be used for square ParOperator!"); - return trial_dbc_tdof_list_; + AddMultTranspose(x.Real(), x.Imag(), y.Real(), y.Imag(), a); } - // Get access to the finite element spaces associated with the operator. 
- const mfem::ParFiniteElementSpace &GetFESpace() const + virtual void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = false) const; + + virtual void AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const { - MFEM_VERIFY(&trial_fespace_ == &test_fespace_ && height == width, - "GetFESpace should only be used for square ParOperator!"); - return trial_fespace_; + AddMultHermitianTranspose(x.Real(), x.Imag(), y.Real(), y.Imag(), a); } - // A call to ParallelAssemble will typically free the memory associated with the local - // operator as it is no longer required. When the save flag is set, the local operator - // will not be deleted during parallel assembly. - void SaveLocalOperator() { save_A_ = true; } + virtual void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, const std::complex a = 1.0, + bool zero_real = false, + bool zero_imag = false) const; +}; - // Eliminate essential true dofs from the RHS vector b, using the essential boundary - // condition values in x. - void EliminateRHS(const Vector &x, Vector &b) const; +// A complex-valued operator represented using a block 2x2 equivalent-real formulation. +class ComplexWrapperOperator : public ComplexOperator +{ +private: + // Storage and access for real and imaginary parts of the operator. + std::unique_ptr data_Ar, data_Ai; + Operator *Ar, *Ai; - // Assemble the diagonal for the parallel operator. - void AssembleDiagonal(Vector &diag) const override; + // Temporary storage for operator application. + mutable ComplexVector tx, ty; - // Assemble the operator as a parallel sparse matrix. This frees the memory associated - // with the local operator. 
- mfem::HypreParMatrix &ParallelAssemble(); + ComplexWrapperOperator(std::unique_ptr &&data_Ar, + std::unique_ptr &&data_Ai, Operator *Ar, Operator *Ai); - // Steal the assembled parallel sparse matrix. The local operator is saved so that this - // object still can perform operations after this is called. - std::unique_ptr StealParallelAssemble() - { - SaveLocalOperator(); - ParallelAssemble(); - return std::move(RAP_); - } +public: + // Construct a complex operator which inherits ownership of the input real and imaginary + // parts. + ComplexWrapperOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai); + + // Non-owning constructor. + ComplexWrapperOperator(Operator *Ar, Operator *Ai); + + bool IsReal() const override { return Ai == nullptr; } + bool IsImag() const override { return Ar == nullptr; } + bool HasReal() const override { return Ar != nullptr; } + bool HasImag() const override { return Ai != nullptr; } + const Operator *Real() const override { return Ar; } + Operator *Real() override { return Ar; } + const Operator *Imag() const override { return Ai; } + Operator *Imag() override { return Ai; } + + using ComplexOperator::AddMult; + using ComplexOperator::AddMultHermitianTranspose; + using ComplexOperator::AddMultTranspose; + using ComplexOperator::Mult; + using ComplexOperator::MultHermitianTranspose; + using ComplexOperator::MultTranspose; + + void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const override; + + void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const override; + + void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, + bool zero_imag = false) const override; + + void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = false) const override; + + 
void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = false) const override; + + void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = false) const override; +}; + +// Wrap a sequence of operators of the same dimensions and optional coefficients. +class SumOperator : public Operator +{ +private: + std::vector> ops; + +public: + SumOperator(int s) : Operator(s) {} + SumOperator(int h, int w) : Operator(h, w) {} + SumOperator(const Operator &op, double c = 1.0); - // Get the associated MPI communicator. - MPI_Comm GetComm() const { return trial_fespace_.GetComm(); } + void AddOperator(const Operator &op, double c = 1.0); void Mult(const Vector &x, Vector &y) const override; @@ -141,95 +190,164 @@ class ParOperator : public Operator }; // Wrap a sequence of operators of the same dimensions and optional coefficients. 
-class SumOperator : public Operator +class ComplexSumOperator : public ComplexOperator { private: - std::vector> ops_; + std::vector>> ops; public: - SumOperator(int s) : Operator(s) {} - SumOperator(int h, int w) : Operator(h, w) {} - SumOperator(const Operator &op, double c = 1.0) : Operator(op.Height(), op.Width()) - { - AddOperator(op, c); - } + ComplexSumOperator(int s) : ComplexOperator(s) {} + ComplexSumOperator(int h, int w) : ComplexOperator(h, w) {} + ComplexSumOperator(const ComplexOperator &op, std::complex c = 1.0); - void AddOperator(const Operator &op, double c = 1.0) - { - MFEM_VERIFY(op.Height() == height && op.Width() == width, - "Invalid Operator dimensions for SumOperator!"); - ops_.emplace_back(&op, c); - } + void AddOperator(const ComplexOperator &op, std::complex c = 1.0); - void Mult(const Vector &x, Vector &y) const override + bool IsReal() const override; + bool IsImag() const override; + + using ComplexOperator::AddMult; + using ComplexOperator::AddMultHermitianTranspose; + using ComplexOperator::AddMultTranspose; + using ComplexOperator::Mult; + using ComplexOperator::MultHermitianTranspose; + using ComplexOperator::MultTranspose; + + void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const override; + + void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const override; + + void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, + bool zero_imag = false) const override; + + void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = false) const override; + + void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = false) const override; + + void 
AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = false) const override; +}; + +// Wraps two operators such that: (AB)ᵀ = BᵀAᵀ and, for complex symmetric operators, the +// Hermitian transpose operation is (AB)ᴴ = BᴴAᴴ. +template +class ProductOperator : public OperType +{ +private: + typedef typename std::conditional::value, + ComplexVector, Vector>::type VecType; + + const OperType &A, &B; + mutable VecType z; + +public: + ProductOperator(const OperType &A, const OperType &B) + : OperType(A.Height(), B.Width()), A(A), B(B), z(B.Height()) { - y = 0.0; - AddMult(x, y); } - void MultTranspose(const Vector &x, Vector &y) const override + void Mult(const VecType &x, VecType &y) const override { - y = 0.0; - AddMultTranspose(x, y); + B.Mult(x, z); + A.Mult(z, y); } - void AddMult(const Vector &x, Vector &y, const double a = 1.0) const override + void MultTranspose(const VecType &x, VecType &y) const override { - for (const auto &[op, c] : ops_) - { - op->AddMult(x, y, a * c); - } + A.MultTranspose(x, z); + B.MultTranspose(z, y); } - void AddMultTranspose(const Vector &x, Vector &y, const double a = 1.0) const override + template ::value>> + void MultHermitianTranspose(const VecType &x, VecType &y) const override { - for (const auto &[op, c] : ops_) - { - op->AddMultTranspose(x, y, a * c); - } + A.MultHermitianTranspose(x, z); + B.MultHermitianTranspose(z, y); } }; -// Wraps two symmetric operators such that: (AB)ᵀ = BᵀAᵀ = BA. -class SymmetricProductOperator : public Operator +using ComplexProductOperator = ProductOperator; + +// Applies the simple (symmetric) operator: diag(d). 
+template +class DiagonalOperator : public OperType { private: - const Operator &A_, &B_; - mutable Vector z_; + typedef typename std::conditional::value, + ComplexVector, Vector>::type VecType; + + const VecType &d; public: - SymmetricProductOperator(const Operator &A, const Operator &B) - : Operator(A.Height(), B.Width()), A_(A), B_(B), z_(B_.Height()) + DiagonalOperator(const VecType &d) : OperType(d.Size()), d(d) {} + + void Mult(const VecType &x, VecType &y) const override; + + void MultTranspose(const VecType &x, VecType &y) const override { Mult(x, y); } + + template ::value>> + void MultHermitianTranspose(const VecType &x, VecType &y) const override; +}; + +using ComplexDiagonalOperator = DiagonalOperator; + +// A container for a sequence of operators corresponding to a multigrid hierarchy. +// Optionally includes operators for the auxiliary space at each level as well. The +// Operators are stored from coarsest to finest level. The height and width of this operator +// are those of the most recently added (finest level) operator. +template +class MultigridOperator : public OperType +{ +private: + typedef typename std::conditional::value, + ComplexVector, Vector>::type VecType; + + std::vector> ops, aux_ops; + +public: + MultigridOperator(int l) : OperType(0) { + ops.reserve(l); + aux_ops.reserve(l); } - void Mult(const Vector &x, Vector &y) const override + void AddOperator(std::unique_ptr &&op) { - B_.Mult(x, z_); - A_.Mult(z_, y); + ops.push_back(std::move(op)); + this->height = ops.back()->Height(); + this->width = ops.back()->Width(); } - void MultTranspose(const Vector &x, Vector &y) const override + void AddAuxiliaryOperator(std::unique_ptr &&aux_op) { - A_.Mult(x, z_); - B_.Mult(z_, y); + aux_ops.push_back(std::move(aux_op)); } -}; -// Applies the simple (symmetric) operator: diag(d). 
-class DiagonalOperator : public Operator -{ -private: - const Vector &d_; + bool HasAuxiliaryOperators() const { return !aux_ops.empty(); } -public: - DiagonalOperator(const Vector &d) : Operator(d.Size()), d_(d) {} + int GetNumLevels() const { return static_cast(ops.size()); } + int GetNumAuxiliaryLevels() const { return static_cast(aux_ops.size()); } - void Mult(const Vector &x, Vector &y) const override; + const OperType &GetFinestOperator() const { return *ops.back(); } + const OperType &GetFinestAuxiliaryOperator() const { return *aux_ops.back(); } - void MultTranspose(const Vector &x, Vector &y) const override { Mult(x, y); } + const OperType &GetOperatorAtLevel(int l) const { return *ops[l]; } + const OperType &GetAuxiliaryOperatorAtLevel(int l) const { return *aux_ops[l]; } + + void Mult(const VecType &x, VecType &y) const override { GetFinestOperator().Mult(x, y); } }; +using ComplexMultigridOperator = MultigridOperator; + namespace linalg { diff --git a/palace/linalg/rap.cpp b/palace/linalg/rap.cpp new file mode 100644 index 000000000..1412fda53 --- /dev/null +++ b/palace/linalg/rap.cpp @@ -0,0 +1,742 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#include "rap.hpp" + +#include + +namespace palace +{ + +ParOperator::ParOperator(std::unique_ptr &&data_A, Operator *A, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, + bool test_restrict) + : Operator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), + data_A(std::move(data_A)), A(this->data_A ? 
this->data_A.get() : A), + trial_fespace(trial_fespace), test_fespace(test_fespace), use_R(test_restrict), + dbc_tdof_list(nullptr), diag_policy(DiagonalPolicy::DIAG_ONE), RAP(nullptr) +{ + MFEM_VERIFY(this->A, "Cannot construct ParOperator from an empty matrix!"); // Parameter A shadows the member + lx.SetSize(this->A->Width()); + ly.SetSize(this->A->Height()); + ty.SetSize(width); +} + +ParOperator::ParOperator(std::unique_ptr &&A, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, + bool test_restrict) + : ParOperator(std::move(A), nullptr, trial_fespace, test_fespace, test_restrict) +{ +} + +ParOperator::ParOperator(Operator *A, const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, + bool test_restrict) + : ParOperator(nullptr, A, trial_fespace, test_fespace, test_restrict) +{ +} + +const Operator &ParOperator::LocalOperator() const +{ + MFEM_ASSERT(A, "No local matrix available for ParOperator::LocalOperator!"); + return *A; +} + +void ParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, + DiagonalPolicy policy) +{ + MFEM_VERIFY(policy == DiagonalPolicy::DIAG_ONE || policy == DiagonalPolicy::DIAG_ZERO, + "Essential boundary condition true dof elimination for ParOperator supports " + "only DiagonalPolicy::DIAG_ONE or DiagonalPolicy::DIAG_ZERO!"); + MFEM_VERIFY(height == width, "Set essential true dofs for both test and trial spaces " + "for rectangular ParOperator!"); + dbc_tdof_list = &tdof_list; + diag_policy = policy; +} + +const mfem::Array *ParOperator::GetEssentialTrueDofs() const +{ + return dbc_tdof_list; +} + +void ParOperator::AssembleDiagonal(Vector &diag) const +{ + // For an AMR mesh, a convergent diagonal is assembled with |P|ᵀ dₗ, where |P| has 
+ MFEM_VERIFY(&trial_fespace == &test_fespace, + "Diagonal assembly is only available for square ParOperator!"); + if (auto *bfA = dynamic_cast(A)) + { + if (bfA->HasSpMat()) + { + bfA->SpMat().GetDiag(ly); + } + else if (bfA->HasExt()) + { + bfA->Ext().AssembleDiagonal(ly); + } + else + { + MFEM_ABORT("Unable to assemble the local operator diagonal of BilinearForm!"); + } + } + else if (auto *sA = dynamic_cast(A)) + { + sA->GetDiag(ly); + } + else + { + MFEM_ABORT("ParOperator::AssembleDiagonal requires A as a BilinearForm or " + "SparseMatrix!"); + } + + const Operator *P = test_fespace.GetProlongationMatrix(); + if (const auto *hP = dynamic_cast(P)) + { + hP->AbsMultTranspose(1.0, ly, 0.0, diag); + } + else + { + P->MultTranspose(ly, diag); + } + + if (dbc_tdof_list) + { + if (diag_policy == DiagonalPolicy::DIAG_ONE) + { + diag.SetSubVector(*dbc_tdof_list, 1.0); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO) + { + diag.SetSubVector(*dbc_tdof_list, 0.0); + } + else + { + MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + } + } +} + +mfem::HypreParMatrix &ParOperator::ParallelAssemble() +{ + if (RAP) + { + return *RAP; + } + + // XX TODO: For mfem::AssemblyLevel::PARTIAL, we cannot use CeedOperatorFullAssemble for + // a ND space with p > 1. We should throw an error here that the user needs to + // use AssemblyLevel::LEGACY in this case. + + // Build the square or rectangular RAP HypreParMatrix. 
+ if (&trial_fespace == &test_fespace) + { + mfem::SparseMatrix *lA; + bool own_lA = false; + if (auto *bfA = dynamic_cast(A)) + { +#ifdef MFEM_USE_CEED + if (bfA->HasSpMat()) + { + lA = &bfA->SpMat(); + } + else if (bfA->HasExt()) + { + lA = mfem::ceed::CeedOperatorFullAssemble(*bfA); + own_lA = true; + } + else + { + MFEM_ABORT("Unable to assemble the local operator for parallel assembly of " + "BilinearForm!"); + } +#else + MFEM_VERIFY(bfA->HasSpMat(), + "Missing assembled SparseMatrix for parallel assembly of BilinearForm!"); + lA = &bfA->SpMat(); +#endif + } + else if (auto *sA = dynamic_cast(A)) + { + lA = sA; + } + else + { + MFEM_ABORT("ParOperator::ParallelAssemble requires A as a BilinearForm or " + "SparseMatrix!"); + lA = nullptr; + } + mfem::HypreParMatrix *hA = + new mfem::HypreParMatrix(trial_fespace.GetComm(), trial_fespace.GlobalVSize(), + trial_fespace.GetDofOffsets(), lA); + const mfem::HypreParMatrix *P = trial_fespace.Dof_TrueDof_Matrix(); + RAP = std::make_unique(hypre_ParCSRMatrixRAP(*P, *hA, *P), true); + delete hA; + if (own_lA) + { + delete lA; + } + } + else + { + mfem::SparseMatrix *lA; + bool own_lA = false; + if (auto *mbfA = dynamic_cast(A)) + { +#ifdef MFEM_USE_CEED + if (mbfA->HasSpMat()) + { + lA = &mbfA->SpMat(); + } + else if (mbfA->HasExt()) + { + lA = mfem::ceed::CeedOperatorFullAssemble(*mbfA); + own_lA = true; + } + else + { + MFEM_ABORT("Unable to assemble the local operator for parallel assembly of " + "MixedBilinearForm!"); + } +#else + MFEM_VERIFY( + mbfA->HasSpMat(), + "Missing assembled SparseMatrix for parallel assembly of MixedBilinearForm!"); + lA = &mbfA->SpMat(); +#endif + } + else if (auto *sA = dynamic_cast(A)) + { + lA = sA; + } + else + { + MFEM_ABORT("ParOperator::ParallelAssemble requires A as a MixedBilinearForm or " + "SparseMatrix!"); + lA = nullptr; + } + mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( + trial_fespace.GetComm(), test_fespace.GlobalVSize(), trial_fespace.GlobalVSize(), + 
test_fespace.GetDofOffsets(), trial_fespace.GetDofOffsets(), lA); + const mfem::HypreParMatrix *P = trial_fespace.Dof_TrueDof_Matrix(); + if (!use_R) + { + const mfem::HypreParMatrix *Rt = test_fespace.Dof_TrueDof_Matrix(); + RAP = + std::make_unique(hypre_ParCSRMatrixRAP(*Rt, *hA, *P), true); + } + else + { + mfem::SparseMatrix *sRt = mfem::Transpose(*test_fespace.GetRestrictionMatrix()); + mfem::HypreParMatrix *hRt = new mfem::HypreParMatrix( + test_fespace.GetComm(), test_fespace.GlobalVSize(), + test_fespace.GlobalTrueVSize(), test_fespace.GetDofOffsets(), + test_fespace.GetTrueDofOffsets(), sRt); + RAP = std::make_unique(hypre_ParCSRMatrixRAP(*hRt, *hA, *P), + true); + delete sRt; + delete hRt; + } + delete hA; + if (own_lA) + { + delete lA; + } + } + hypre_ParCSRMatrixSetNumNonzeros(*RAP); + + // Eliminate boundary conditions on the assembled (square) matrix. + if (dbc_tdof_list) + { + MFEM_VERIFY( + &trial_fespace == &test_fespace, + "Only square ParOperator should have same trial and test eliminated tdofs!"); + RAP->EliminateBC(*dbc_tdof_list, diag_policy); + } + return *RAP; +} + +void ParOperator::EliminateRHS(const Vector &x, Vector &b) const +{ + if (!dbc_tdof_list) + { + return; + } + + MFEM_VERIFY(A, "No local matrix available for ParOperator::EliminateRHS!"); + ty = 0.0; + { + const int N = dbc_tdof_list->Size(); + const auto *idx = dbc_tdof_list->Read(); + const auto *X = x.Read(); + auto *TY = ty.ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TY[id] = X[id]; + }); + } + + // Apply the unconstrained operator. 
+ trial_fespace.GetProlongationMatrix()->Mult(ty, lx); + A->Mult(lx, ly); + + if (!use_R) + { + test_fespace.GetProlongationMatrix()->AddMultTranspose(ly, b, -1.0); + } + else + { + test_fespace.GetRestrictionMatrix()->AddMult(ly, b, -1.0); + } + if (diag_policy == DiagonalPolicy::DIAG_ONE && height == width) + { + const int N = dbc_tdof_list->Size(); + const auto *idx = dbc_tdof_list->Read(); + const auto *X = x.Read(); + auto *B = b.ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + B[id] = X[id]; + }); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO || height != width) + { + b.SetSubVector(*dbc_tdof_list, 0.0); + } + else + { + MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + } +} + +void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const +{ + MFEM_ASSERT(x.Size() == width && y.Size() == height, + "Incompatible dimensions for ParOperator::AddMult!"); + if (dbc_tdof_list) + { + ty = x; + ty.SetSubVector(*dbc_tdof_list, 0.0); + } + trial_fespace.GetProlongationMatrix()->Mult(dbc_tdof_list ? ty : x, lx); + + // Apply the operator on the L-vector. 
+ A->Mult(lx, ly); + + if (dbc_tdof_list) + { + if (!use_R) + { + test_fespace.GetProlongationMatrix()->MultTranspose(ly, ty); + } + else + { + test_fespace.GetRestrictionMatrix()->Mult(ly, ty); + } + if (diag_policy == DiagonalPolicy::DIAG_ONE && height == width) + { + const int N = dbc_tdof_list->Size(); + const auto *idx = dbc_tdof_list->Read(); + const auto *X = x.Read(); + auto *TY = ty.ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TY[id] = X[id]; + }); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO || height != width) + { + ty.SetSubVector(*dbc_tdof_list, 0.0); + } + else + { + MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + } + y.Add(a, ty); + } + else + { + if (!use_R) + { + test_fespace.GetProlongationMatrix()->AddMultTranspose(ly, y, a); + } + else + { + test_fespace.GetRestrictionMatrix()->AddMult(ly, y, a); + } + } +} + +void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) const +{ + MFEM_ASSERT(x.Size() == height && y.Size() == width, + "Incompatible dimensions for ParOperator::AddMultTranspose!"); + if (dbc_tdof_list) + { + ty = x; + ty.SetSubVector(*dbc_tdof_list, 0.0); + } + if (!use_R) + { + test_fespace.GetProlongationMatrix()->Mult(dbc_tdof_list ? ty : x, ly); + } + else + { + test_fespace.GetRestrictionMatrix()->MultTranspose(dbc_tdof_list ? ty : x, ly); + } + + // Apply the operator on the L-vector. 
+ A->MultTranspose(ly, lx); + + if (dbc_tdof_list) + { + trial_fespace.GetProlongationMatrix()->MultTranspose(lx, ty); + if (diag_policy == DiagonalPolicy::DIAG_ONE && height == width) + { + const int N = dbc_tdof_list->Size(); + const auto *idx = dbc_tdof_list->Read(); + const auto *X = x.Read(); + auto *TY = ty.ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TY[id] = X[id]; + }); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO || height != width) + { + ty.SetSubVector(*dbc_tdof_list, 0.0); + } + else + { + MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + } + y.Add(a, ty); + } + else + { + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx, y, a); + } +} + +ComplexParOperator::ComplexParOperator(std::unique_ptr &&A, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, + bool test_restrict) + : ComplexOperator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), + data_A(std::move(A)), A(data_A.get()), trial_fespace(trial_fespace), + test_fespace(test_fespace), use_R(test_restrict), dbc_tdof_list(nullptr), + diag_policy(DiagonalPolicy::DIAG_ONE) +{ + lx.SetSize(A->Width()); + ly.SetSize(A->Height()); + ty.SetSize(width); +} + +ComplexParOperator::ComplexParOperator(std::unique_ptr &&Ar, + std::unique_ptr &&Ai, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, + bool test_restrict) + : ComplexParOperator( + std::make_unique(std::move(Ar), std::move(Ai)), + trial_fespace, test_fespace, test_restrict); +{ + // Non-owning constructors for real and imaginary part ParOperators. We know A is a + // ComplexWrapperOperator which has separate access to the real and imaginary components. 
+ if (A->HasReal()) + { + RAPr = std::make_unique(A->Real(), trial_fespace, test_fespace, use_R); + } + if (A->HasImag()) + { + RAPi = std::make_unique(A->Imag(), trial_fespace, test_fespace, use_R); + } +} + +const ComplexOperator &ComplexParOperator::LocalOperator() const +{ + MFEM_ASSERT(A, "No local matrix available for ComplexParOperator::LocalOperator!"); + return *A; +} + +void ComplexParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, + DiagonalPolicy policy) +{ + MFEM_VERIFY(policy == DiagonalPolicy::DIAG_ONE || policy == DiagonalPolicy::DIAG_ZERO, + "Essential boundary condition true dof elimination for ComplexParOperator " + "supports only DiagonalPolicy::DIAG_ONE or DiagonalPolicy::DIAG_ZERO!"); + MFEM_VERIFY( + policy != DiagonalPolicy::DIAG_ONE || RAPr, + "DiagonalPolicy::DIAG_ONE specified for ComplexParOperator with no real part!"); + MFEM_VERIFY(height == width, "Set essential true dofs for both test and trial spaces " + "for rectangular ComplexParOperator!"); + dbc_tdof_list = &tdof_list; + diag_policy = policy; + if (RAPr) + { + RAPr->SetEssentialTrueDofs(tdof_list, policy); + } + if (RAPi) + { + RAPi->SetEssentialTrueDofs(tdof_list, DiagonalPolicy::DIAG_ZERO); + } +} + +const mfem::Array *ComplexParOperator::GetEssentialTrueDofs() const +{ + return dbc_tdof_list; +} + +void ComplexParOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a, bool zero_real, + bool zero_imag) const +{ + MFEM_ASSERT(xr.Size() == width && xi.Size() == width && yr.Size() == height && + yi.Size() == height, + "Incompatible dimensions for ComplexParOperator::AddMult!"); + if (dbc_tdof_list) + { + ty.Real() = xr; + ty.Imag() = xi; + ty.SetSubVector(*dbc_tdof_list, 0.0); + } + if (!zero_real) + { + trial_fespace.GetProlongationMatrix()->Mult(dbc_tdof_list ? ty.Real() : xr, lx.Real()); + } + if (!zero_imag) + { + trial_fespace.GetProlongationMatrix()->Mult(dbc_tdof_list ? 
ty.Imag() : xi, lx.Imag()); + } + + // Apply the operator on the L-vector. + ly = 0.0; + A->AddMult(lx.Real(), lx.Imag(), ly.Real(), ly.Imag(), a, zero_real, zero_imag); + + if (dbc_tdof_list) + { + if (!use_R) + { + test_fespace.GetProlongationMatrix()->MultTranspose(ly.Real(), ty.Real()); + test_fespace.GetProlongationMatrix()->MultTranspose(ly.Imag(), ty.Imag()); + } + else + { + test_fespace.GetRestrictionMatrix()->Mult(ly.Real(), ty.Real()); + test_fespace.GetRestrictionMatrix()->Mult(ly.Imag(), ty.Imag()); + } + if (diag_policy == DiagonalPolicy::DIAG_ONE && height == width) + { + const int N = dbc_tdof_list->Size(); + const auto *idx = dbc_tdof_list->Read(); + const auto *XR = xr.Read(); + const auto *XI = xi.Read(); + auto *TYR = ty.Real().ReadWrite(); + auto *TYI = ty.Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TYR[id] = XR[id]; + TYI[id] = XI[id]; + }); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO || height != width) + { + ty.SetSubVector(*dbc_tdof_list, 0.0); + } + else + { + MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + } + yr += ty.Real(); + yi += ty.Imag(); + } + else + { + if (!use_R) + { + test_fespace.GetProlongationMatrix()->MultTranspose(ly.Real(), yr); + test_fespace.GetProlongationMatrix()->MultTranspose(ly.Imag(), yi); + } + else + { + test_fespace.GetRestrictionMatrix()->Mult(ly.Real(), yr); + test_fespace.GetRestrictionMatrix()->Mult(ly.Imag(), yi); + } + } +} + +void ComplexParOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, + Vector &yi, const std::complex a, + bool zero_real, bool zero_imag) const +{ + MFEM_ASSERT(xr.Size() == height && xi.Size() == height && yr.Size() == width && + yi.Size() == width, + "Incompatible dimensions for ComplexParOperator::AddMultTranspose!"); + if (dbc_tdof_list) + { + ty.Real() = xr; + ty.Imag() = xi; + ty.SetSubVector(*dbc_tdof_list, 0.0); + } + if (!use_R) + { + if (!zero_real) + { + 
test_fespace.GetProlongationMatrix()->Mult(dbc_tdof_list ? ty.Real() : xr, ly.Real()); + } + if (!zero_imag) + { + test_fespace.GetProlongationMatrix()->Mult(dbc_tdof_list ? ty.Imag() : xi, ly.Imag()); + } + } + else + { + if (!zero_real) + { + test_fespace.GetRestrictionMatrix()->MultTranspose(dbc_tdof_list ? ty.Real() : xr, + ly.Real()); + } + if (!zero_imag) + { + test_fespace.GetRestrictionMatrix()->MultTranspose(dbc_tdof_list ? ty.Imag() : xi, + ly.Imag()); + } + } + + // Apply the operator on the L-vector. + lx = 0.0; + A->AddMultTranspose(ly.Real(), ly.Imag(), lx.Real(), lx.Imag(), a, zero_real, zero_imag); + + if (dbc_tdof_list) + { + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag()); + if (diag_policy == DiagonalPolicy::DIAG_ONE && height == width) + { + const int N = dbc_tdof_list->Size(); + const auto *idx = dbc_tdof_list->Read(); + const auto *XR = xr.Read(); + const auto *XI = xi.Read(); + auto *TYR = ty.Real().ReadWrite(); + auto *TYI = ty.Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TYR[id] = XR[id]; + TYI[id] = XI[id]; + }); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO || height != width) + { + ty.SetSubVector(*dbc_tdof_list, 0.0); + } + else + { + MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + } + yr += ty.Real(); + yi += ty.Imag(); + } + else + { + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Real(), yr); + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Imag(), yi); + } +} + +void ComplexParOperator::AddMultHermitianTranspose(const Vector &xr, const Vector &xi, + Vector &yr, Vector &yi, + const std::complex a, + bool zero_real, bool zero_imag) const +{ + MFEM_ASSERT(xr.Size() == height && xi.Size() == height && yr.Size() == width && + yi.Size() == width, + "Incompatible dimensions for ComplexParOperator::AddMultHermitianTranspose!"); 
+ if (dbc_tdof_list) + { + ty.Real() = xr; + ty.Imag() = xi; + ty.SetSubVector(*dbc_tdof_list, 0.0); + } + if (!use_R) + { + if (!zero_real) + { + test_fespace.GetProlongationMatrix()->Mult(dbc_tdof_list ? ty.Real() : xr, ly.Real()); + } + if (!zero_imag) + { + test_fespace.GetProlongationMatrix()->Mult(dbc_tdof_list ? ty.Imag() : xi, ly.Imag()); + } + } + else + { + if (!zero_real) + { + test_fespace.GetRestrictionMatrix()->MultTranspose(dbc_tdof_list ? ty.Real() : xr, + ly.Real()); + } + if (!zero_imag) + { + test_fespace.GetRestrictionMatrix()->MultTranspose(dbc_tdof_list ? ty.Imag() : xi, + ly.Imag()); + } + } + + // Apply the operator on the L-vector. + lx = 0.0; + A->AddMultHermitianTranspose(ly.Real(), ly.Imag(), lx.Real(), lx.Imag(), a, zero_real, + zero_imag); + + if (dbc_tdof_list) + { + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag()); + if (diag_policy == DiagonalPolicy::DIAG_ONE && height == width) + { + const int N = dbc_tdof_list->Size(); + const auto *idx = dbc_tdof_list->Read(); + const auto *XR = xr.Read(); + const auto *XI = xi.Read(); + auto *TYR = ty.Real().ReadWrite(); + auto *TYI = ty.Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + TYR[id] = XR[id]; + TYI[id] = XI[id]; + }); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO || height != width) + { + ty.SetSubVector(*dbc_tdof_list, 0.0); + } + else + { + MFEM_ABORT("Unsupported Operator::DiagonalPolicy for ParOperator!"); + } + yr += ty.Real(); + yi += ty.Imag(); + } + else + { + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Real(), yr); + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Imag(), yi); + } +} + +} // namespace palace diff --git a/palace/linalg/rap.hpp b/palace/linalg/rap.hpp new file mode 100644 index 000000000..491364712 --- /dev/null +++ b/palace/linalg/rap.hpp @@ -0,0 +1,227 @@ +// Copyright 
Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LINALG_RAP_HPP +#define PALACE_LINALG_RAP_HPP + +#include +#include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" + +namespace palace +{ + +// +// A parallel operators represented by RAP constructed through the actions of R, A, and P, +// usually with R = Pᵀ, and with possible eliminated essential BC. +// + +// Real-valued RAP operator. +class ParOperator : public Operator +{ +private: + // Storage and access for the local operator. + std::unique_ptr data_A; + Operator *A; + + // Finite element spaces for parallel prolongation and restriction. + const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace; + const bool use_R; + + // Lists of constrained essential boundary true dofs for elimination. + const mfem::Array *dbc_tdof_list; + + // Diagonal policy for constrained true dofs. + DiagonalPolicy diag_policy; + + // Assembled operator as a parallel Hypre matrix. If assembled, the local operator is not + // deleted. + std::unique_ptr RAP; + + // Temporary storage for operator application. + mutable Vector lx, ly, ty; + + ParOperator(std::unique_ptr &&data_A, Operator *A, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + +public: + // Construct the parallel operator, inheriting ownership of the local operator. + ParOperator(std::unique_ptr &&A, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + ParOperator(std::unique_ptr &&A, const mfem::ParFiniteElementSpace &fespace) + : ParOperator(std::move(A), fespace, fespace, false) + { + } + + // Non-owning constructors. 
+ ParOperator(Operator *A, const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + ParOperator(Operator *A, const mfem::ParFiniteElementSpace &fespace) + : ParOperator(A, fespace, fespace, false) + { + } + + // Get access to the underlying local (L-vector) operator. + const Operator &LocalOperator() const; + + // Set essential boundary condition true dofs for square operators. + void SetEssentialTrueDofs(const mfem::Array &tdof_list, DiagonalPolicy policy); + + // Get the essential boundary condition true dofs associated with the operator. May be + // nullptr. + const mfem::Array *GetEssentialTrueDofs() const; + + // Get the associated MPI communicator. + MPI_Comm GetComm() const { return trial_fespace.GetComm(); } + + // Assemble the diagonal for the parallel operator. + void AssembleDiagonal(Vector &diag) const override; + + // Assemble the operator as a parallel sparse matrix. The memory associated with the + // local operator is not freed. + mfem::HypreParMatrix &ParallelAssemble(); + + // Steal the assembled parallel sparse matrix. + std::unique_ptr StealParallelAssemble() + { + ParallelAssemble(); + return std::move(RAP); + } + + // Eliminate essential true dofs from the RHS vector b, using the essential boundary + // condition values in x. + void EliminateRHS(const Vector &x, Vector &b) const; + + void Mult(const Vector &x, Vector &y) const override + { + y = 0.0; + AddMult(x, y); + } + + void MultTranspose(const Vector &x, Vector &y) const override + { + y = 0.0; + AddMultTranspose(x, y); + } + + void AddMult(const Vector &x, Vector &y, const double a = 1.0) const override; + + void AddMultTranspose(const Vector &x, Vector &y, const double a = 1.0) const override; +}; + +// Complex-valued RAP operator. +class ComplexParOperator : public ComplexOperator +{ +private: + // Storage and access for the local operator. 
+ std::unique_ptr data_A; + ComplexOperator *A; + + // Finite element spaces for parallel prolongation and restriction. + const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace; + const bool use_R; + + // Lists of constrained essential boundary true dofs for elimination. + mutable const mfem::Array *dbc_tdof_list; + + // Diagonal policy for constrained true dofs. + DiagonalPolicy diag_policy; + + // Real and imaginary parts of the operator as non-owning ParOperator objects. + std::unique_ptr RAPr, RAPi; + + // Temporary storage for operator application. + mutable ComplexVector lx, ly, ty; + +public: + // Construct the complex-valued parallel operator, inheriting ownership of the local + // operator. + ComplexParOperator(std::unique_ptr &&A, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + ComplexParOperator(std::unique_ptr &&A, + const mfem::ParFiniteElementSpace &fespace) + : ComplexParOperator(std::move(Ar), std::move(A), fespace, fespace, false) + { + } + ComplexParOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + ComplexParOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai, + const mfem::ParFiniteElementSpace &fespace) + : ComplexParOperator(std::move(Ar), std::move(Ai), fespace, fespace, false) + { + } + + // Get access to the underlying local (L-vector) operator. + const ComplexOperator &LocalOperator() const; + + // Set essential boundary condition true dofs for square operators. + void SetEssentialTrueDofs(const mfem::Array &tdof_list, DiagonalPolicy policy); + + // Get the essential boundary condition true dofs associated with the operator. May be + // nullptr. + const mfem::Array *GetEssentialTrueDofs() const; + + // Get the associated MPI communicator. 
+ MPI_Comm GetComm() const { return trial_fespace.GetComm(); } + + bool IsReal() const override { return A->IsReal(); } + bool IsImag() const override { return A->IsImag(); } + bool HasReal() const override { return RAPr != nullptr; } + bool HasImag() const override { return RAPi != nullptr; } + const Operator *Real() const override { return RAPr.get(); } + Operator *Real() override { return RAPr.get(); } + const Operator *Imag() const override { return RAPi.get(); } + Operator *Imag() override { return RAPi.get(); } + + using ComplexOperator::AddMult; + using ComplexOperator::AddMultHermitianTranspose; + using ComplexOperator::AddMultTranspose; + using ComplexOperator::Mult; + using ComplexOperator::MultHermitianTranspose; + using ComplexOperator::MultTranspose; + + void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const override + { + yr = 0.0; + yi = 0.0; + AddMult(xr, xi, yr, yi, 1.0, zero_real, zero_imag); + } + + void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const override + { + yr = 0.0; + yi = 0.0; + AddMultTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); + } + + void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + bool zero_real = false, bool zero_imag = false) const override + { + yr = 0.0; + yi = 0.0; + AddMultHermitianTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); + } + + void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = false) const override; + + void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = false, + bool zero_imag = false) const override; + + void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, + const std::complex a = 1.0, bool zero_real = 
false, + bool zero_imag = false) const override; +}; + +} // namespace palace + +#endif // PALACE_LINALG_RAP_HPP diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp index 3d585f864..1fd893796 100644 --- a/palace/linalg/vector.cpp +++ b/palace/linalg/vector.cpp @@ -3,27 +3,344 @@ #include "vector.hpp" +#include +#include #include -#include "linalg/complex.hpp" -#include "linalg/operator.hpp" -#include "utils/communication.hpp" -namespace palace::linalg +namespace palace { -HYPRE_BigInt GlobalSize(MPI_Comm comm, const Vector &x) +ComplexVector::ComplexVector(int n) : x(2 * n); { - HYPRE_BigInt N = x.Size(); - Mpi::GlobalSum(1, &N, comm); - return N; + xr.MakeRef(x, 0, n); + xi.MakeRef(x, n, n); } +ComplexVector::ComplexVector(const ComplexVector &y) : x(2 * x.Size()) +{ + xr.MakeRef(x, 0, y.Size()); + xi.MakeRef(x, y.Size(), y.Size()); + Set(y.Real(), y.Imag()); +} + +ComplexVector::ComplexVector(const Vector &xr, const Vector &xi) : x(2 * xr.Size()) +{ + MFEM_VERIFY(yr.Size() == yi.Size(), + "Mismatch in dimension of real and imaginary matrix parts in ComplexVector!"); + xr.MakeRef(x, 0, yr.Size()); + xi.MakeRef(x, yr.Size(), yr.Size()); + Set(yr, yi); +} + +ComplexVector::ComplexVector(const std::complex *py, int n) : x(2 * n) +{ + xr.MakeRef(x, 0, n); + xi.MakeRef(x, n, n); + Set(py, n); +} + +void ComplexVector::SetSize(int n) +{ + x.SetSize(2 * n); + xr.MakeRef(x, 0, n); + xi.MakeRef(x, n, n); +} + +ComplexVector &ComplexVector::operator=(const ComplexVector &y) +{ + Set(y.Real(), y.Imag()); + return *this; +} + +void ComplexVector::Set(const Vector &yr, const Vector &yi) +{ + MFEM_VERIFY(yr.Size() == yi.Size() && yr.Size() == Size(), + "Mismatch in dimension of real and imaginary matrix parts in ComplexVector!"); + Real() = yr; + Imag() = yi; +} + +void ComplexVector::Set(const std::complex *py, int n) +{ + MFEM_VERIFY(n == Size(), + "Mismatch in dimension for array of std::complex in ComplexVector!"); + Vector y(reinterpret_cast(const_cast 
*>(py)), 2 * n); + const int N = n; + const auto *Y = y.Read(); + auto *XR = Real().Write(); + auto *XI = Imag().Write(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + XR[i] = Y[2 * i]; + XI[i] = Y[2 * i + 1]; + }); +} + +void ComplexVector::Get(std::complex *py, int n) const +{ + MFEM_VERIFY(n == Size(), + "Mismatch in dimension for array of std::complex in ComplexVector!"); + Vector y(reinterpret_cast(py), 2 * n); + const int N = n; + const auto *XR = Real().Read(); + const auto *XI = Imag().Read(); + auto *Y = y.Write(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + Y[2 * i] = XR[i]; + Y[2 * i + 1] = XI[i]; + }); + y.HostReadWrite(); +} + +void ComplexVector::Conj() +{ + Imag() *= -1.0; +} + +ComplexVector &ComplexVector::operator=(std::complex s) +{ + Real() = s.real(); + Imag() = s.imag(); + return *this; +} + +ComplexVector &ComplexVector::operator*=(std::complex s) +{ + const double sr = s.real(); + const double si = s.imag(); + if (si == 0.0) + { + Real() *= sr; + Imag() *= sr; + } + else + { + const int N = Size(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = si * XR[i] + sr * XI[i]; + XR[i] = sr * XR[i] - si * XI[i]; + XI[i] = t; + }); + } + return *this; +} + +void ComplexVector::Reciprocal(bool abs) +{ + const int N = Size(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); + if (abs) + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = 1.0 / std::sqrt(XR[i] * XR[i] + XI[i] * XI[i]); + XR[i] = t; + XI[i] = 0.0; + }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const std::complex t = 1.0 / std::complex(XR[i], XI[i]); + XR[i] = t.real(); + XI[i] = t.imag(); + }); + } +} + +void ComplexVector::SetSubVector(const mfem::Array &rows, std::complex s) +{ + const int N = dofs.Size(); + const double sr = s.real(); + const double si = s.imag(); + const auto *idx = dofs.Read(); + auto *XR = 
Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + XR[id] = sr; + }); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + XI[id] = si; + }); +} + +std::complex ComplexVector::Dot(const ComplexVector &y) const +{ + return {(Real() * y.Real()) + (Imag() * y.Imag()), + (Imag() * y.Real()) - (Real() * y.Imag())}; +} + +std::complex ComplexVector::TransposeDot(const ComplexVector &y) const +{ + return {(Real() * y.Real()) - (Imag() * y.Imag()), + (Imag() * y.Real()) + (Real() * y.Imag())}; +} + +void ComplexVector::AXPY(std::complex alpha, const ComplexVector &x) +{ + const int N = Size(); + const double ar = alpha.real(); + const double ai = alpha.imag(); + const auto *XR = x.Real().Read(); + const auto *XI = x.Imag().Read(); + auto *YR = Real().ReadWrite(); + auto *YI = Imag().ReadWrite(); + if (ai == 0.0) + { + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { YR[i] += ar * XR[i]; }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { YI[i] += ar * XI[i]; }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] += ar * XR[i] - ai * XI[i]; + YI[i] += ai * XR[i] + ar * XI[i]; + }); + } +} + +void ComplexVector::AXPBY(std::complex alpha, const ComplexVector &x, + std::complex beta) +{ + const int N = Size(); + const double ar = alpha.real(); + const double ai = alpha.imag(); + const auto *XR = x.Real().Read(); + const auto *XI = x.Imag().Read(); + auto *YR = Real().ReadWrite(); + auto *YI = Imag().ReadWrite(); + if (beta == 0.0) + { + if (ai == 0.0) + { + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { YR[i] = ar * XR[i]; }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { YI[i] = ar * XI[i]; }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] = ar * XR[i] - ai * XI[i]; + YI[i] = ai * XR[i] + ar * XI[i]; + }); + } + } + else + { + const double br = beta.real(); + const double bi = beta.imag(); + if (ai == 0.0 && bi == 0.0) + 
{ + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { YR[i] = ar * XR[i] + br * YR[i]; }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { YI[i] = ar * XI[i] + br * YI[i]; }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = bi * YR[i] + br * YI[i]; + YR[i] = ar * XR[i] - ai * XI[i] + br * YR[i] - bi * YI[i]; + YI[i] = ai * XR[i] + ar * XI[i] + t; + }); + } + } +} + +void ComplexVector::AXPBYPCZ(std::complex alpha, const ComplexVector &x, + std::complex beta, const ComplexVector &y, + std::complex gamma) +{ + const int N = Size(); + const double ar = alpha.real(); + const double ai = alpha.imag(); + const double br = beta.real(); + const double bi = beta.imag(); + const auto *XR = x.Real().Read(); + const auto *XI = x.Imag().Read(); + const auto *YR = y.Real().Read(); + const auto *YI = y.Imag().Read(); + auto *ZR = Real().Write(); + auto *ZI = Imag().Write(); + if (gamma == 0.0) + { + if (ai == 0.0 && bi == 0.0) + { + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { ZR[i] = ar * XR[i] + br * YR[i] }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { ZI[i] = ar * XI[i] + br * YI[i] }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + ZR[i] = ar * XR[i] - ai * XI[i] + br * YR[i] - bi * YI[i]; + ZI[i] = ai * XR[i] + ar * XI[i] + bi * YR[i] + br * YI[i]; + }); + } + } + else + { + const double gr = gamma.real(); + const double gi = gamma.imag(); + if (ai == 0.0 && bi == 0.0 && gi == 0.0) + { + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) + { ZR[i] = ar * XR[i] + br * YR[i] + gr * ZR[i]; }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) + { ZI[i] = ar * XI[i] + br * YI[i] + gr * ZI[i]; }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = gi * ZR[i] + gr * ZI[i]; + ZR[i] = ar * XR[i] - ai * XI[i] + br * YR[i] - bi * YI[i] + + gr * ZR[i] - gi * ZI[i]; + ZI[i] = ai * XR[i] + ar * XI[i] + bi * YR[i] + br * YI[i] + t; + }); + } + } +} + +namespace linalg +{ + +template <> void 
SetRandom(MPI_Comm comm, Vector &x, int seed) { - seed *= Mpi::Rank(comm) + 1; + if (seed == 0) + { + std::vector seeds(1); + std::seed_seq seed_gen{Mpi::Rank(comm)}; + seed_gen.generate(seeds.begin(), seeds.end()); + seed = static_cast(seeds[0]); + } x.Randomize(seed); } +template <> void SetRandomSign(MPI_Comm comm, Vector &x, int seed) { SetRandom(comm, x, seed); @@ -33,51 +350,94 @@ void SetRandomSign(MPI_Comm comm, Vector &x, int seed) { X[i] = (X[i] > 0.0) ? 1.0 : ((X[i] < 0.0) ? -1.0 : 0.0); }); } -double Norml2(MPI_Comm comm, const Vector &x) -{ - return std::sqrt(mfem::InnerProduct(comm, x, x)); -} - -double Normlinf(MPI_Comm comm, const Vector &x) +template <> +void SetRandom(MPI_Comm comm, ComplexVector &x, int seed) { - double norm = x.Normlinf(); - Mpi::GlobalMax(1, &norm, comm); - return norm; + if (seed == 0) + { + std::vector seeds(2); + std::seed_seq seed_gen{2 * Mpi::Rank(comm), 2 * Mpi::Rank(comm) + 1}; + seed_gen.generate(seeds.begin(), seeds.end()); + SetRandom(comm, x.Real(), static_cast(seeds[0])); + SetRandom(comm, x.Imag(), static_cast(seeds[1])); + } + else + { + SetRandom(comm, x.Real(), seed); + SetRandom(comm, x.Imag(), seed); + } } -double Norml1(MPI_Comm comm, const Vector &x) +template <> +void SetRandomSign(MPI_Comm comm, ComplexVector &x, int seed) { - double norm = x.Norml1(); - Mpi::GlobalSum(1, &norm, comm); - return norm; -} - -double Normalize(MPI_Comm comm, Vector &x) -{ - double norm = Norml2(comm, x); - MFEM_ASSERT(norm > 0.0, "Zero vector norm in normalization!"); - x *= 1.0 / norm; - return norm; + SetRandom(comm, x, seed); + const int N = x.Size(); + auto *XR = x.Real().ReadWrite(); + auto *XI = x.Imag().ReadWrite(); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) + { XR[i] = (XR[i] > 0.0) ? 1.0 : ((XR[i] < 0.0) ? -1.0 : 0.0); }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) + { XI[i] = (XI[i] > 0.0) ? 1.0 : ((XI[i] < 0.0) ? 
-1.0 : 0.0); }); } +template <> double Normalize(MPI_Comm comm, Vector &x, const Operator &B, Vector &Bx) { B.Mult(x, Bx); - double norm = std::sqrt(mfem::InnerProduct(comm, x, Bx)); - MFEM_ASSERT(norm > 0.0, "Zero vector norm in normalization!"); + double dot = Dot(comm, x, Bx); + MFEM_ASSERT(dot > 0.0, "Non-positive vector norm in normalization!"); + double norm = std::sqrt(dot); x *= 1.0 / norm; return norm; } +template <> double Normalize(MPI_Comm comm, ComplexVector &x, const Operator &B, ComplexVector &Bx) { // For SPD B, xᴴ B x is real. B.Mult(x.Real(), Bx.Real()); B.Mult(x.Imag(), Bx.Imag()); - double norm = std::sqrt(mfem::InnerProduct(comm, x, Bx)); - MFEM_ASSERT(norm > 0.0, "Zero vector norm in normalization!"); + std::complex dot = Dot(comm, x, Bx); + MFEM_ASSERT(dot.real() > 0.0 && dot.imag() == 0.0, + "Non-positive vector norm in normalization!"); + double norm = std::sqrt(dot.real()); x *= 1.0 / norm; return norm; } -} // namespace palace::linalg +template <> +void AXPY(double alpha, const Vector &x, Vector &y) +{ + if (alpha == 1.0) + { + y += x; + } + else + { + y.Add(alpha, x); + } +} + +template <> +void AXPY(std::complex alpha, const ComplexVector &x, ComplexVector &y) +{ + y.AXPY(alpha, x); +} + +template <> +void AXPBY(double alpha, const Vector &x, double beta, Vector &y) +{ + Vector::add(alpha, x, beta, y, y); +} + +template <> +void AXPBY(std::complex alpha, const ComplexVector &x, std::complex beta, + ComplexVector &y) +{ + y.AXPBY(alpha, x, beta); +} + +} // namespace linalg + +} // namespace palace diff --git a/palace/linalg/vector.hpp b/palace/linalg/vector.hpp index 4f4235f46..3d5b5d4e9 100644 --- a/palace/linalg/vector.hpp +++ b/palace/linalg/vector.hpp @@ -4,44 +4,159 @@ #ifndef PALACE_LINALG_VECTOR_HPP #define PALACE_LINALG_VECTOR_HPP -#include +#include #include +#include "utils/communication.hpp" namespace palace { -class ComplexVector; - using Operator = mfem::Operator; using Vector = mfem::Vector; -namespace linalg -{ - // -// 
Basic functions for parallel vectors distributed across MPI processes. +// Functionality extending mfem::Vector from MFEM, including basic functions for parallel +// vectors distributed across MPI processes. // -// Returns the global vector size. -HYPRE_BigInt GlobalSize(MPI_Comm comm, const Vector &x); +// A complex-valued vector represented as two real vectors, one for each component. +class ComplexVector +{ +private: + Vector x, xr, xi; -// Sets all entries of the vector to random numbers sampled from the [-1, 1]. -void SetRandom(MPI_Comm comm, Vector &x, int seed = 0); -void SetRandomSign(MPI_Comm comm, Vector &x, int seed = 0); +public: + // Create a vector with the given size. + ComplexVector(int n = 0); -// Calculate the vector 2-norm. -double Norml2(MPI_Comm comm, const Vector &x); + // Copy constructor. + ComplexVector(const ComplexVector &y); + + // Copy constructor from separately provided real and imaginary parts. + ComplexVector(const Vector &yr, const Vector &yi); + + // Copy constructor from an array of complex values. + ComplexVector(const std::complex *py, int n); + + // Return the size of the vector. + int Size() const { return x.Size() / 2; } + + // Set the size of the vector. See the notes for Vector::SetSize for behavior in the cases + // where n is less than or greater than Size() or Capacity(). + void SetSize(int n); + + // Get access to the real and imaginary vector parts. + const Vector &Real() const { return xr; } + Vector &Real() { return xr; } + const Vector &Imag() const { return xi; } + Vector &Imag() { return xi; } + + // Copy assignment operator. This should probably not be used to modify the size of the + // vector. + ComplexVector &operator=(const ComplexVector &y); + + // Copy assignment from separately provided real and imaginary parts. + void Set(const Vector &yr, const Vector &yi); + + // Copy assignment from an array of complex values. 
+ void Set(const std::complex *py, int n); + + // Copy the vector into an array of complex values. + void Get(std::complex *py, int n) const; + + // Replace entries with complex conjugate. + void Conj(); -// Calculate the vector infinity-norm. -double Normlinf(MPI_Comm comm, const Vector &x); + // Set all entries equal to s. + ComplexVector &operator=(std::complex s); -// Calculate the vector 1-norm. -double Norml1(MPI_Comm comm, const Vector &x); + // Scale all entries by s. + ComplexVector &operator*=(std::complex s); + + // Set all entries to their reciprocal. + void Reciprocal(bool abs = false); + + // Set the entries listed the given array to value. All entries in the list should be + // non-negative. + void SetSubVector(const Array &rows, std::complex s); + + // Vector dot product (yᴴ x) or indefinite dot product (yᵀ x) for complex vectors. + std::complex Dot(const ComplexVector &y) const; + std::complex TransposeDot(const ComplexVector &y) const; + std::complex operator*(const ComplexVector &y) const { return Dot(y); } + + // In-place addition (*this) += alpha * x. + void AXPY(std::complex alpha, const ComplexVector &x); + void Add(std::complex alpha, const ComplexVector &x) { AXPY(alpha, x); } + ComplexVector &operator+=(const ComplexVector &x) + { + AXPY(1.0, x); + return *this; + } + + // In-place addition (*this) = alpha * x + beta * (*this). + void AXPBY(std::complex alpha, const ComplexVector &x, std::complex beta); + + // In-place addition (*this) = alpha * x + beta * y + gamma * (*this). + void AXPBYPCZ(std::complex alpha, const ComplexVector &x, + std::complex beta, const ComplexVector &y, + std::complex gamma); +}; + +namespace linalg +{ + +// Returns the global vector size. 
+template +inline HYPRE_BigInt GlobalSize(MPI_Comm comm, const VecType &x) +{ + HYPRE_BigInt N = x.Size(); + Mpi::GlobalSum(1, &N, comm); + return N; +} + +// Sets all entries of the vector to random numbers sampled from the [-1, 1] or [-1 - 1i, +// 1 + 1i] for complex-valued vectors. +template +void SetRandom(MPI_Comm comm, VecType &x, int seed = 0); +template +void SetRandomSign(MPI_Comm comm, VecType &x, int seed = 0); + +// Calculate the inner product yᴴ x or yᵀ x. +template +inline ScalarType Dot(MPI_Comm comm, const VecType &x, const VecType &y) +{ + ScalarType dot = x * y; + Mpi::GlobalSum(1, &dot, comm); + return dot; +} + +// Calculate the vector 2-norm. +template +inline double Norml2(MPI_Comm comm, const VecType &x) +{ + return std::sqrt(std::abs(Dot(comm, x, x))); +} // Normalize the vector, possibly with respect to an SPD matrix B. -double Normalize(MPI_Comm comm, Vector &x); -double Normalize(MPI_Comm comm, Vector &x, const Operator &B, Vector &Bx); -double Normalize(MPI_Comm comm, ComplexVector &x, const Operator &B, ComplexVector &Bx); +template +inline double Normalize(MPI_Comm comm, VecType &x) +{ + double norm = Norml2(comm, x); + MFEM_ASSERT(norm > 0.0, "Zero vector norm in normalization!"); + x *= 1.0 / norm; + return norm; +} +template +double Normalize(MPI_Comm comm, VecType &x, const Operator &B, VecType &Bx); + +// Addition y += alpha * x. +template +void AXPY(ScalarType alpha, const VecType &x, VecType &y); + +// Addition y = alpha * x + beta * y. 
+template +void AXPBY(ScalarType alpha, const VecType &x, ScalarType beta, VecType &y); } // namespace linalg diff --git a/palace/models/domainpostoperator.hpp b/palace/models/domainpostoperator.hpp index e017b6f9f..b3dd33c31 100644 --- a/palace/models/domainpostoperator.hpp +++ b/palace/models/domainpostoperator.hpp @@ -8,7 +8,6 @@ #include #include #include -#include "linalg/vector.hpp" namespace palace { @@ -27,7 +26,7 @@ class DomainPostOperator std::map> M_NDi; // Temporary vectors for inner product calculations. - mutable Vector D, H; + mutable mfem::Vector D, H; public: DomainPostOperator(const IoData &iodata, const MaterialOperator &mat_op, diff --git a/palace/models/postoperator.hpp b/palace/models/postoperator.hpp index a790b5c02..bbd807a2b 100644 --- a/palace/models/postoperator.hpp +++ b/palace/models/postoperator.hpp @@ -12,6 +12,7 @@ #include #include #include "fem/interpolation.hpp" +#include "linalg/operator.hpp" #include "linalg/vector.hpp" #include "models/domainpostoperator.hpp" #include "models/surfacepostoperator.hpp" From 91e4b79022e30cb60d4e36682a4095949dffabbd Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Wed, 24 May 2023 12:54:43 -0700 Subject: [PATCH 14/41] WIP: Updates to operator construction in SpaceOperator, LaplaceOperator, CurlCurlOperator after refactor --- palace/models/curlcurloperator.cpp | 13 +- palace/models/curlcurloperator.hpp | 7 +- palace/models/laplaceoperator.cpp | 23 +- palace/models/laplaceoperator.hpp | 9 +- palace/models/spaceoperator.cpp | 632 ++++++++++++++++++----------- palace/models/spaceoperator.hpp | 85 ++-- 6 files changed, 458 insertions(+), 311 deletions(-) diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp index 5733f3514..b0689fb93 100644 --- a/palace/models/curlcurloperator.cpp +++ b/palace/models/curlcurloperator.cpp @@ -112,7 +112,7 @@ void CurlCurlOperator::CheckBoundaryProperties() } } -void CurlCurlOperator::GetStiffnessMatrix(std::vector> &K) 
+std::unique_ptr CurlCurlOperator::GetStiffnessMatrix() { if (print_hdr) { @@ -122,8 +122,7 @@ void CurlCurlOperator::GetStiffnessMatrix(std::vector(nd_fespaces.GetNumLevels()); for (int l = 0; l < nd_fespaces.GetNumLevels(); l++) { auto &nd_fespace_l = nd_fespaces.GetFESpaceAtLevel(l); @@ -148,13 +147,15 @@ void CurlCurlOperator::GetStiffnessMatrix(std::vector(std::move(k), nd_fespace_l)); - K.back()->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); + auto K_l = std::make_unique(std::move(k), nd_fespace_l); + K_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); + K.AddOperator(std::move(K_l)); } print_hdr = false; + return K; } -std::unique_ptr CurlCurlOperator::GetCurlMatrix() +std::unique_ptr CurlCurlOperator::GetCurlMatrix() { auto curl = std::make_unique(&GetNDSpace(), &GetRTSpace()); curl->AddDomainInterpolator(new mfem::CurlInterpolator); diff --git a/palace/models/curlcurloperator.hpp b/palace/models/curlcurloperator.hpp index a0202943d..9976c7d49 100644 --- a/palace/models/curlcurloperator.hpp +++ b/palace/models/curlcurloperator.hpp @@ -69,14 +69,17 @@ class CurlCurlOperator // Construct and return system matrix representing discretized curl-curl operator for // Ampere's law. - void GetStiffnessMatrix(std::vector> &K); + std::unique_ptr GetStiffnessMatrix(); // Construct and return the discrete curl matrix. - std::unique_ptr GetCurlMatrix(); + std::unique_ptr GetCurlMatrix(); // Assemble the right-hand side source term vector for a current source applied on // specified excited boundaries. void GetExcitationVector(int idx, Vector &RHS); + + // Get the associated MPI communicator. 
+ MPI_Comm GetComm() const { return GetNDSpace().GetComm(); } }; } // namespace palace diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp index ef9a8c289..64ba4898c 100644 --- a/palace/models/laplaceoperator.cpp +++ b/palace/models/laplaceoperator.cpp @@ -135,7 +135,7 @@ LaplaceOperator::LaplaceOperator(const IoData &iodata, } } -void LaplaceOperator::GetStiffnessMatrix(std::vector> &K) +std::unique_ptr LaplaceOperator::GetStiffnessMatrix() { if (print_hdr) { @@ -144,8 +144,7 @@ void LaplaceOperator::GetStiffnessMatrix(std::vector(h1_fespaces.GetNumLevels()); for (int l = 0; l < h1_fespaces.GetNumLevels(); l++) { auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l); @@ -170,15 +169,15 @@ void LaplaceOperator::GetStiffnessMatrix(std::vector(std::move(k), h1_fespace_l)); - K.back()->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); + auto K_l = std::make_unique(std::move(k), h1_fespace_l); + K_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); + K.AddOperator(std::move(K_l)); } - // Save local (uneliminated) operator after parallel assembly for RHS BC elimination. 
- K.back()->SaveLocalOperator(); print_hdr = false; + return K; } -std::unique_ptr LaplaceOperator::GetGradMatrix() +std::unique_ptr LaplaceOperator::GetGradMatrix() { auto grad = std::make_unique(&GetH1Space(), &GetNDSpace()); grad->AddDomainInterpolator(new mfem::GradientInterpolator); @@ -188,7 +187,7 @@ std::unique_ptr LaplaceOperator::GetGradMatrix() return std::make_unique(std::move(grad), GetH1Space(), GetNDSpace(), true); } -void LaplaceOperator::GetExcitationVector(int idx, const ParOperator &K, Vector &X, +void LaplaceOperator::GetExcitationVector(int idx, const Operator &K, Vector &X, Vector &RHS) { // Apply the Dirichlet BCs to the solution vector: V = 1 on terminal boundaries with the @@ -209,7 +208,11 @@ void LaplaceOperator::GetExcitationVector(int idx, const ParOperator &K, Vector X = 0.0; RHS = 0.0; x.ParallelProject(X); // Restrict to the true dofs - K.EliminateRHS(X, RHS); + const auto *mg_K = dynamic_cast(&K); + MFEM_VERIFY(mg_K, "LaplaceOperator requires MultigridOperator for RHS elimination!"); + const auto *PtAP_K = dynamic_cast(&mg_K->GetFinestOperator()); + MFEM_VERIFY(PtAP_K, "LaplaceOperator requires ParOperator for RHS elimination!"); + PtAP_K->EliminateRHS(X, RHS); } } // namespace palace diff --git a/palace/models/laplaceoperator.hpp b/palace/models/laplaceoperator.hpp index f1f8f0ac8..76dd5cd21 100644 --- a/palace/models/laplaceoperator.hpp +++ b/palace/models/laplaceoperator.hpp @@ -64,14 +64,17 @@ class LaplaceOperator // Construct and return system matrix representing discretized Laplace operator for // Gauss's law. - void GetStiffnessMatrix(std::vector> &K); + std::unique_ptr GetStiffnessMatrix(); // Construct and return the discrete gradient matrix. - std::unique_ptr GetGradMatrix(); + std::unique_ptr GetGradMatrix(); // Assemble the solution boundary conditions and right-hand side vector for a nonzero // prescribed voltage on the specified surface index. 
- void GetExcitationVector(int idx, const ParOperator &K, Vector &X, Vector &RHS); + void GetExcitationVector(int idx, const Operator &K, Vector &X, Vector &RHS); + + // Get the associated MPI communicator. + MPI_Comm GetComm() const { return GetH1Space().GetComm(); } }; } // namespace palace diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index 78e0d107c..4b0cb586c 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -3,6 +3,7 @@ #include "spaceoperator.hpp" +#include #include "fem/integrator.hpp" #include "fem/multigrid.hpp" #include "utils/communication.hpp" @@ -66,40 +67,6 @@ mfem::Array SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMe return dbc_marker; } -template -auto AddIntegrators(mfem::BilinearForm &a, T1 &df, T2 &f, T3 &dfb, T4 &fb) -{ - if (!df.empty()) - { - a.AddDomainIntegrator(new mfem::CurlCurlIntegrator(df)); - } - if (!f.empty()) - { - a.AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(f)); - } - if (!dfb.empty()) - { - a.AddBoundaryIntegrator(new mfem::CurlCurlIntegrator(dfb)); - } - if (!fb.empty()) - { - a.AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(fb)); - } -} - -template -auto AddAuxIntegrators(mfem::BilinearForm &a, T1 &f, T2 &fb) -{ - if (!f.empty()) - { - a.AddDomainIntegrator(new mfem::MixedGradGradIntegrator(f)); - } - if (!fb.empty()) - { - a.AddBoundaryIntegrator(new mfem::MixedGradGradIntegrator(fb)); - } -} - } // namespace SpaceOperator::SpaceOperator(const IoData &iodata, @@ -204,214 +171,353 @@ void SpaceOperator::CheckBoundaryProperties() } } -std::unique_ptr -SpaceOperator::GetSystemMatrix(SpaceOperator::OperatorType type, - Operator::DiagonalPolicy diag_policy) +namespace +{ + +void PrintHeader(mfem::ParFiniteElementSpace &h1_fespace, + mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &rt_fespace, bool &print_hdr) { if (print_hdr) { Mpi::Print("\nAssembling system matrices, number of global unknowns:\n" " 
H1: {:d}, ND: {:d}, RT: {:d}\n", - GetH1Space().GlobalTrueVSize(), GetNDSpace().GlobalTrueVSize(), - GetRTSpace().GlobalTrueVSize()); + h1_fespace.GlobalTrueVSize(), nd_fespace.GlobalTrueVSize(), + rt_fespace.GlobalTrueVSize()); print_hdr = false; } +} + +template +auto BuildOperator(mfem::ParFiniteElementSpace &fespace, T1 *df, T2 *f, T3 *dfb, T4 *fb, + mfem::AssemblyLevel assembly_level, int skip_zeros, + bool no_assembly = false) +{ + auto a = std::make_unique(&fespace); + if (df && !df->empty()) + { + a->AddDomainIntegrator(new mfem::CurlCurlIntegrator(*df)); + } + if (df && !f->empty()) + { + a->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(*f)); + } + if (df && !dfb->empty()) + { + a->AddBoundaryIntegrator(new mfem::CurlCurlIntegrator(*dfb)); + } + if (df && !fb->empty()) + { + a->AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(*fb)); + } + if (!no_assembly) + { + a->SetAssemblyLevel(assembly_level); + a->Assemble(skip_zeros); + a->Finalize(skip_zeros); + } + return std::move(a); +} + +template +auto BuildAuxOperator(mfem::ParFiniteElementSpace &fespace, T1 *f, T2 *fb, + mfem::AssemblyLevel assembly_level, int skip_zeros, + bool no_assembly = false) +{ + auto a = std::make_unique(&fespace); + if (f && !f->empty()) + { + a.AddDomainIntegrator(new mfem::MixedGradGradIntegrator(*f)); + } + if (fb && !fb->empty()) + { + a.AddBoundaryIntegrator(new mfem::MixedGradGradIntegrator(*fb)); + } + if (!no_assembly) + { + a->SetAssemblyLevel(assembly_level); + a->Assemble(skip_zeros); + a->Finalize(skip_zeros); + } + return std::move(a); +} + +} // namespace + +std::unique_ptr +SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy) +{ + PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); - SumCoefficient dfb; - switch (type) - { - case OperatorType::STIFFNESS: - AddStiffnessCoefficients(1.0, df, f, fb); - break; 
- case OperatorType::DAMPING: - AddDampingCoefficients(1.0, f, fb); - break; - case OperatorType::MASS: - AddRealMassCoefficients(1.0, f, fb); - break; - case OperatorType::EXTRA: - default: - MFEM_ABORT("Invalid GetSystemMatrix matrix type for HypreParMatrix output!"); - } - if (df.empty() && f.empty() && dfb.empty() && fb.empty()) + AddStiffnessCoefficients(1.0, df, f); + AddBdrStiffnessCoefficients(1.0, fb); + if (df.empty() && f.empty() && fb.empty()) { return {}; } - auto a = std::make_unique(&GetNDSpace()); - AddIntegrators(*a, df, f, dfb, fb); - a->SetAssemblyLevel(assembly_level); - a->Assemble(skip_zeros); - a->Finalize(skip_zeros); - auto A = std::make_unique(std::move(a), GetNDSpace()); - A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); - return A; + auto K = std::make_unique( + BuildOperator(GetNDSpace(), &df, &f, nullptr, &fb, assembly_level, skip_zeros), + GetNDSpace()); + K->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return K; } -std::unique_ptr -SpaceOperator::GetComplexSystemMatrix(SpaceOperator::OperatorType type, double omega, - Operator::DiagonalPolicy diag_policy) +std::unique_ptr +SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy) { - if (print_hdr) + PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient f(sdim), fb(sdim); + AddDampingCoefficients(1.0, f); + AddBdrDampingCoefficients(1.0, fb); + if (f.empty() && fb.empty()) { - Mpi::Print("\nAssembling system matrices, number of global unknowns:\n" - " H1: {:d}, ND: {:d}, RT: {:d}\n", - GetH1Space().GlobalTrueVSize(), GetNDSpace().GlobalTrueVSize(), - GetRTSpace().GlobalTrueVSize()); - print_hdr = false; + return {}; } + auto C = std::make_unique( + BuildOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, assembly_level, skip_zeros), + GetNDSpace()); + C->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return C; +} + 
+std::unique_ptr SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy diag_policy) +{ + PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); - SumMatrixCoefficient dfr(sdim), dfi(sdim), fr(sdim), fi(sdim), fbr(sdim), fbi(sdim); - SumCoefficient dfbr, dfbi; - switch (type) - { - case OperatorType::STIFFNESS: - MFEM_VERIFY(omega == 0.0, "GetComplexSystemMatrix for type OperatorType::STIFFNESS " - "does not use omega parameter!"); - AddStiffnessCoefficients(1.0, dfr, fr, fbr); - break; - case OperatorType::DAMPING: - MFEM_VERIFY(omega == 0.0, "GetComplexSystemMatrix for type OperatorType::DAMPING " - "does not use omega parameter!"); - AddDampingCoefficients(1.0, fr, fbr); - break; - case OperatorType::MASS: - MFEM_VERIFY(omega == 0.0, "GetComplexSystemMatrix for type OperatorType::MASS does " - "not use omega parameter!"); - AddRealMassCoefficients(1.0, fr, fbr); - AddImagMassCoefficients(1.0, fi, fbi); - break; - case OperatorType::EXTRA: - MFEM_VERIFY(omega > 0.0, - "GetComplexSystemMatrix for type OperatorType::EXTRA requires " - "use of omega parameter!"); - AddExtraSystemBdrCoefficients(omega, dfbr, dfbi, fbr, fbi); - break; - } - bool has_real = false, has_imag = false; - std::unique_ptr ar, ai; - if (!dfr.empty() || !fr.empty() || !dfbr.empty() || !fbr.empty()) + SumMatrixCoefficient f(sdim), fb(sdim); + AddRealMassCoefficients(1.0, f); + AddRealMassBdrCoefficients(1.0, fb); + if (f.empty() && fb.empty()) { - has_real = true; - ar = std::make_unique(&GetNDSpace()); - AddIntegrators(*ar, dfr, fr, dfbr, fbr); - ar->SetAssemblyLevel(assembly_level); - ar->Assemble(skip_zeros); - ar->Finalize(skip_zeros); + return {}; } - if (!dfi.empty() || !fi.empty() || !dfbi.empty() || !fbi.empty()) + auto M = std::make_unique( + BuildOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, assembly_level, skip_zeros), + GetNDSpace()); + M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return 
M; +} + +std::unique_ptr +SpaceOperator::GetComplexStiffnessMatrix(Operator::DiagonalPolicy diag_policy) +{ + PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); + AddStiffnessCoefficients(1.0, df, f); + AddBdrStiffnessCoefficients(1.0, fb); + if (df.empty() && f.empty() && fb.empty()) { - has_imag = true; - ai = std::make_unique(&GetNDSpace()); - AddIntegrators(*ai, dfi, fi, dfbi, fbi); - ai->SetAssemblyLevel(assembly_level); - ai->Assemble(skip_zeros); - ai->Finalize(skip_zeros); + return {}; } - if (!has_real && !has_imag) + auto K = std::make_unique( + BuildOperator(GetNDSpace(), &df, &f, nullptr, &fb, assembly_level, skip_zeros), + nullptr, GetNDSpace()); + K->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return K; +} + +std::unique_ptr +SpaceOperator::GetComplexDampingMatrix(Operator::DiagonalPolicy diag_policy) +{ + PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient f(sdim), fb(sdim); + AddDampingCoefficients(1.0, f); + AddBdrDampingCoefficients(1.0, fb); + if (f.empty() && fb.empty()) { return {}; } - auto A = std::make_unique( - std::make_unique(std::move(ar), std::move(ai)), GetNDSpace()); - A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); - return A; + auto C = std::make_unique( + BuildOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, assembly_level, skip_zeros), + nullptr, GetNDSpace()); + C->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return C; } -std::unique_ptr SpaceOperator::GetSystemMatrix(double a0, double a1, double a2, - const ParOperator *K, - const ParOperator *C, - const ParOperator *M) +std::unique_ptr +SpaceOperator::GetComplexMassMatrix(Operator::DiagonalPolicy diag_policy) { - int height = -1, width = -1; - if (K) + PrintHeader(GetH1Space(), GetNDSpace(), 
GetRTSpace(), print_hdr); + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient fr(sdim), fi(sdim), fbr(sdim); + AddRealMassCoefficients(1.0, fr); + AddRealMassBdrCoefficients(1.0, fbr); + AddImagMassCoefficients(1.0, fi); + std::unique_ptr mr, mi; + if (!fr.empty() || !fbr.empty()) { - height = K->LocalOperator().Height(); - width = K->LocalOperator().Width(); + mr = BuildOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, assembly_level, + skip_zeros); } - else if (C) + if (!fi.empty()) { - height = C->LocalOperator().Height(); - width = C->LocalOperator().Width(); + mi = BuildOperator(GetNDSpace(), nullptr, &fi, nullptr, nullptr, assembly_level, + skip_zeros); } - else if (M) + if (!mr && !mi) { - height = M->LocalOperator().Height(); - width = M->LocalOperator().Width(); + return {}; } - MFEM_VERIFY(height >= 0 && width >= 0, - "At least one argument to GetSystemMatrix must not be empty!"); - auto sum = std::make_unique(height, width); - if (K && a0 != 0.0) + auto M = std::make_unique(std::move(mr), std::move(mi)), GetNDSpace(); + M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return M; +} + +std::unique_ptr +SpaceOperator::GetComplexExtraSystemMatrix(double omega, + Operator::DiagonalPolicy diag_policy) +{ + PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient fbr(sdim), fbi(sdim); + SumCoefficient dfbr, dfbi; + AddExtraSystemBdrCoefficients(omega, dfbr, dfbi, fbr, fbi); + std::unique_ptr ar, ai; + if (!dfbr.empty() || !fbr.empty()) { - sum->AddOperator(K->LocalOperator(), a0); + ar = BuildOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, assembly_level, + skip_zeros); } - if (C && a1 != 0.0) + if (!fi.empty()) { - sum->AddOperator(C->LocalOperator(), a1); + ai = BuildOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, assembly_level, + skip_zeros); } - if (M && a2 != 0.0) + if (!ar && !ai) { - 
sum->AddOperator(M->LocalOperator(), a2); + return {}; } - auto A = std::make_unique(std::move(sum), GetNDSpace()); - A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), Operator::DiagonalPolicy::DIAG_ONE); + auto A = std::make_unique(std::move(ar), std::move(ai)), GetNDSpace(); + A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); return A; } -std::unique_ptr SpaceOperator::GetComplexSystemMatrix( - std::complex a0, std::complex a1, std::complex a2, - const ComplexParOperator *K, const ComplexParOperator *C, const ComplexParOperator *M, - const ComplexParOperator *A2) +template +std::unique_ptr +SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, + const OperType *K, const OperType *C, const OperType *M, + const OperType *A2) { + typedef typename std::conditional::value, + ComplexParOperator, ParOperator>::type ParOperType; + typedef typename std::conditional::value, + ComplexSumOperator, SumOperator>::type SumOperType; + + const auto *PtAP_K = (K) ? dynamic_cast(K) : nullptr; + const auto *PtAP_C = (C) ? dynamic_cast(C) : nullptr; + const auto *PtAP_M = (M) ? dynamic_cast(M) : nullptr; + const auto *PtAP_A2 = (A2) ? 
dynamic_cast(A2) : nullptr; + MFEM_VERIFY((!K || PtAP_K) && (!C || PtAP_C) && (!M || PtAP_M) && (!A2 || PtAP_A2), + "SpaceOperator requires ParOperator or ComplexParOperator for system matrix " + "construction!"); + int height = -1, width = -1; - if (K) + if (PtAP_K) { - height = K->LocalOperator().Height(); - width = K->LocalOperator().Width(); + height = PtAP_K->LocalOperator().Height(); + width = PtAP_K->LocalOperator().Width(); } - else if (C) + else if (PtAP_C) { - height = C->LocalOperator().Height(); - width = C->LocalOperator().Width(); + height = PtAP_C->LocalOperator().Height(); + width = PtAP_C->LocalOperator().Width(); } - else if (M) + else if (PtAP_M) { - height = M->LocalOperator().Height(); - width = M->LocalOperator().Width(); + height = PtAP_M->LocalOperator().Height(); + width = PtAP_M->LocalOperator().Width(); } - else if (A2) + else if (PtAP_A2) { - height = A2->LocalOperator().Height(); - width = A2->LocalOperator().Width(); + height = PtAP_A2->LocalOperator().Height(); + width = PtAP_A2->LocalOperator().Width(); } MFEM_VERIFY(height >= 0 && width >= 0, "At least one argument to GetSystemMatrix must not be empty!"); - auto sum = std::make_unique(height, width); - if (K && a0 != 0.0) + + auto sum = std::make_unique(height, width); + if (PtAP_K && a0 != 0.0) { - sum->AddOperator(K->LocalOperator(), a0); + sum->AddOperator(PtAP_K->LocalOperator(), a0); } - if (C && a1 != 0.0) + if (PtAP_C && a1 != 0.0) { - sum->AddOperator(C->LocalOperator(), a1); + sum->AddOperator(PtAP_C->LocalOperator(), a1); } - if (M && a2 != 0.0) + if (PtAP_M && a2 != 0.0) { - sum->AddOperator(M->LocalOperator(), a2); + sum->AddOperator(PtAP_M->LocalOperator(), a2); } - if (A2) + if (PtAP_A2) { - sum->AddOperator(A2->LocalOperator(), 1.0); + sum->AddOperator(PtAP_A2->LocalOperator(), 1.0); } - auto A = std::make_unique(std::move(sum), GetNDSpace()); + auto A = std::make_unique(std::move(sum), GetNDSpace()); A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), 
Operator::DiagonalPolicy::DIAG_ONE); return A; } -void SpaceOperator::GetPreconditionerMatrix(double a0, double a1, double a2, double a3, - std::vector> &B, - std::vector> &AuxB) +std::unique_ptr SpaceOperator::GetInnerProductMatrix(double a0, double a2, + const ComplexOperator *K, + const ComplexOperator *M) +{ + const auto *PtAP_K = (K) ? dynamic_cast(K) : nullptr; + const auto *PtAP_M = (M) ? dynamic_cast(M) : nullptr; + MFEM_VERIFY( + (!K || PtAP_K) && (!M || PtAP_M), + "SpaceOperator requires ComplexParOperator for inner product matrix construction!"); + + int height = -1, width = -1; + if (PtAP_K) + { + height = PtAP_K->LocalOperator().Height(); + width = PtAP_K->LocalOperator().Width(); + } + else if (PtAP_M) + { + height = PtAP_M->LocalOperator().Height(); + width = PtAP_M->LocalOperator().Width(); + } + MFEM_VERIFY(height >= 0 && width >= 0, + "At least one argument to GetInnerProductMatrix must not be empty!"); + + auto sum = std::make_unique(height, width); + if (PtAP_K && a0 != 0.0) + { + sum->AddOperator(PtAP_K->LocalOperator().Real(), a0); + } + if (PtAP_M && a2 != 0.0) + { + sum->AddOperator(PtAP_M->LocalOperator().Real(), a2); + } + return std::make_unique(std::move(sum), GetNDSpace()); +} + +namespace +{ + +auto GetLevelOperator(std::unique_ptr &B, std::unique_ptr &&br, + std::unique_ptr &&bi, mfem::FiniteElementSpace &fespace) +{ + return std::make_unique(std::move(br), fespace); +} + +auto GetLevelOperator(std::unique_ptr &B, + std::unique_ptr &&br, std::unique_ptr &&bi, + mfem::FiniteElementSpace &fespace) +{ + return std::make_unique(std::move(br), std::move(bi), fespace); +} + +} // namespace + +template +std::unique_ptr SpaceOperator::GetPreconditionerMatrix(double a0, double a1, + double a2, double a3) { if (print_prec_hdr) { @@ -419,42 +525,37 @@ void SpaceOperator::GetPreconditionerMatrix(double a0, double a1, double a2, dou } MFEM_VERIFY(h1_fespaces.GetNumLevels() == nd_fespaces.GetNumLevels(), "Multigrid hierarchy mismatch for auxiliary 
space preconditioning!"); + auto B = std::make_unique>(nd_fespaces.GetNumLevels()); for (int s = 0; s < 2; s++) { - auto &B_ = (s == 0) ? B : AuxB; auto &fespaces = (s == 0) ? nd_fespaces : h1_fespaces; auto &dbc_tdof_lists = (s == 0) ? nd_dbc_tdof_lists : h1_dbc_tdof_lists; - B_.clear(); - B_.reserve(fespaces.GetNumLevels()); for (int l = 0; l < fespaces.GetNumLevels(); l++) { auto &fespace_l = fespaces.GetFESpaceAtLevel(l); - const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); - SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); - SumCoefficient dfb; - AddStiffnessCoefficients(a0, df, f, fb); - AddDampingCoefficients(a1, f, fb); - // XX TODO: Test out difference of |Mr + i Mi| vs. Mr + Mi - AddRealMassCoefficients( - pc_shifted ? std::abs(a2) : a2, f, fb); - // AddRealMassCoefficients(pc_shifted ? std::abs(a2) : a2, f, fb); - // AddImagMassCoefficients(a2, f, fb); - AddExtraSystemBdrCoefficients(a3, dfb, dfb, fb, fb); - auto b = std::make_unique(&fespace_l); - if (s == 0) - { - AddIntegrators(*b, df, f, dfb, fb); - } - else - { - // H1 auxiliary space matrix Gᵀ B G. - AddAuxIntegrators(*b, f, fb); - } if (print_prec_hdr) { Mpi::Print(" Level {:d}{}: {:d} unknowns", l, (s == 0) ? "" : " (auxiliary)", fespace_l.GlobalTrueVSize()); } + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); + SumCoefficient dfb; + AddStiffnessCoefficients(a0, df, f); + AddStiffnessBdrCoefficients(a0, fb); + AddDampingCoefficients(a1, f); + AddDampingBdrCoefficients(a1, fb); + // XX TODO: Test out difference of |Mr + i Mi| vs. Mr + Mi + // AddRealMassCoefficients(pc_shifted ? std::abs(a2) : a2, f); + // AddImagMassCoefficients(a2, f); + AddAbsMassCoefficients(pc_shifted ? std::abs(a2) : a2, f); + AddRealMassBdrCoefficients(pc_shifted ? std::abs(a2) : a2, fb); + AddExtraSystemBdrCoefficients(a3, dfb, dfb, fb, fb); + auto b = (s == 0) ? 
BuildOperator(fespace_l, &df, &f, &dfb, &fb, assembly_level, + skip_zeros, pc_lor) + : BuildAuxOperator(fespace_l, &f, &fb, assembly_level, skip_zeros, + pc_lor); + std::unique_ptr b_loc; if (pc_lor) { // After we construct the LOR discretization we deep copy the LOR matrix and the @@ -468,13 +569,10 @@ void SpaceOperator::GetPreconditionerMatrix(double a0, double a1, double a2, dou Mpi::GlobalSum(1, &nnz, fespace_l.GetComm()); Mpi::Print(", {:d} NNZ (LOR)\n", nnz); } - B_.push_back(std::make_unique(std::move(b_lor), fespace_l)); + b_loc = std::move(b_lor); } else { - b->SetAssemblyLevel(assembly_level); - b->Assemble(skip_zeros); - b->Finalize(skip_zeros); if (print_prec_hdr) { if (assembly_level == mfem::AssemblyLevel::LEGACY) @@ -488,56 +586,77 @@ void SpaceOperator::GetPreconditionerMatrix(double a0, double a1, double a2, dou Mpi::Print("\n"); } } - B_.push_back(std::make_unique(std::move(b), fespace_l)); + b_loc = std::move(b); + } + auto B_l = GetLevelOperator(B, std::move(b_loc), nullptr, fespace_l); + B_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); + if (s == 0) + { + B.AddOperator(std::move(B_l)); + } + else + { + B.AddAuxiliaryOperator(std::move(B_l)); } - B_.back()->SetEssentialTrueDofs(dbc_tdof_lists[l], - Operator::DiagonalPolicy::DIAG_ONE); } } print_prec_hdr = false; } -std::unique_ptr SpaceOperator::GetCurlMatrix() +namespace { - auto curl = std::make_unique(&GetNDSpace(), &GetRTSpace()); + +std::unique_ptr GetCurl(mfem::FiniteElementSpace &nd_fespace, + mfem::FiniteElementSpace &rt_fespace, + mfem::AssemblyLevel assembly_level, + int skip_zeros = 1) +{ + auto curl = std::make_unique(&nd_fespace, &rt_fespace); curl->AddDomainInterpolator(new mfem::CurlInterpolator); curl->SetAssemblyLevel(assembly_level); - curl->Assemble(); - curl->Finalize(); - return std::make_unique(std::move(curl), GetNDSpace(), GetRTSpace(), true); + curl->Assemble(skip_zeros); + curl->Finalize(skip_zeros); + return curl; +} + +std::unique_ptr 
GetGrad(mfem::FiniteElementSpace &h1_fespace, + mfem::FiniteElementSpace &nd_fespace, + mfem::AssemblyLevel assembly_level, + int skip_zeros = 1) +{ + auto grad = std::make_unique(&h1_fespace, &nd_fespace); + grad->AddDomainInterpolator(new mfem::GradientInterpolator); + grad->SetAssemblyLevel(assembly_level); + grad->Assemble(skip_zeros); + grad->Finalize(skip_zeros); + return grad; +} + +} // namespace + +std::unique_ptr SpaceOperator::GetCurlMatrix() +{ + return std::make_unique(GetCurl(GetNDSpace(), GetRTSpace(), assembly_level), + GetNDSpace(), GetRTSpace(), true); } std::unique_ptr SpaceOperator::GetComplexCurlMatrix() { - auto curl = std::make_unique(&GetNDSpace(), &GetRTSpace()); - curl->AddDomainInterpolator(new mfem::CurlInterpolator); - curl->SetAssemblyLevel(assembly_level); - curl->Assemble(); - curl->Finalize(); return std::make_unique( - std::make_unique(std::move(curl), nullptr), GetNDSpace(), + GetCurl(GetNDSpace(), GetRTSpace(), assembly_level), nullptr, GetNDSpace(), GetRTSpace(), true); } std::unique_ptr SpaceOperator::GetGradMatrix() { - auto grad = std::make_unique(&GetH1Space(), &GetNDSpace()); - grad->AddDomainInterpolator(new mfem::GradientInterpolator); - grad->SetAssemblyLevel(assembly_level); - grad->Assemble(); - grad->Finalize(); - return std::make_unique(std::move(grad), GetH1Space(), GetNDSpace(), true); + return std::make_unique(GetGrad(GetH1Space(), GetNDSpace(), assembly_level), + GetH1Space(), GetNDSpace(), true); } std::unique_ptr SpaceOperator::GetComplexGradMatrix() { - auto grad = std::make_unique(&GetH1Space(), &GetNDSpace()); - grad->AddDomainInterpolator(new mfem::GradientInterpolator); - grad->SetAssemblyLevel(assembly_level); - grad->Assemble(); - grad->Finalize(); return std::make_unique( - std::make_unique(std::move(grad), nullptr), GetH1Space(), + GetGrad(GetH1Space(), GetNDSpace(), assembly_level), nullptr, GetH1Space(), GetNDSpace(), true); } @@ -545,26 +664,26 @@ void SpaceOperator::AddStiffnessCoefficients(double 
coef, SumMatrixCoefficient & SumMatrixCoefficient &f, SumMatrixCoefficient &fb) { - { - constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_PERMEABILITY; - df.AddCoefficient(std::make_unique>(mat_op, coef)); - } + constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_PERMEABILITY; + df.AddCoefficient(std::make_unique>(mat_op, coef)); // Contribution for London superconductors. if (mat_op.HasLondonDepth()) { - constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_LONDON_DEPTH; - f.AddCoefficient(std::make_unique>(mat_op, coef), + constexpr MaterialPropertyType MatTypeL = MaterialPropertyType::INV_LONDON_DEPTH; + f.AddCoefficient(std::make_unique>(mat_op, coef), mat_op.GetLondonDepthMarker()); } +} +void SpaceOperator::AddStiffnessBdrCoefficients(double coef, SumMatrixCoefficient &fb) +{ // Robin BC contributions due to surface impedance and lumped ports (inductance). surf_z_op.AddStiffnessBdrCoefficients(coef, fb); lumped_port_op.AddStiffnessBdrCoefficients(coef, fb); } -void SpaceOperator::AddDampingCoefficients(double coef, SumMatrixCoefficient &f, - SumMatrixCoefficient &fb) +void SpaceOperator::AddDampingCoefficients(double coef, SumMatrixCoefficient &f) { // Contribution for domain conductivity. if (mat_op.HasConductivity()) @@ -573,7 +692,10 @@ void SpaceOperator::AddDampingCoefficients(double coef, SumMatrixCoefficient &f, f.AddCoefficient(std::make_unique>(mat_op, coef), mat_op.GetConductivityMarker()); } +} +void SpaceOperator::AddDampingBdrCoefficients(double coef, SumMatrixCoefficient &fb) +{ // Robin BC contributions due to surface impedance, lumped ports, and absorbing // boundaries (resistance). 
farfield_op.AddDampingBdrCoefficients(coef, fb); @@ -581,19 +703,20 @@ void SpaceOperator::AddDampingCoefficients(double coef, SumMatrixCoefficient &f, lumped_port_op.AddDampingBdrCoefficients(coef, fb); } -template -void SpaceOperator::AddRealMassCoefficients(double coef, SumMatrixCoefficient &f, - SumMatrixCoefficient &fb) +void SpaceOperator::AddRealMassCoefficients(double coef, SumMatrixCoefficient &f) { + constexpr MaterialPropertyType MatType = MaterialPropertyType::PERMITTIVITY_REAL; f.AddCoefficient(std::make_unique>(mat_op, coef)); +} +void SpaceOperator::AddRealMassBdrCoefficients(double coef, SumMatrixCoefficient &fb) +{ // Robin BC contributions due to surface impedance and lumped ports (capacitance). surf_z_op.AddMassBdrCoefficients(coef, fb); lumped_port_op.AddMassBdrCoefficients(coef, fb); } -void SpaceOperator::AddImagMassCoefficients(double coef, SumMatrixCoefficient &f, - SumMatrixCoefficient &fb) +void SpaceOperator::AddImagMassCoefficients(double coef, SumMatrixCoefficient &f) { // Contribution for loss tangent: ε => ε * (1 - i tan(δ)). 
if (mat_op.HasLossTangent()) @@ -604,6 +727,12 @@ void SpaceOperator::AddImagMassCoefficients(double coef, SumMatrixCoefficient &f } } +void SpaceOperator::AddAbsMassCoefficients(double coef, SumMatrixCoefficient &f) +{ + constexpr MaterialPropertyType MatType = MaterialPropertyType::PERMITTIVITY_ABS; + f.AddCoefficient(std::make_unique>(mat_op, coef)); +} + void SpaceOperator::AddExtraSystemBdrCoefficients(double omega, SumCoefficient &dfbr, SumCoefficient &dfbi, SumMatrixCoefficient &fbr, @@ -728,11 +857,18 @@ void SpaceOperator::GetRandomInitialVector(ComplexVector &v) v.SyncAlias(); } -template void -SpaceOperator::AddRealMassCoefficients( - double, SumMatrixCoefficient &, SumMatrixCoefficient &); -template void -SpaceOperator::AddRealMassCoefficients( - double, SumMatrixCoefficient &, SumMatrixCoefficient &); +template std::unique_ptr +GetSystemMatrix(double a0, double a1, double a2, const Operator *K, + const Operator *C, const Operator *M, const Operator *A2); +template std::unique_ptr +GetSystemMatrix>( + std::complex a0, std::complex a1, std::complex a2, + const ComplexOperator *K, const ComplexOperator *C, const ComplexOperator *M, + const ComplexOperator *A2); + +template std::unique_ptr GetPreconditionerMatrix(double a0, double a1, + double a2, double a3); +template std::unique_ptr +GetPreconditionerMatrix(double a0, double a1, double a2, double a3); } // namespace palace diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp index b8774037e..24c042639 100644 --- a/palace/models/spaceoperator.hpp +++ b/palace/models/spaceoperator.hpp @@ -9,7 +9,6 @@ #include #include #include "fem/coefficient.hpp" -#include "linalg/complex.hpp" #include "linalg/operator.hpp" #include "linalg/vector.hpp" #include "models/farfieldboundaryoperator.hpp" @@ -68,14 +67,14 @@ class SpaceOperator // Helper functions for building the bilinear forms corresponding to the discretized // operators in Maxwell's equations. 
void AddStiffnessCoefficients(double coef, SumMatrixCoefficient &df, - SumMatrixCoefficient &f, SumMatrixCoefficient &fb); - void AddDampingCoefficients(double coef, SumMatrixCoefficient &f, - SumMatrixCoefficient &fb); - template - void AddRealMassCoefficients(double coef, SumMatrixCoefficient &f, - SumMatrixCoefficient &fb); - void AddImagMassCoefficients(double coef, SumMatrixCoefficient &f, - SumMatrixCoefficient &fb); + SumMatrixCoefficient &f); + void AddStiffnessBdrCoefficients(double coef, SumMatrixCoefficient &fb); + void AddDampingCoefficients(double coef, SumMatrixCoefficient &f); + void AddDampingBdrCoefficients(double coef, SumMatrixCoefficient &fb); + void AddRealMassCoefficients(double coef, SumMatrixCoefficient &f); + void AddRealMassBdrCoefficients(double coef, SumMatrixCoefficient &fb); + void AddImagMassCoefficients(double coef, SumMatrixCoefficient &f); + void AddAbsMassCoefficients(double coef, SumMatrixCoefficient &f); void AddExtraSystemBdrCoefficients(double omega, SumCoefficient &dfbr, SumCoefficient &dfbi, SumMatrixCoefficient &fbr, SumMatrixCoefficient &fbi); @@ -128,53 +127,52 @@ class SpaceOperator // A = K + iω C - ω² (Mr + i Mi) + A2(ω) . // For time domain problems, any one of K, C, or M = Mr can be constructed. The argument // ω is required only for the constructing the "extra" matrix A2(ω). 
- enum class OperatorType - { - STIFFNESS, - DAMPING, - MASS, - EXTRA - }; - - std::unique_ptr GetSystemMatrix(OperatorType type, - Operator::DiagonalPolicy diag_policy); - std::unique_ptr - GetComplexSystemMatrix(OperatorType type, Operator::DiagonalPolicy diag_policy) - { - return GetComplexSystemMatrix(type, 0.0, diag_policy); - } - std::unique_ptr - GetComplexSystemMatrix(OperatorType type, double omega, - Operator::DiagonalPolicy diag_policy); + std::unique_ptr GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy); + std::unique_ptr GetDampingMatrix(Operator::DiagonalPolicy diag_policy); + std::unique_ptr GetMassMatrix(Operator::DiagonalPolicy diag_policy); + std::unique_ptr + GetComplexStiffnessMatrix(Operator::DiagonalPolicy diag_policy); + std::unique_ptr + GetComplexDampingMatrix(Operator::DiagonalPolicy diag_policy); + std::unique_ptr + GetComplexMassMatrix(Operator::DiagonalPolicy diag_policy); + std::unique_ptr + GetComplexExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_policy); // Construct the complete frequency or time domain system matrix using the provided // stiffness, damping, mass, and extra matrices: // A = a0 K + a1 C + a2 (Mr + i Mi) + A2 . // It is assumed that the inputs have been constructed using previous calls to // GetSystemMatrix() and the returned operator does inherit ownership of any of them. 
- std::unique_ptr GetSystemMatrix(double a0, double a1, double a2, - const ParOperator *K, const ParOperator *C, - const ParOperator *M); - std::unique_ptr - GetComplexSystemMatrix(std::complex a0, std::complex a1, - std::complex a2, const ComplexParOperator *K, - const ComplexParOperator *C, const ComplexParOperator *M, - const ComplexParOperator *A2); + template + std::unique_ptr + GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, const OperType *K, + const OperType *C, const OperType *M, const OperType *A2 = nullptr); + + // Construct the real, SPD matrix for weighted L2 or H(curl) inner products: + // B = a0 Kr + a2 Mr . + // It is assumed that the inputs have been constructed using previous calls to + // GetSystemMatrix() and the returned operator does inherit ownership of any of them. + // If K or M have eliminated boundary conditions, they are not eliminated from the + // returned operator. + std::unique_ptr GetInnerProductMatrix(double a0, double a2, + const ComplexOperator *K, + const ComplexOperator *M); // Construct the real, optionally SPD matrix for frequency or time domain linear system // preconditioning (Mr > 0, Mi < 0, |Mr + i Mi| is done on the material property // coefficient, not the matrix entries themselves): // B = a0 K + a1 C -/+ a2 |Mr + i Mi| + A2r(a3) + A2i(a3) . - void GetPreconditionerMatrix(double a0, double a1, double a2, double a3, - std::vector> &B, - std::vector> &AuxB); + template + std::unique_ptr GetPreconditionerMatrix(double a0, double a1, double a2, + double a3); // Construct and return the discrete curl or gradient matrices. The complex variants // return a matrix suitable for applying to complex-valued vectors. 
- std::unique_ptr GetCurlMatrix(); - std::unique_ptr GetComplexCurlMatrix(); - std::unique_ptr GetGradMatrix(); - std::unique_ptr GetComplexGradMatrix(); + std::unique_ptr GetCurlMatrix(); + std::unique_ptr GetComplexCurlMatrix(); + std::unique_ptr GetGradMatrix(); + std::unique_ptr GetComplexGradMatrix(); // Assemble the right-hand side source term vector for an incident field or current source // applied on specified excited boundaries. The return value indicates whether or not the @@ -191,6 +189,9 @@ class SpaceOperator // boundary conditions. void GetRandomInitialVector(ComplexVector &v); void GetConstantInitialVector(ComplexVector &v); + + // Get the associated MPI communicator. + MPI_Comm GetComm() const { return GetNDSpace().GetComm(); } }; } // namespace palace From 7838a45cda62f9041bb24a0b2304faabcfa56a66 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Wed, 24 May 2023 13:34:49 -0700 Subject: [PATCH 15/41] WIP: Updates for linear solver and eigenvalue solver classes for new solvers and operator types --- palace/linalg/arpack.cpp | 12 +- palace/linalg/arpack.hpp | 2 +- palace/linalg/chebyshev.cpp | 162 ++++++++++++++----- palace/linalg/chebyshev.hpp | 17 +- palace/linalg/distrelaxation.cpp | 104 ++++++++---- palace/linalg/distrelaxation.hpp | 36 +++-- palace/linalg/eps.hpp | 5 +- palace/linalg/gmg.cpp | 202 +++++++++++------------ palace/linalg/gmg.hpp | 50 +++--- palace/linalg/jacobi.hpp | 5 +- palace/linalg/ksp.cpp | 267 ++++++++++++------------------- palace/linalg/ksp.hpp | 97 ++++------- palace/linalg/slepc.cpp | 108 +++++++------ palace/linalg/slepc.hpp | 2 +- palace/utils/configfile.cpp | 18 ++- palace/utils/configfile.hpp | 15 +- 16 files changed, 578 insertions(+), 524 deletions(-) diff --git a/palace/linalg/arpack.cpp b/palace/linalg/arpack.cpp index f785de0df..aa951b589 100644 --- a/palace/linalg/arpack.cpp +++ b/palace/linalg/arpack.cpp @@ -20,7 +20,6 @@ // clang-format on #include "linalg/divfree.hpp" #include "linalg/ksp.hpp" -#include 
"linalg/vector.hpp" #include "utils/communication.hpp" namespace @@ -270,8 +269,7 @@ void ArpackEigenSolver::SetInitialSpace(const ComplexVector &v) { r = std::make_unique[]>(n); } - MFEM_VERIFY(v.Size() == 2 * n, - "Invalid size mismatch for provided initial space vector!"); + MFEM_VERIFY(v.Size() == n, "Invalid size mismatch for provided initial space vector!"); v.Get(r.get(), n); info = 1; } @@ -365,7 +363,7 @@ int ArpackEigenSolver::SolveInternal(int n, std::complex *r, " Total number of linear systems solved: {:d}\n" " Total number of linear solver iterations: {:d}\n", GetName(), (num_conv >= nev) ? "converged" : "finished", num_conv, num_it, - opInv->NumTotalMult(), opInv->NumTotalMultIter()); + opInv->NumTotalMult(), opInv->NumTotalMultIterations()); } if (num_conv < nev) { @@ -438,7 +436,7 @@ void ArpackEigenSolver::GetEigenvector(int i, ComplexVector &x) const { MFEM_VERIFY(eig && i >= 0 && i < nev, "Out of range eigenpair requested (i = " << i << ", nev = " << nev << ")!"); - MFEM_VERIFY(x.Size() == 2 * n, "Invalid size mismatch for provided eigenvector!"); + MFEM_VERIFY(x.Size() == n, "Invalid size mismatch for provided eigenvector!"); const int &j = perm.get()[i]; x.Set(V.get() + j * n, n); } @@ -492,7 +490,7 @@ void ArpackEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperat x.SetSize(opK->Height()); y.SetSize(opK->Height()); z.SetSize(opK->Height()); - n = opK->Height() / 2; + n = opK->Height(); } int ArpackEPSSolver::Solve() @@ -645,7 +643,7 @@ void ArpackPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperat y1.SetSize(opK->Height()); y2.SetSize(opK->Height()); z.SetSize(opK->Height()); - n = opK->Height() / 2; + n = opK->Height(); } int ArpackPEPSolver::Solve() diff --git a/palace/linalg/arpack.hpp b/palace/linalg/arpack.hpp index a09038a31..fc88f14ea 100644 --- a/palace/linalg/arpack.hpp +++ b/palace/linalg/arpack.hpp @@ -9,9 +9,9 @@ #include #include #include -#include "linalg/complex.hpp" #include "linalg/eps.hpp" 
#include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp index 0cdc83027..e6269e82a 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ -9,42 +9,135 @@ namespace palace { -ChebyshevSmoother::ChebyshevSmoother(int smooth_it, int poly_order) - : mfem::Solver(), pc_it(smooth_it), order(poly_order), A(nullptr) +namespace { + +void GetDiagonal(const ParOperator &A, Vector &diag) +{ + diag.SetSize(A.Height()); + A.AssembleDiagonal(diag); } -void ChebyshevSmoother::SetOperator(const Operator &op) +void GetDiagonal(const ComplexParOperator &A, ComplexVector &diag) { - const auto *PtAP = dynamic_cast(&op); - MFEM_VERIFY(PtAP, "ChebyshevSmoother requires a ParOperator operator!"); - A = PtAP; - - height = A->Height(); - width = A->Width(); - r.SetSize(height); - d.SetSize(height); - dinv.SetSize(height); - A->AssembleDiagonal(dinv); + MFEM_VERIFY(A.HasReal() || A.HasImag(), + "Invalid zero ComplexParOperator for ChebyshevSmoother!"); + diag.SetSize(A.Height()); + diag = 0.0; + if (A.HasReal()) + { + A.Real().AssembleDiagonal(diag.Real()); + } + if (A.HasImag()) + { + A.Imag().AssembleDiagonal(diag.Imag()); + } +} + +} // namespace + +template +ChebyshevSmoother::ChebyshevSmoother(int smooth_it, int poly_order) + : Solver(), pc_it(smooth_it), order(poly_order), A(nullptr) +{ +} + +template +void ChebyshevSmoother::SetOperator(const OperType &op) +{ + typedef typename std::conditional::value, + ComplexParOperator, ParOperator>::type ParOperType; + + const auto *PtAP = dynamic_cast(&op); + MFEM_VERIFY(PtAP, + "ChebyshevSmoother requires a ParOperator or ComplexParOperator operator!"); + GetDiagonal(*PtAP, dinv); dinv.Reciprocal(); + A = &op; + r.SetSize(A->Height()); + d.SetSize(A->Height()); + // Set up Chebyshev coefficients using the computed maximum eigenvalue estimate. 
See // mfem::OperatorChebyshevSmoother or Adams et al., Parallel multigrid smoothing: // polynomial versus Gauss-Seidel, JCP (2003). - DiagonalOperator Dinv(dinv); - SymmetricProductOperator DinvA(Dinv, *A); - lambda_max = 1.1 * linalg::SpectralNorm(A->GetComm(), DinvA, false); + DiagonalOperator Dinv(dinv); + ProductOperator DinvA(Dinv, *A); + lambda_max = 1.1 * linalg::SpectralNorm(PtAP->GetComm(), DinvA, false); } -void ChebyshevSmoother::Mult(const Vector &x, Vector &y) const +namespace +{ + +inline void ApplyOrder0(double sr, const Vector &dinv, const Vector &r, Vector &d) +{ + const int N = d.Size(); + const auto *DI = dinv.Read(); + const auto *R = r.Read(); + auto *D = d.ReadWrite(); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { D[i] = sr * DI[i] * R[i]; }); +} + +inline void ApplyOrder0(const double sr, const ComplexVector &dinv, const ComplexVector &r, + ComplexVector &d) +{ + const int N = dinv.Size(); + const auto *DIR = dinv.Real().Read(); + const auto *DII = dinv.Imag().Read(); + const auto *RR = r.Real().Read(); + const auto *RI = r.Imag().Read(); + auto *DR = d.Real().ReadWrite(); + auto *DI = d.Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = DII[i] * RR[i] + DIR[i] * RI[i]; + DR[i] = sr * (DIR[i] * RR[i] - DII[i] * RI[i]); + DI[i] = sr * t; + }); +} + +inline void ApplyOrderK(const double sd, const double sr, const Vector &dinv, + const Vector &r, Vector &d) +{ + const int N = dinv.Size(); + const auto *DI = dinv.Read(); + const auto *R = r.Read(); + auto *D = d.ReadWrite(); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { D[i] = sd * D[i] + sr * DI[i] * R[i]; }); +} + +inline void ApplyOrderK(const double sd, const double sr, const ComplexVector &dinv, + const ComplexVector &r, ComplexVector &d) +{ + const int N = dinv.Size(); + const auto *DIR = dinv.Real().Read(); + const auto *DII = dinv.Imag().Read(); + const auto *RR = r.Real().Read(); + const auto *RI = r.Imag().Read(); + auto *DR = 
d.Real().ReadWrite(); + auto *DI = d.Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = DII[i] * RR[i] + DIR[i] * RI[i]; + DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] - DII[i] * RI[i]); + DI[i] = sd * DI[i] + sr * t + }); +} + +} // namespace + +template +void ChebyshevSmoother::Mult(const VecType &x, VecType &y) const { // Apply smoother: y = y + p(A) (x - A y) . for (int it = 0; it < pc_it; it++) { - if (iterative_mode || it > 0) + if (initial_guess || it > 0) { A->Mult(y, r); - subtract(x, r, r); + linalg::AXPBY(1.0, x, -1.0, r); } else { @@ -54,35 +147,20 @@ void ChebyshevSmoother::Mult(const Vector &x, Vector &y) const // 4th-kind Chebyshev smoother, from Phillips and Fischer or Lottes (with k -> k + 1 // shift due to 1-based indexing). - { - const int N = height; - const auto *DI = dinv.Read(); - const auto *R = r.Read(); - auto *D = d.ReadWrite(); - mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) - { D[i] = 4.0 / (3.0 * lambda_max) * DI[i] * R[i]; }); - } + ApplyOrder0(4.0 / (3.0 * lambda_max), dinv, r, d); for (int k = 1; k < order; k++) { + y += d; A->AddMult(d, r, -1.0); - { - const int N = height; - const double sd = (2.0 * k - 1.0) / (2.0 * k + 3.0); - const double sr = (8.0 * k + 4.0) / ((2.0 * k + 3.0) * lambda_max); - const auto *DI = dinv.Read(); - const auto *R = r.Read(); - auto *Y = y.ReadWrite(); - auto *D = d.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - Y[i] += D[i]; - D[i] = sd * D[i] + sr * DI[i] * R[i]; - }); - } + const double sd = (2.0 * k - 1.0) / (2.0 * k + 3.0); + const double sr = (8.0 * k + 4.0) / ((2.0 * k + 3.0) * lambda_max); + ApplyOrderK(sd, sr, dinv, r, d); } y += d; } } +template class ChebyshevSmoother; +template class ChebyshevSmoother; + } // namespace palace diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp index 1302b113f..8b4945142 100644 --- a/palace/linalg/chebyshev.hpp +++ b/palace/linalg/chebyshev.hpp @@ -4,8 +4,8 @@ #ifndef 
PALACE_LINALG_CHEBYSHEV_SMOOTHER_HPP #define PALACE_LINALG_CHEBYSHEV_SMOOTHER_HPP -#include #include "linalg/operator.hpp" +#include "linalg/solver.hpp" #include "linalg/vector.hpp" namespace palace @@ -18,32 +18,33 @@ namespace palace // Chebyshev smoothers and one-sided V-cycles, arXiv:2210.03179v1 (2022) for reference on // the 4th-kind Chebyshev polynomial smoother. // -class ChebyshevSmoother : public mfem::Solver +template +class ChebyshevSmoother : public Solver { private: // Number of smoother iterations and polynomial order. const int pc_it, order; // System matrix (not owned). - const ParOperator *A; + const OperType *A; // Inverse diagonal scaling of the operator. - Vector dinv; + VecType dinv; // Maximum operator eigenvalue for Chebyshev polynomial smoothing. double lambda_max; // Temporary vectors for smoother application. - mutable Vector r, d; + mutable VecType r, d; public: ChebyshevSmoother(int smooth_it, int poly_order); - void SetOperator(const Operator &op) override; + void SetOperator(const OperType &op) override; - void Mult(const Vector &x, Vector &y) const override; + void Mult(const VecType &x, VecType &y) const override; - void MultTranspose(const Vector &x, Vector &y) const override + void MultTranspose(const VecType &x, VecType &y) const override { Mult(x, y); // Assumes operator symmetry } diff --git a/palace/linalg/distrelaxation.cpp b/palace/linalg/distrelaxation.cpp index db268719f..862ca2371 100644 --- a/palace/linalg/distrelaxation.cpp +++ b/palace/linalg/distrelaxation.cpp @@ -3,17 +3,18 @@ #include "distrelaxation.hpp" +#include #include #include "linalg/chebyshev.hpp" namespace palace { -DistRelaxationSmoother::DistRelaxationSmoother(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace, - int smooth_it, int cheby_smooth_it, - int cheby_order) - : mfem::Solver(), pc_it(smooth_it), A(nullptr), A_G(nullptr) +template +DistRelaxationSmoother::DistRelaxationSmoother( + mfem::ParFiniteElementSpace 
&nd_fespace, mfem::ParFiniteElementSpace &h1_fespace, + int smooth_it, int cheby_smooth_it, int cheby_order) + : Solver(), pc_it(smooth_it), A(nullptr), A_G(nullptr), dbc_tdof_list_G(nullptr) { // Construct discrete gradient matrix for the auxiliary space. { @@ -27,25 +28,29 @@ DistRelaxationSmoother::DistRelaxationSmoother(mfem::ParFiniteElementSpace &nd_f } // Initialize smoothers. - B = std::make_unique(cheby_smooth_it, cheby_order); - B_G = std::make_unique(cheby_smooth_it, cheby_order); - B_G->iterative_mode = false; + B = std::make_unique>(cheby_smooth_it, cheby_order); + B_G = std::make_unique>(cheby_smooth_it, cheby_order); + B_G->SetInitialGuess(false); } -void DistRelaxationSmoother::SetOperator(const Operator &op, const Operator &op_G) +template +void DistRelaxationSmoother::SetOperator(const Operator &op, const Operator &op_G) { + typedef typename std::conditional::value, + ComplexParOperator, ParOperator>::type ParOperType; + MFEM_VERIFY(op.Height() == G->Height() && op.Width() == G->Height() && op_G.Height() == G->Width() && op_G.Width() == G->Width(), "Invalid operator sizes for DistRelaxationSmoother!"); - const auto *PtAP = dynamic_cast(&op); - const auto *PtAP_G = dynamic_cast(&op_G); - MFEM_VERIFY(PtAP && PtAP_G, "ChebyshevSmoother requires a ParOperator operator!"); - A = PtAP; - A_G = PtAP_G; - - height = A->Height(); - width = A->Width(); - r.SetSize(height); + A = &op; + A_G = &op_G; + + const auto *PtAP_G = dynamic_cast(&op_G); + MFEM_VERIFY(PtAP_G, + "ChebyshevSmoother requires a ParOperator or ComplexParOperator operator!"); + dbc_tdof_list_G = PtAP_G->GetEssentialTrueDofs(); + + r.SetSize(A->Height()); x_G.SetSize(A_G->Height()); y_G.SetSize(A_G->Height()); @@ -54,55 +59,88 @@ void DistRelaxationSmoother::SetOperator(const Operator &op, const Operator &op_ B_G->SetOperator(*A_G); } -void DistRelaxationSmoother::Mult(const Vector &x, Vector &y) const +namespace +{ + +inline void RealAddMult(Operator &op, const Vector &x, Vector &y) +{ + 
op.AddMult(x, y, 1.0); +} + +inline void RealAddMult(Operator &op, const ComplexVector &x, ComplexVector &y) +{ + op.AddMult(x.Real(), y.Real(), 1.0); + op.AddMult(x.Imag(), y.Imag(), 1.0); +} + +inline void RealMultTranspose(Operator &op, const Vector &x, Vector &y) +{ + op.MultTranspose(x, y); +} + +inline void RealMultTranspose(Operator &op, const ComplexVector &x, ComplexVector &y) +{ + op.MultTranspose(x.Real(), y.Real()); + op.MultTranspose(x.Imag(), y.Imag()); +} + +} // namespace + +template +void DistRelaxationSmoother::Mult(const VecType &x, VecType &y) const { // Apply smoother. for (int it = 0; it < pc_it; it++) { // y = y + B (x - A y) - B->iterative_mode = (iterative_mode || it > 0); + B->SetInitialGuess(initial_guess || it > 0); B->Mult(x, y); // y = y + G B_G Gᵀ (x - A y) A->Mult(y, r); - subtract(x, r, r); - G->MultTranspose(r, x_G); - if (A_G->GetEssentialTrueDofs()) + linalg::AXPBY(1.0, x, -1.0, r); + RealMultTranspose(*G, r, x_G); + if (dbc_tdof_list_G) { - x_G.SetSubVector(*A_G->GetEssentialTrueDofs(), 0.0); + x_G.SetSubVector(*dbc_tdof_list_G, 0.0); } B_G->Mult(x_G, y_G); - G->AddMult(y_G, y, 1.0); + RealAddMult(*G, y_G, y); } } -void DistRelaxationSmoother::MultTranspose(const Vector &x, Vector &y) const +template +void DistRelaxationSmoother::MultTranspose(const VecType &x, VecType &y) const { // Apply transpose. 
- B->iterative_mode = true; + B->SetInitialGuess(true); for (int it = 0; it < pc_it; it++) { // y = y + G B_Gᵀ Gᵀ (x - A y) - if (iterative_mode || it > 0) + if (initial_guess || it > 0) { A->Mult(y, r); - subtract(x, r, r); - G->MultTranspose(r, x_G); + linalg::AXPBY(1.0, x, -1.0, r); + RealMultTranspose(*G, r, x_G); } else { y = 0.0; + RealMultTranspose(*G, x, x_G); } - if (A_G->GetEssentialTrueDofs()) + if (dbc_tdof_list_G) { - x_G.SetSubVector(*A_G->GetEssentialTrueDofs(), 0.0); + x_G.SetSubVector(*dbc_tdof_list_G, 0.0); } B_G->MultTranspose(x_G, y_G); - G->AddMult(y_G, y, 1.0); + RealAddMult(*G, y_G, y); // y = y + Bᵀ (x - A y) B->MultTranspose(x, y); } } +template class DistRelaxationSmoother; +template class DistRelaxationSmoother; + } // namespace palace diff --git a/palace/linalg/distrelaxation.hpp b/palace/linalg/distrelaxation.hpp index 78c43ee64..1b670c797 100644 --- a/palace/linalg/distrelaxation.hpp +++ b/palace/linalg/distrelaxation.hpp @@ -5,11 +5,19 @@ #define PALACE_LINALG_DIST_RELAXATION_SMOOTHER_HPP #include -#include -#include #include "linalg/operator.hpp" +#include "linalg/solver.hpp" #include "linalg/vector.hpp" +namespace mfem +{ + +template +class Array; +class ParFiniteElementSpace; + +} // namespace mfem + namespace palace { @@ -19,40 +27,42 @@ namespace palace // Reference: Hiptmair, Multigrid method for Maxwell's equations, SIAM J. Numer. Anal. // (1998). // -class DistRelaxationSmoother : public mfem::Solver +template +class DistRelaxationSmoother : public Solver { private: // Number of smoother iterations. const int pc_it; - // System matrix and its projection Gᵀ A G (not owned). - const ParOperator *A, *A_G; + // System matrix and its projection GᵀAG (not owned). + const OperType *A, *A_G; + const mfem::Array *dbc_tdof_list_G; // Discrete gradient matrix. - std::unique_ptr G; + std::unique_ptr G; // Point smoother objects for each matrix. 
- mutable std::unique_ptr B; - std::unique_ptr B_G; + mutable std::unique_ptr> B; + std::unique_ptr> B_G; // Temporary vectors for smoother application. - mutable Vector r, x_G, y_G; + mutable VecType r, x_G, y_G; public: DistRelaxationSmoother(mfem::ParFiniteElementSpace &nd_fespace, mfem::ParFiniteElementSpace &h1_fespace, int smooth_it, int cheby_smooth_it, int cheby_order); - void SetOperator(const Operator &op) override + void SetOperator(const OperType &op) override { MFEM_ABORT("SetOperator with a single operator is not implemented for " "DistRelaxationSmoother, use the two argument signature instead!"); } - void SetOperator(const Operator &op, const Operator &op_G); + void SetOperators(const OperType &op, const OperType &op_G); - void Mult(const Vector &x, Vector &y) const override; + void Mult(const VecType &x, VecType &y) const override; - void MultTranspose(const Vector &x, Vector &y) const override; + void MultTranspose(const VecType &x, VecType &y) const override; }; } // namespace palace diff --git a/palace/linalg/eps.hpp b/palace/linalg/eps.hpp index 34b0d4809..ff0229e2a 100644 --- a/palace/linalg/eps.hpp +++ b/palace/linalg/eps.hpp @@ -6,14 +6,13 @@ #include #include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { -class DivFreeSolver; class ComplexKspSolver; -class ComplexOperator; -class ComplexVector; +class DivFreeSolver; // // Pure abstract base class for solving generalized linear eigenvalue problems problems or diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index 5de18df9b..2c537be08 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -3,49 +3,42 @@ #include "gmg.hpp" +#include #include "linalg/chebyshev.hpp" #include "linalg/distrelaxation.hpp" namespace palace { -GeometricMultigridSolver::GeometricMultigridSolver( - std::unique_ptr &&coarse_solver, +template +GeometricMultigridSolver::GeometricMultigridSolver( + std::unique_ptr> &&coarse_solver, mfem::ParFiniteElementSpaceHierarchy &fespaces, 
mfem::ParFiniteElementSpaceHierarchy *aux_fespaces, int cycle_it, int smooth_it, int cheby_order) - : mfem::Solver(), pc_it(cycle_it) + : Solver(), pc_it(cycle_it) { // Configure levels of geometric coarsening. Multigrid vectors will be configured at first // call to Mult. The multigrid operator size is set based on the finest space dimension. const int n_levels = fespaces.GetNumLevels(); MFEM_VERIFY(n_levels > 0, "Empty finite element space hierarchy during multigrid solver setup!"); - A_.resize(n_levels, nullptr); - P_.resize(n_levels - 1, nullptr); - x_.resize(n_levels, Vector()); - y_.resize(n_levels, Vector()); - r_.resize(n_levels, Vector()); - xrefs_.resize(n_levels, std::vector()); - yrefs_.resize(n_levels, std::vector()); - rrefs_.resize(n_levels, std::vector()); - X_.resize(n_levels, mfem::Array()); - Y_.resize(n_levels, mfem::Array()); - R_.resize(n_levels, mfem::Array()); + A.resize(n_levels, nullptr); + P.resize(n_levels - 1, nullptr); + dbc_tdof_lists.resize(n_levels - 1, nullptr); + X.resize(n_levels, Vector()); + Y.resize(n_levels, Vector()); + R.resize(n_levels, Vector()); // Configure prolongation operators. for (int l = 0; l < n_levels - 1; l++) { - const auto *PtAP_l = - dynamic_cast(fespaces.GetProlongationAtLevel(l)); - MFEM_VERIFY(PtAP_l, - "GeometricMultigridSolver requires ParOperator prolongation operators!"); - P_[l] = PtAP_l; + P_[l] = fespaces.GetProlongationAtLevel(l); } // Use the supplied level 0 (coarse) solver. - B_.reserve(n_levels); - B_.push_back(std::move(coarse_solver)); + B.reserve(n_levels); + B.push_back(std::move(coarse_solver)); // Configure level smoothers. Use distributive relaxation smoothing if an auxiliary // finite element space was provided. 
@@ -53,7 +46,7 @@ GeometricMultigridSolver::GeometricMultigridSolver( { for (int l = 1; l < n_levels; l++) { - B_.push_back(std::make_unique( + B.push_back(std::make_unique>( fespaces.GetFESpaceAtLevel(l), aux_fespaces->GetFESpaceAtLevel(l), smooth_it, 1, cheby_order)); } @@ -62,129 +55,140 @@ GeometricMultigridSolver::GeometricMultigridSolver( { for (int l = 1; l < n_levels; l++) { - B_.push_back(std::make_unique(smooth_it, cheby_order)); + B.push_back(std::make_unique>(smooth_it, cheby_order)); } } } -void GeometricMultigridSolver::SetOperator( - const std::vector> &ops, - const std::vector> *aux_ops) +template +void GeometricMultigridSolver::SetOperator(const OperType &op) { - const int n_levels = static_cast(A_.size()); - MFEM_VERIFY(static_cast(ops.size()) == n_levels && - (!aux_ops || static_cast(aux_ops->size()) == n_levels), - "Invalid number of levels for operators in multigrid solver setup!"); + typedef typename std::conditional::value, + ComplexParOperator, ParOperator>::type ParOperType; + + const auto *mg_op = dynamic_cast *>(&op); + MFEM_VERIFY(mg_op, "GeometricMultigridSolver requires a MultigridOperator argument " + "provided to SetOperator!"); + + const int n_levels = static_cast(A.size()); + MFEM_VERIFY( + mg_op->GetNumLevels() == n_levels && + (!mg_op->HasAuxiliaryOperators() || mg_op->GetNumAuxiliaryLevels() == n_levels), + "Invalid number of levels for operators in multigrid solver setup!"); for (int l = 0; l < n_levels; l++) { - A_[l] = ops[l].get(); - auto *dist_smoother = dynamic_cast(B_[l].get()); + A[l] = &mg_op->GetOperatorAtLevel(l); + MFEM_VERIFY(A[l]->Height() == P[l]->Width() && A[l]->Width() == P[l]->Width(), + "Invalid operator sizes for GeometricMultigridSolver!"); + + const auto *PtAP_l = dynamic_cast(A[l]); + MFEM_VERIFY( + PtAP_l, + "GeometricMultigridSolver requires ParOperator or ComplexParOperator operators!"); + if (l < n_levels - 1) + { + dbc_tdof_lists[l] = PtAP_l->GetEssentialTrueDofs(); + } + + auto *dist_smoother = 
dynamic_cast *>(B[l].get()); if (dist_smoother) { - MFEM_VERIFY(aux_ops, "Distributive relaxation smoother relies on both primary space " - "and auxiliary space operators for multigrid smoothing!"); - dist_smoother->SetOperator(*ops[l], *(*aux_ops)[l]); + MFEM_VERIFY(mg_op->HasAuxiliaryOperators(), + "Distributive relaxation smoother relies on both primary space and " + "auxiliary space operators for multigrid smoothing!"); + dist_smoother->SetOperators(*A[l], mg_op->GetAuxiliaryOperatorAtLevel(l)); } else { - B_[l]->SetOperator(*ops[l]); + B[l]->SetOperator(*A[l]); } - } - // Operator size is given by the fine level dimensions. - height = A_.back()->Height(); - width = A_.back()->Width(); + X[l].SetSize(A[l]->Height()); + Y[l].SetSize(A[l]->Height()); + R[l].SetSize(A[l]->Height()); + } } -void GeometricMultigridSolver::ArrayMult(const mfem::Array &X, - mfem::Array &Y) const +template +void GeometricMultigridSolver::Mult(const VecType &x, VecType &y) const { // Initialize. - const int n_levels = static_cast(A_.size()), n_rhs = X.Size(); - MFEM_ASSERT(!iterative_mode, "Geometric multigrid solver does not use iterative_mode!"); + const int n_levels = static_cast(A.size()); + MFEM_ASSERT(!initial_guess, "Geometric multigrid solver does not use initial guess!"); MFEM_ASSERT(n_levels > 1 || pc_it == 1, "Single-level geometric multigrid will not work with multiple iterations!"); - if (n_rhs * height != x_.back().Size()) - { - for (int l = 0; l < n_levels; l++) - { - MFEM_ASSERT(A_[l], "Missing operator for geometric multigrid level " << l << "!"); - x_[l].SetSize(n_rhs * A_[l]->Height()); - y_[l].SetSize(n_rhs * A_[l]->Height()); - r_[l].SetSize(n_rhs * A_[l]->Height()); - xrefs_[l].resize(n_rhs); - yrefs_[l].resize(n_rhs); - rrefs_[l].resize(n_rhs); - X_[l].SetSize(n_rhs); - Y_[l].SetSize(n_rhs); - R_[l].SetSize(n_rhs); - for (int j = 0; j < n_rhs; j++) - { - xrefs_[l][j].MakeRef(x_[l], j * A_[l]->Height(), A_[l]->Height()); - yrefs_[l][j].MakeRef(y_[l], j * 
A_[l]->Height(), A_[l]->Height()); - rrefs_[l][j].MakeRef(r_[l], j * A_[l]->Height(), A_[l]->Height()); - X_[l][j] = &xrefs_[l][j]; - Y_[l][j] = &yrefs_[l][j]; - R_[l][j] = &rrefs_[l][j]; - } - } - } - // Apply V-cycle. X_ and Y_ on the finest level just point to X and Y to avoid an extra - // copy. - for (int j = 0; j < n_rhs; j++) - { - X_.back()[j] = const_cast(X[j]); - Y_.back()[j] = Y[j]; - } + // Apply V-cycle. The initial guess for y is zero'd at the first pre-smooth iteration. + X.back() = x; for (int it = 0; it < pc_it; it++) { VCycle(n_levels - 1, (it > 0)); } + y = Y.back(); +} + +namespace +{ + +inline void RealMult(Operator &op, const Vector &x, Vector &y) +{ + op.Mult(x, y); } -void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const +inline void RealMult(Operator &op, const Complex &x, Complex &y) +{ + op.Mult(x.Real(), y.Real()); + op.Mult(x.Imag(), y.Imag()); +} + +inline void RealMultTranspose(Operator &op, const Vector &x, Vector &y) +{ + op.MultTranspose(x, y); +} + +inline void RealMultTranspose(Operator &op, const Complex &x, Complex &y) +{ + op.MultTranspose(x.Real(), y.Real()); + op.MultTranspose(x.Imag(), y.Imag()); +} + +} // namespace + +template +void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const { // Pre-smooth, with zero initial guess (Y = 0 set inside). This is the coarse solve at // level 0. Important to note that the smoothers must respect the iterative_mode flag // correctly (given X, Y, compute Y <- Y + B (X - A Y)) . - const int n_rhs = X_[l].Size(); - B_[l]->iterative_mode = initial_guess; - B_[l]->ArrayMult(X_[l], Y_[l]); + B[l]->SetInitialGuess(initial_guess); + B[l]->Mult(X[l], Y[l]); if (l == 0) { return; } // Compute residual. - A_[l]->ArrayMult(Y_[l], R_[l]); - for (int j = 0; j < n_rhs; j++) - { - subtract(*X_[l][j], *R_[l][j], *R_[l][j]); - } + A[l]->Mult(Y[l], R[l]); + linalg::AXPBY(1.0, X[l], -1.0, R[l]); // Coarse grid correction. 
- P_[l - 1]->ArrayMultTranspose(R_[l], X_[l - 1]); - if (A_[l - 1]->GetEssentialTrueDofs()) + RealMultTranspose(*P[l - 1], R[l], X[l - 1]); + if (dbc_tdof_lists[l - 1]) { - const mfem::Array &dbc_tdof_list = *A_[l - 1]->GetEssentialTrueDofs(); - for (int j = 0; j < n_rhs; j++) - { - X_[l - 1][j]->SetSubVector(dbc_tdof_list, 0.0); - } + X[l - 1]->SetSubVector(*dbc_tdof_lists[l - 1], 0.0); } VCycle(l - 1, false); // Prolongate and add. - P_[l - 1]->ArrayMult(Y_[l - 1], R_[l]); - for (int j = 0; j < n_rhs; j++) - { - *Y_[l][j] += *R_[l][j]; - } + RealMult(*P[l - 1], Y[l - 1], R[l]); + Y[l] += R[l]; // Post-smooth, with nonzero initial guess. - B_[l]->iterative_mode = true; - B_[l]->ArrayMultTranspose(X_[l], Y_[l]); + B[l]->SetInitialGuess(true); + B[l]->MultTranspose(X[l], Y[l]); } +template class GeometricMultigridSolver; +template class GeometricMultigridSolver; + } // namespace palace diff --git a/palace/linalg/gmg.hpp b/palace/linalg/gmg.hpp index 2193d2fe3..768369d2f 100644 --- a/palace/linalg/gmg.hpp +++ b/palace/linalg/gmg.hpp @@ -6,11 +6,20 @@ #include #include -#include #include "linalg/operator.hpp" +#include "linalg/solver.hpp" #include "linalg/vector.hpp" #include "utils/iodata.hpp" +namespace mfem +{ + +template +class Array; +class ParFiniteElementSpaceHierarchy; + +} // namespace mfem + namespace palace { @@ -19,34 +28,35 @@ namespace palace // hierarchy of finite element spaces. Optionally can be configured to use auxiliary space // smoothing at each level. // -class GeometricMultigridSolver : public mfem::Solver +template +class GeometricMultigridSolver : public Solver { private: // Number of V-cycles per preconditioner application. const int pc_it; // System matrices at each multigrid level and prolongation operators (not owned). - std::vector A_, P_; + std::vector A; + std::vector P; + std::vector *> dbc_tdof_lists; - // Smoothers for each level. Coarse level solver is B_[0]. - std::vector> B_; + // Smoothers for each level. 
Coarse level solver is B[0]. + mutable std::vector>> B; // Temporary vectors for preconditioner application. The type of these is dictated by the // MFEM Operator interface for multiple RHS. - mutable std::vector x_, y_, r_; - mutable std::vector> xrefs_, yrefs_, rrefs_; - mutable std::vector> X_, Y_, R_; + mutable std::vector X, Y, R; // Internal function to perform a single V-cycle iteration. void VCycle(int l, bool initial_guess) const; public: - GeometricMultigridSolver(std::unique_ptr &&coarse_solver, + GeometricMultigridSolver(std::unique_ptr> &&coarse_solver, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces, int cycle_it, int smooth_it, int cheby_order); GeometricMultigridSolver(const IoData &iodata, - std::unique_ptr &&coarse_solver, + std::unique_ptr> &&coarse_solver, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) : GeometricMultigridSolver(std::move(coarse_solver), fespaces, aux_fespaces, @@ -56,25 +66,9 @@ class GeometricMultigridSolver : public mfem::Solver { } - void SetOperator(const Operator &op) override - { - MFEM_ABORT("SetOperator with a single operator is not implemented for " - "GeometricMultigridSolver, use the overloaded SetOperator instead!"); - } - void SetOperator(const std::vector> &ops, - const std::vector> *aux_ops = nullptr); - - void Mult(const Vector &x, Vector &y) const override - { - mfem::Array X(1); - mfem::Array Y(1); - X[0] = &x; - Y[0] = &y; - ArrayMult(X, Y); - } + void SetOperator(const OperType &op) override; - void ArrayMult(const mfem::Array &X, - mfem::Array &Y) const override; + void Mult(const VecType &x, VecType &y) const override; }; } // namespace palace diff --git a/palace/linalg/jacobi.hpp b/palace/linalg/jacobi.hpp index 2b240d3af..25e4735f0 100644 --- a/palace/linalg/jacobi.hpp +++ b/palace/linalg/jacobi.hpp @@ -12,9 +12,8 @@ namespace palace { // -// Simple Jacobi smoother using a provided diagonal vector, 
usually the output of -// AssembleDiagonal() which allows for (approximatE) diagonal construction for matrix-free -// operators. +// Simple Jacobi smoother using the diagonal vector from Operator::AssembleDiagonal(), +// which allows for (approximate) diagonal construction for matrix-free operators. // class JacobiSmoother : public mfem::Solver { diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp index ae36b6d30..f5721d415 100644 --- a/palace/linalg/ksp.cpp +++ b/palace/linalg/ksp.cpp @@ -3,9 +3,9 @@ #include "ksp.hpp" +#include #include "linalg/amg.hpp" #include "linalg/ams.hpp" -#include "linalg/complex.hpp" #include "linalg/gmg.hpp" #include "linalg/mumps.hpp" #include "linalg/strumpack.hpp" @@ -19,8 +19,9 @@ namespace palace namespace { -std::unique_ptr ConfigureKrylovSolver(MPI_Comm comm, - const IoData &iodata) +template +std::unique_ptr> ConfigureKrylovSolver(MPI_Comm comm, + const IoData &iodata) { // Configure solver settings as needed based on inputs. config::LinearSolverData::KspType type = iodata.solver.linear.ksp_type; @@ -37,66 +38,95 @@ std::unique_ptr ConfigureKrylovSolver(MPI_Comm comm, type = config::LinearSolverData::KspType::GMRES; } } - mfem::IterativeSolver::PrintLevel print = - mfem::IterativeSolver::PrintLevel().Warnings().Errors(); - if (iodata.problem.verbose > 0) - { - print.Summary(); - if (iodata.problem.verbose > 1) - { - print.Iterations(); - if (iodata.problem.verbose > 2) - { - print.All(); - } - } - } - - // XX TODO: We may want to replace the MFEM Krylov solvers with Hypre ones for performance - // (for examples, Hypre has a COGMRES solver which uses CGS (or CGS2) for - // orthogonalization). These will require some wrappers to allow operability with - // an mfem::Operator operator and mfem::Solver preconditioner. // Create the solver. 
- std::unique_ptr ksp; + std::unique_ptr> ksp; switch (type) { case config::LinearSolverData::KspType::CG: - ksp = std::make_unique(comm); - break; - case config::LinearSolverData::KspType::MINRES: - ksp = std::make_unique(comm); + ksp = std::make_unique>(comm, iodata.problem.verbose); break; case config::LinearSolverData::KspType::GMRES: { - auto gmres = std::make_unique(comm); - gmres->SetKDim(iodata.solver.linear.max_size); + auto gmres = std::make_unique>(comm, iodata.problem.verbose); + gmres->SetRestartDim(iodata.solver.linear.max_size); ksp = std::move(gmres); } break; case config::LinearSolverData::KspType::FGMRES: { - auto fgmres = std::make_unique(comm); - fgmres->SetKDim(iodata.solver.linear.max_size); + auto fgmres = + std::make_unique>(comm, iodata.problem.verbose); + fgmres->SetRestartDim(iodata.solver.linear.max_size); ksp = std::move(fgmres); } break; case config::LinearSolverData::KspType::BICGSTAB: - ksp = std::make_unique(comm); - break; case config::LinearSolverData::KspType::DEFAULT: case config::LinearSolverData::KspType::INVALID: MFEM_ABORT("Unexpected solver type for Krylov solver configuration!"); break; } - ksp->iterative_mode = iodata.solver.linear.initial_guess; + ksp->SetInitialGuess(iodata.solver.linear.initial_guess); ksp->SetRelTol(iodata.solver.linear.tol); ksp->SetMaxIter(iodata.solver.linear.max_it); - ksp->SetPrintLevel(print); + + // Configure preconditioning side (only for GMRES). 
+ if (iodata.solver.linear.pc_side_type != config::LinearSolverData::SideType::DEFAULT) + { + if (type != config::LinearSolverData::KspType::GMRES) + { + Mpi::Warning( + comm, "Preconditioner side will be ignored for non-GMRES iterative solvers!\n"); + } + else + { + auto *gmres = static_cast>(ksp.get()); + switch (iodata.solver.linear.pc_side_type) + { + case config::LinearSolverData::SideType::LEFT: + gmres->SetPrecSide(GmresSolver::PrecSide::LEFT); + break; + case config::LinearSolverData::SideType::RIGHT: + gmres->SetPrecSide(GmresSolver::PrecSide::RIGHT); + break; + } + } + } + + // Configure orthogonalization method for GMRES/FGMRES. + if (iodata.solver.linear.orthog_type != config::LinearSolverData::OrthogType::DEFAULT) + { + if (type != config::LinearSolverData::KspType::GMRES && + type != config::LinearSolverData::KspType::FGMRES) + { + Mpi::Warning(comm, "Orthogonalization method will be ignored for non-GMRES/FGMRES " + "iterative solvers!\n"); + } + else + { + // Because FGMRES inherits from GMRES, this is OK. + auto *gmres = static_cast>(ksp.get()); + switch (iodata.solver.linear.orthog_type) + { + case config::LinearSolverData::OrthogType::MGS: + gmres->SetOrthogonalization(GmresSolver::OrthogType::MGS); + break; + case config::LinearSolverData::OrthogType::CGS: + gmres->SetOrthogonalization(GmresSolver::OrthogType::CGS); + break; + case config::LinearSolverData::OrthogType::CGS2: + gmres->SetOrthogonalization(GmresSolver::OrthogType::CGS2); + break; + } + } + } + return ksp; } -std::unique_ptr +template +std::unique_ptr> ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) @@ -132,7 +162,7 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, } int print = iodata.problem.verbose - 1; - // Create the solver. + // Create the real-valued solver first. 
std::unique_ptr pc; switch (type) { @@ -179,10 +209,13 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, "Solver was not built with MUMPS support, please choose a different solver!"); #endif break; - default: + case config::LinearSolverData::Type::DEFAULT: + case config::LinearSolverData::Type::INVALID: MFEM_ABORT("Unexpected solver type for preconditioner configuration!"); break; } + + // Construct the actual solver, which has the right value type. if (iodata.solver.linear.pc_mg) { // This will construct the multigrid hierarchy using pc as the coarse solver @@ -193,163 +226,73 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, { MFEM_VERIFY(aux_fespaces, "Multigrid with auxiliary space smoothers requires both " "primary space and auxiliary spaces for construction!"); - return std::make_unique(iodata, std::move(pc), fespaces, - aux_fespaces); + return std::make_unique>(iodata, std::move(pc), + fespaces, aux_fespaces); } else { - return std::make_unique(iodata, std::move(pc), fespaces, - nullptr); + return std::make_unique>(iodata, std::move(pc), + fespaces, nullptr); } } else { - return pc; + return std::make_unique>(std::move(pc)); } } -class ComplexBlockDiagonalSolver : public mfem::Solver -{ -private: - std::unique_ptr op_; - -public: - ComplexBlockDiagonalSolver(std::unique_ptr &&op) - : mfem::Solver(2 * op->Height(), 2 * op->Width()), op_(std::move(op)) - { - } - - mfem::Solver &GetSolver() { return *op_; } - - void SetOperator(const Operator &op) override {} - - void Mult(const Vector &x, Vector &y) const override - { - MFEM_ASSERT(x.Size() == 2 * op_->Width() && y.Size() == 2 * op_->Height(), - "Incompatible dimensions for ComplexBlockDiagonalSolver::Mult!"); - Vector xr, xi, yr, yi; - xr.MakeRef(const_cast(x), 0, op_->Width()); - xi.MakeRef(const_cast(x), op_->Width(), op_->Width()); - yr.MakeRef(y, 0, op_->Height()); - yi.MakeRef(y, op_->Height(), op_->Height()); - mfem::Array X(2); - mfem::Array Y(2); - X[0] = &xr; - 
X[1] = &xi; - Y[0] = &yr; - Y[1] = &yi; - op_->ArrayMult(X, Y); - yr.SyncAliasMemory(y); - yi.SyncAliasMemory(y); - } -}; - } // namespace -KspSolver::KspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, - mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) - : KspSolver(ConfigureKrylovSolver(fespaces.GetFinestFESpace().GetComm(), iodata), - ConfigurePreconditionerSolver(fespaces.GetFinestFESpace().GetComm(), iodata, - fespaces, aux_fespaces)) +template +KspSolver::KspSolver(const IoData &iodata, + mfem::ParFiniteElementSpaceHierarchy &fespaces, + mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) + : KspSolver( + ConfigureKrylovSolver(fespaces.GetFinestFESpace().GetComm(), iodata), + ConfigurePreconditionerSolver(fespaces.GetFinestFESpace().GetComm(), + iodata, fespaces, aux_fespaces)) { } -KspSolver::KspSolver(std::unique_ptr &&ksp, - std::unique_ptr &&pc) - : mfem::Solver(), ksp_(std::move(ksp)), pc_(std::move(pc)), ksp_mult(0), ksp_mult_it(0) +template +KspSolver::KspSolver(std::unique_ptr> &&ksp, + std::unique_ptr> &&pc) + : ksp(std::move(ksp)), pc(std::move(pc)), ksp_mult(0), ksp_mult_it(0) { } -void KspSolver::SetOperatorFinalize(const Operator &op) +template +void KspSolver::SetOperators(const OperType &op, const OperType &pc_op) { - // Unset the preconditioner before so that IterativeSolver::SetOperator does not set the - // preconditioner operator again. 
- ksp_->SetPreconditioner(nullptr); - ksp_->SetOperator(op); - ksp_->SetPreconditioner(*pc_); - height = op.Height(); - width = op.Width(); -} - -void KspSolver::SetOperator(const Operator &op, const Operator &pc_op) -{ - pc_->SetOperator(pc_op); - SetOperatorFinalize(op); -} - -void KspSolver::SetOperator(const Operator &op, - const std::vector> &pc_ops, - const std::vector> *aux_pc_ops) -{ - auto *gmg = dynamic_cast(pc_.get()); - if (gmg) + ksp->SetOperator(op); + const auto *mg_op = dynamic_cast *>(&pc_op); + const auto *mg_pc = dynamic_cast *>(pc.get()); + if (mg_op && !mg_pc) { - gmg->SetOperator(pc_ops, aux_pc_ops); + pc->SetOperator(mg_op->GetFinestOperator()); } else { - pc_->SetOperator(*pc_ops.back()); + pc->SetOperator(pc_op); } - SetOperatorFinalize(op); } -void KspSolver::Mult(const Vector &x, Vector &y) const +template +void KspSolver::Mult(const VecType &x, VecType &y) const { - ksp_->Mult(x, y); - if (!ksp_->GetConverged()) + ksp->Mult(x, y); + if (!ksp->GetConverged()) { Mpi::Warning( - ksp_->GetComm(), + ksp->GetComm(), "Linear solver did not converge, norm(Ax-b)/norm(b) = {:.3e} (norm(b) = {:.3e})!\n", - ksp_->GetFinalRelNorm(), ksp_->GetInitialNorm()); + ksp->GetFinalRes() / ksp->GetInitialRes(), ksp->GetInitialRes()); } ksp_mult++; - ksp_mult_it += ksp_->GetNumIterations(); -} - -ComplexKspSolver::ComplexKspSolver(const IoData &iodata, - mfem::ParFiniteElementSpaceHierarchy &fespaces, - mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) - : KspSolver(ConfigureKrylovSolver(fespaces.GetFinestFESpace().GetComm(), iodata), - std::make_unique(ConfigurePreconditionerSolver( - fespaces.GetFinestFESpace().GetComm(), iodata, fespaces, aux_fespaces))) -{ + ksp_mult_it += ksp->GetNumIterations(); } -ComplexKspSolver::ComplexKspSolver(std::unique_ptr &&ksp, - std::unique_ptr &&pc) - : KspSolver(std::move(ksp), std::make_unique(std::move(pc))) -{ -} - -void ComplexKspSolver::SetOperator(const ComplexOperator &op, const Operator &pc_op) -{ - auto &block = 
static_cast(pc_.get())->GetSolver(); - block.SetOperator(pc_op); - SetOperatorFinalize(op); -} - -void ComplexKspSolver::SetOperator( - const ComplexOperator &op, const std::vector> &pc_ops, - const std::vector> *aux_pc_ops) -{ - auto &block = static_cast(pc_.get())->GetSolver(); - auto *gmg = dynamic_cast(&block); - if (gmg) - { - gmg->SetOperator(pc_ops, aux_pc_ops); - } - else - { - block.SetOperator(*pc_ops.back()); - } - SetOperatorFinalize(op); -} - -void ComplexKspSolver::Mult(const ComplexVector &x, ComplexVector &y) const -{ - KspSolver::Mult(x, y); - y.Sync(); -} +template class KspSolver; +template class KspSolver; } // namespace palace diff --git a/palace/linalg/ksp.hpp b/palace/linalg/ksp.hpp index 214f0ca1e..83196db9f 100644 --- a/palace/linalg/ksp.hpp +++ b/palace/linalg/ksp.hpp @@ -1,96 +1,65 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 -#ifndef PALACE_LINALG_KSP_SOLVER_HPP -#define PALACE_LINALG_KSP_SOLVER_HPP +#ifndef PALACE_LINALG_KSP_HPP +#define PALACE_LINALG_KSP_HPP #include -#include -#include +#include +#include "linalg/iterative.hpp" #include "linalg/operator.hpp" -#include "linalg/vector.hpp" +#include "linalg/solver.hpp" + +namespace mfem +{ + +class ParFiniteElementSpaceHierarchy; + +} // namespace mfem namespace palace { -class ComplexParOperator; -class ComplexVector; class IoData; -class KspSolver : public mfem::Solver +// +// Linear solver class composing an iterative solver and preconditioner object. +// +template +class KspSolver { + static_assert(std::is_same::value || + std::is_same::value, + "Solver can only be defined for OperType = Operator or ComplexOperator!"); + protected: + typedef typename std::conditional::value, + ComplexVector, Vector>::type VecType; + // The actual solver and preconditioner objects. 
- std::unique_ptr ksp_; - std::unique_ptr pc_; + std::unique_ptr> ksp; + std::unique_ptr> pc; // Counters for number of calls to Mult method for linear solves, and cumulative number // of iterations. mutable int ksp_mult, ksp_mult_it; -protected: - KspSolver() : ksp_(nullptr), pc_(nullptr), ksp_mult(0), ksp_mult_it(0) {} - - void SetOperatorFinalize(const Operator &op); - public: KspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr); - KspSolver(std::unique_ptr &&ksp, - std::unique_ptr &&pc); + KspSolver(std::unique_ptr> &&ksp, + std::unique_ptr> &&pc); int NumTotalMult() const { return ksp_mult; } - int NumTotalMultIter() const { return ksp_mult_it; } - - void SetOperator(const Operator &op) override - { - MFEM_ABORT("SetOperator with a single operator is not implemented for KspSolver, you " - "must specify the preconditioner operator as well!"); - } + int NumTotalMultIterations() const { return ksp_mult_it; } - virtual void SetOperator(const Operator &op, const Operator &pc_op); - virtual void - SetOperator(const Operator &op, const std::vector> &pc_ops, - const std::vector> *pc_aux_ops = nullptr); + void SetOperators(const OperType &op, const OperType &pc_op); - void Mult(const Vector &x, Vector &y) const override; + void Mult(const VecType &x, VecType &y) const; }; -class ComplexKspSolver : public KspSolver -{ -public: - ComplexKspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, - mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr); - ComplexKspSolver(std::unique_ptr &&ksp, - std::unique_ptr &&pc); - - using KspSolver::SetOperator; - void SetOperator(const Operator &op, const Operator &pc_op) override - { - MFEM_ABORT("SetOperator with a real-valued operator is not implemented for " - "ComplexKspSolver, use the complex-valued signature instead!"); - } - void SetOperator( - const Operator &op, const std::vector> &pc_ops, - const 
std::vector> *pc_aux_ops = nullptr) override - { - MFEM_ABORT("SetOperator with a real-valued operator is not implemented for " - "ComplexKspSolver, use the complex-valued signature instead!"); - } - - void SetOperator(const ComplexOperator &op, const Operator &pc_op); - void SetOperator(const ComplexOperator &op, - const std::vector> &pc_ops, - const std::vector> *pc_aux_ops = nullptr); - - void Mult(const Vector &x, Vector &y) const override - { - MFEM_ABORT("Mult with a real-valued vector is not implemented for " - "ComplexKspSolver, use the complex-valued signature instead!"); - } - void Mult(const ComplexVector &x, ComplexVector &y) const; -}; +using ComplexKspSolver = KspSolver; } // namespace palace -#endif // PALACE_LINALG_KSP_SOLVER_HPP +#endif // PALACE_LINALG_KSP_HPP diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp index f483155a1..a9770efd7 100644 --- a/palace/linalg/slepc.cpp +++ b/palace/linalg/slepc.cpp @@ -11,7 +11,6 @@ #include #include "linalg/divfree.hpp" #include "linalg/ksp.hpp" -#include "linalg/vector.hpp" #include "utils/communication.hpp" static PetscErrorCode __mat_apply_EPS_A0(Mat, Vec, Vec); @@ -161,9 +160,9 @@ PetscReal GetMaxSingularValue(MPI_Comm comm, const ComplexOperator &A, bool herm // or SVD solvers, namely MATOP_MULT and MATOP_MULT_HERMITIAN_TRANSPOSE (if the matrix // is not Hermitian). Mat A0; - ComplexVector x(A.Height()), y(A.Height()); + PetscInt n = A.Height(); + ComplexVector x(n), y(n); MatShellContext ctx = {A, x, y}; - PetscInt n = A.Height() / 2; PalacePetscCall( MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)&ctx, &A0)); PalacePetscCall(MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_shell)); @@ -302,26 +301,23 @@ void SlepcEigenSolver::SetShiftInvert(PetscScalar s, bool precond) void SlepcEigenSolver::SetOrthogonalization(bool mgs, bool cgs2) { + // The SLEPc default is CGS with refinement if needed. 
if (mgs || cgs2) { + BV bv = GetBV(); BVOrthogType type; BVOrthogRefineType refine; - PetscReal eta; - BVOrthogBlockType btype; - BV bv = GetBV(); if (mgs) { type = BV_ORTHOG_MGS; - PalacePetscCall(BVGetOrthogonalization(bv, nullptr, &refine, &eta, &btype)); + refine = BV_ORTHOG_REFINE_NEVER; } else // cgs2 { type = BV_ORTHOG_CGS; refine = BV_ORTHOG_REFINE_ALWAYS; - eta = 1.0; - PalacePetscCall(BVGetOrthogonalization(bv, nullptr, nullptr, nullptr, &btype)); } - PalacePetscCall(BVSetOrthogonalization(bv, type, refine, eta, btype)); + PalacePetscCall(BVSetOrthogonalization(bv, type, refine, 1.0, BV_ORTHOG_BLOCK_GS)); } } @@ -546,8 +542,7 @@ void SlepcEPSSolverBase::SetInitialSpace(const ComplexVector &v) PetscInt n; PalacePetscCall(VecGetLocalSize(v0, &n)); - MFEM_VERIFY(v.Size() == 2 * n, - "Invalid size mismatch for provided initial space vector!"); + MFEM_VERIFY(v.Size() == n, "Invalid size mismatch for provided initial space vector!"); PetscScalar *pv0; PalacePetscCall(VecGetArrayWrite(v0, &pv0)); @@ -590,7 +585,7 @@ int SlepcEPSSolverBase::Solve() Mpi::Print(GetComm(), " Total number of linear systems solved: {:d}\n" " Total number of linear solver iterations: {:d}\n", - opInv->NumTotalMult(), opInv->NumTotalMultIter()); + opInv->NumTotalMult(), opInv->NumTotalMultIterations()); } // Compute and store the eigenpair residuals. 
@@ -618,7 +613,7 @@ void SlepcEPSSolverBase::GetEigenvector(int i, ComplexVector &x) const PetscInt n; PalacePetscCall(VecGetLocalSize(v0, &n)); - MFEM_VERIFY(x.Size() == 2 * n, "Invalid size mismatch for provided eigenvector!"); + MFEM_VERIFY(x.Size() == n, "Invalid size mismatch for provided eigenvector!"); const PetscScalar *pv0; PalacePetscCall(VecGetArrayRead(v0, &pv0)); @@ -663,14 +658,19 @@ void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperato opK = &K; opM = &M; - PetscInt n = opK->Height() / 2; - PalacePetscCall( - MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0)); - PalacePetscCall( - MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); - PalacePetscCall(MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_EPS_A0)); - PalacePetscCall(MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_EPS_A1)); - PalacePetscCall(EPSSetOperators(eps, A0, A1)); + if (first) + { + PetscInt n = opK->Height(); + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0)); + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); + PalacePetscCall( + MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_EPS_A0)); + PalacePetscCall( + MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_EPS_A1)); + PalacePetscCall(EPSSetOperators(eps, A0, A1)); + } if (first && type != ScaleType::NONE) { @@ -757,16 +757,19 @@ void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexO opC = &C; opM = &M; - PetscInt n = opK->Height() / 2; - PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, - (void *)this, &A0)); - PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, - (void *)this, &A1)); - PalacePetscCall( - MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_PEPLinear_L0)); - 
PalacePetscCall( - MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_PEPLinear_L1)); - PalacePetscCall(EPSSetOperators(eps, A0, A1)); + if (first) + { + PetscInt n = opK->Height(); + PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, + (void *)this, &A0)); + PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, + (void *)this, &A1)); + PalacePetscCall( + MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_PEPLinear_L0)); + PalacePetscCall( + MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_PEPLinear_L1)); + PalacePetscCall(EPSSetOperators(eps, A0, A1)); + } if (first && type != ScaleType::NONE) { @@ -825,7 +828,7 @@ void SlepcPEPLinearSolver::SetInitialSpace(const ComplexVector &v) PetscInt n; PalacePetscCall(VecGetLocalSize(v0, &n)); - MFEM_VERIFY(2 * v.Size() == 2 * n, + MFEM_VERIFY(2 * v.Size() == n, "Invalid size mismatch for provided initial space vector!"); PetscScalar *pv0; @@ -848,7 +851,7 @@ void SlepcPEPLinearSolver::GetEigenvector(int i, ComplexVector &x) const PalacePetscCall(EPSGetEigenvector(eps, i, v0, nullptr)); PetscInt n; PalacePetscCall(VecGetLocalSize(v0, &n)); - MFEM_VERIFY(2 * x.Size() == 2 * n, "Invalid size mismatch for provided eigenvector!"); + MFEM_VERIFY(2 * x.Size() == n, "Invalid size mismatch for provided eigenvector!"); const PetscScalar *pv0; PalacePetscCall(VecGetArrayRead(v0, &pv0)); @@ -1047,8 +1050,7 @@ void SlepcPEPSolverBase::SetInitialSpace(const ComplexVector &v) PetscInt n; PalacePetscCall(VecGetLocalSize(v0, &n)); - MFEM_VERIFY(v.Size() == 2 * n, - "Invalid size mismatch for provided initial space vector!"); + MFEM_VERIFY(v.Size() == n, "Invalid size mismatch for provided initial space vector!"); PetscScalar *pv0; PalacePetscCall(VecGetArrayWrite(v0, &pv0)); @@ -1091,7 +1093,7 @@ int SlepcPEPSolverBase::Solve() Mpi::Print(GetComm(), " Total number of linear systems solved: {:d}\n" " Total number of linear solver 
iterations: {:d}\n", - opInv->NumTotalMult(), opInv->NumTotalMultIter()); + opInv->NumTotalMult(), opInv->NumTotalMultIterations()); } // Compute and store the eigenpair residuals. @@ -1119,7 +1121,7 @@ void SlepcPEPSolverBase::GetEigenvector(int i, ComplexVector &x) const PetscInt n; PalacePetscCall(VecGetLocalSize(v0, &n)); - MFEM_VERIFY(x.Size() == 2 * n, "Invalid size mismatch for provided eigenvector!"); + MFEM_VERIFY(x.Size() == n, "Invalid size mismatch for provided eigenvector!"); const PetscScalar *pv0; PalacePetscCall(VecGetArrayRead(v0, &pv0)); @@ -1166,18 +1168,24 @@ void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperato opC = &C; opM = &M; - PetscInt n = opK->Height() / 2; - PalacePetscCall( - MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0)); - PalacePetscCall( - MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); - PalacePetscCall( - MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A2)); - PalacePetscCall(MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A0)); - PalacePetscCall(MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A1)); - PalacePetscCall(MatShellSetOperation(A2, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A2)); - Mat A[3] = {A0, A1, A2}; - PalacePetscCall(PEPSetOperators(pep, 3, A)); + if (first) + { + PetscInt n = opK->Height(); + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0)); + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A2)); + PalacePetscCall( + MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A0)); + PalacePetscCall( + MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A1)); + PalacePetscCall( + MatShellSetOperation(A2, MATOP_MULT, (void 
(*)(void))__mat_apply_PEP_A2)); + Mat A[3] = {A0, A1, A2}; + PalacePetscCall(PEPSetOperators(pep, 3, A)); + } if (first && type != ScaleType::NONE) { diff --git a/palace/linalg/slepc.hpp b/palace/linalg/slepc.hpp index 3e2821961..e63de5054 100644 --- a/palace/linalg/slepc.hpp +++ b/palace/linalg/slepc.hpp @@ -15,9 +15,9 @@ #include #include #include -#include "linalg/complex.hpp" #include "linalg/eps.hpp" #include "linalg/operator.hpp" +#include "linalg/vector.hpp" // Forward declarations of SLEPc objects. typedef struct _p_EPS *EPS; diff --git a/palace/utils/configfile.cpp b/palace/utils/configfile.cpp index e6eb8fd44..46aaef00f 100644 --- a/palace/utils/configfile.cpp +++ b/palace/utils/configfile.cpp @@ -1532,6 +1532,12 @@ NLOHMANN_JSON_SERIALIZE_ENUM(LinearSolverData::CompressionType, {LinearSolverData::CompressionType::BLR_HODLR, "BLR-HODLR"}, {LinearSolverData::CompressionType::ZFP_BLR_HODLR, "ZFP-BLR-HODLR"}}) +NLOHMANN_JSON_SERIALIZE_ENUM(LinearSolverData::OrthogType, + {{LinearSolverData::OrthogType::INVALID, nullptr}, + {LinearSolverData::OrthogType::MGS, "MGS"}, + {LinearSolverData::OrthogType::CGS, "CGS"}, + {LinearSolverData::OrthogType::CGS2, "CGS2"}, + {LinearSolverData::OrthogType::DEFAULT, "Default"}}) void LinearSolverData::SetUp(json &solver) { @@ -1585,8 +1591,10 @@ void LinearSolverData::SetUp(json &solver) divfree_tol = linear->value("DivFreeTol", divfree_tol); divfree_max_it = linear->value("DivFreeMaxIts", divfree_max_it); - orthog_mgs = linear->value("OrthogUseMGS", orthog_mgs); - orthog_cgs2 = linear->value("OrthogUseCGS2", orthog_cgs2); + orthog_type = linear->value("Orthogonalization", orthog_type); + MFEM_VERIFY( + orthog_type != LinearSolverData::OrthogType::INVALID, + "Invalid value for config[\"Linear\"][\"Orthogonalization\"] in configuration file!"); // Cleanup linear->erase("Type"); @@ -1613,8 +1621,7 @@ void LinearSolverData::SetUp(json &solver) linear->erase("AMSVector"); linear->erase("DivFreeTol"); 
linear->erase("DivFreeMaxIts"); - linear->erase("OrthogUseMGS"); - linear->erase("OrthogUseCGS2"); + linear->erase("Orthogonalization"); MFEM_VERIFY(linear->empty(), "Found an unsupported configuration file keyword under \"Linear\"!\n" << linear->dump(2)); @@ -1644,8 +1651,7 @@ void LinearSolverData::SetUp(json &solver) // std::cout << "AMSVector: " << ams_vector << '\n'; // std::cout << "DivFreeTol: " << divfree_tol << '\n'; // std::cout << "DivFreeMaxIts: " << divfree_max_it << '\n'; - // std::cout << "OrthogUseMGS: " << orthog_mgs << '\n'; - // std::cout << "OrthogUseCGS2: " << orthog_cgs2 << '\n'; + // std::cout << "Orthogonalization: " << orthog_type << '\n'; } void SolverData::SetUp(json &config) diff --git a/palace/utils/configfile.hpp b/palace/utils/configfile.hpp index 5f9f58671..894c8fbe6 100644 --- a/palace/utils/configfile.hpp +++ b/palace/utils/configfile.hpp @@ -818,10 +818,17 @@ struct LinearSolverData // Maximum number of iterations for solving linear systems in divergence-free projector. int divfree_max_it = 100; - // Enable modified Gram-Schmidt orthogonalization instead of classical for GMRES/FGMRES - // Krylov solvers and SLEPc eigenvalue solver. - bool orthog_mgs = false; - bool orthog_cgs2 = false; + // Enable different variants of Gram-Schmidt orthogonalization for GMRES/FGMRES iterative + // solvers and SLEPc eigenvalue solver. 
+ enum class OrthogType + { + MGS, + CGS, + CGS2, + DEFAULT, + INVALID = -1 + }; + OrthogType orthog_type = OrthogType::DEFAULT; void SetUp(json &solver); }; From 4cee83d65ab99023cbc1df6539e4991e58448982 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Wed, 24 May 2023 14:20:35 -0700 Subject: [PATCH 16/41] WIP: Update driver classes for new linear solver functionality --- palace/drivers/basesolver.cpp | 7 ++- palace/drivers/basesolver.hpp | 6 +-- palace/drivers/drivensolver.cpp | 45 +++++++---------- palace/drivers/eigensolver.cpp | 63 +++++++++++------------ palace/drivers/electrostaticsolver.cpp | 15 +++--- palace/drivers/magnetostaticsolver.cpp | 15 +++--- palace/drivers/transientsolver.cpp | 3 +- palace/linalg/curlcurl.cpp | 69 +++++++++++++++----------- palace/linalg/curlcurl.hpp | 19 ++++--- palace/linalg/divfree.cpp | 26 +++++----- palace/linalg/divfree.hpp | 29 +++++++---- palace/models/timeoperator.cpp | 64 +++++++++++++----------- palace/models/timeoperator.hpp | 2 +- palace/models/waveportoperator.cpp | 41 ++++++++------- palace/models/waveportoperator.hpp | 9 ++-- 15 files changed, 213 insertions(+), 200 deletions(-) diff --git a/palace/drivers/basesolver.cpp b/palace/drivers/basesolver.cpp index e07917e84..5baaea553 100644 --- a/palace/drivers/basesolver.cpp +++ b/palace/drivers/basesolver.cpp @@ -52,10 +52,9 @@ void WriteMetadata(const std::string &post_dir, const json &meta) } // namespace -BaseSolver::BaseSolver(const IoData &iodata_, bool root_, int size, int num_thread, +BaseSolver::BaseSolver(const IoData &iodata, bool root, int size, int num_thread, const char *git_tag) - : iodata(iodata_), post_dir(GetPostDir(iodata_.problem.output)), root(root_), - table(8, 9, 6) + : iodata(iodata), post_dir(GetPostDir(iodata.problem.output)), root(root), table(8, 9, 6) { // Create directory for output. 
if (root && !std::filesystem::exists(post_dir)) @@ -111,7 +110,7 @@ void BaseSolver::SaveMetadata(const KspSolver &ksp) const { json meta = LoadMetadata(post_dir); meta["LinearSolver"]["TotalSolves"] = ksp.NumTotalMult(); - meta["LinearSolver"]["TotalIts"] = ksp.NumTotalMultIter(); + meta["LinearSolver"]["TotalIts"] = ksp.NumTotalMultIterations(); WriteMetadata(post_dir, meta); } } diff --git a/palace/drivers/basesolver.hpp b/palace/drivers/basesolver.hpp index 444ca5778..5ffc3a5de 100644 --- a/palace/drivers/basesolver.hpp +++ b/palace/drivers/basesolver.hpp @@ -46,9 +46,7 @@ class BaseSolver int p; // Floating point precision for data int w1; // First column width = precision + 7 extra int p1; // Floating point precision for first column - Table(int sp_, int p_, int p1_) : w(sp_ + p_ + 7), sp(sp_), p(p_), w1(p1_ + 7), p1(p1_) - { - } + Table(int sp, int p, int p1_) : w(sp + p + 7), sp(sp), p(p), w1(p1_ + 7), p1(p1_) {} }; const Table table; @@ -72,7 +70,7 @@ class BaseSolver void PostprocessFields(const PostOperator &postop, int step, double time) const; public: - BaseSolver(const IoData &iodata_, bool root_, int size = 0, int num_thread = 0, + BaseSolver(const IoData &iodata, bool root, int size = 0, int num_thread = 0, const char *git_tag = nullptr); virtual ~BaseSolver() = default; diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp index 001937666..76cb5f422 100644 --- a/palace/drivers/drivensolver.cpp +++ b/palace/drivers/drivensolver.cpp @@ -5,7 +5,6 @@ #include #include -#include "linalg/complex.hpp" #include "linalg/ksp.hpp" #include "linalg/operator.hpp" #include "linalg/vector.hpp" @@ -116,26 +115,21 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in // Because the Dirichlet BC is always homogenous, no special elimination is required on // the RHS. Assemble the linear system for the initial frequency (so we can call // KspSolver::SetOperators). Compute everything at the first frequency step. 
- std::unique_ptr K = spaceop.GetComplexSystemMatrix( - SpaceOperator::OperatorType::STIFFNESS, Operator::DIAG_ONE); - std::unique_ptr C = spaceop.GetComplexSystemMatrix( - SpaceOperator::OperatorType::DAMPING, Operator::DIAG_ZERO); - std::unique_ptr M = spaceop.GetComplexSystemMatrix( - SpaceOperator::OperatorType::MASS, Operator::DIAG_ZERO); - std::unique_ptr A2 = spaceop.GetComplexSystemMatrix( - SpaceOperator::OperatorType::EXTRA, omega0, Operator::DIAG_ZERO); - std::unique_ptr Curl = spaceop.GetComplexCurlMatrix(); - std::unique_ptr A = spaceop.GetComplexSystemMatrix( - 1.0, 1i * omega0, -omega0 * omega0, K.get(), C.get(), M.get(), A2.get()); + auto K = spaceop.GetComplexStiffnessMatrix(Operator::DIAG_ONE); + auto C = spaceop.GetComplexDampingMatrix(Operator::DIAG_ZERO); + auto M = spaceop.GetComplexMassMatrix(Operator::DIAG_ZERO); + auto A2 = spaceop.GetComplexExtraSystemMatrix(omega0, Operator::DIAG_ZERO); + auto Curl = spaceop.GetComplexCurlMatrix(); // Set up the linear solver and set operators for the first frequency step. The // preconditioner for the complex linear system is constructed from a real approximation // to the complex system matrix. - std::vector> P, AuxP; - spaceop.GetPreconditionerMatrix(1.0, omega0, -omega0 * omega0, omega0, P, AuxP); + auto A = spaceop.GetSystemMatrix(1.0, 1i * omega0, -omega0 * omega0, K.get(), C.get(), + M.get(), A2.get()); + auto P = spaceop.GetPreconditionerMatrix(1.0, omega0, -omega0 * omega0, omega0); ComplexKspSolver ksp(iodata, spaceop.GetNDSpaces(), &spaceop.GetH1Spaces()); - ksp.SetOperator(*A, P, &AuxP); + ksp.SetOperators(*A, *P); // Set up RHS vector for the incident field at port boundaries, and the vector for the // first frequency step. @@ -158,12 +152,11 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in if (step > step0) { // Update frequency-dependent excitation and operators. 
- A2 = spaceop.GetComplexSystemMatrix(SpaceOperator::OperatorType::EXTRA, omega, - Operator::DIAG_ZERO); - A = spaceop.GetComplexSystemMatrix(1.0, 1i * omega, -omega * omega, K.get(), C.get(), - M.get(), A2.get()); - spaceop.GetPreconditionerMatrix(1.0, omega, -omega * omega, omega, P, AuxP); - ksp.SetOperator(*A, P, &AuxP); + A2 = spaceop.GetComplexExtraSystemMatrix(omega, Operator::DIAG_ZERO); + A = spaceop.GetSystemMatrix(1.0, 1i * omega, -omega * omega, K.get(), C.get(), + M.get(), A2.get()); + P = spaceop.GetPreconditionerMatrix(1.0, omega, -omega * omega, omega); + ksp.SetOperators(*A, *P); } spaceop.GetExcitationVector(omega, RHS); timer.construct_time += timer.Lap(); @@ -180,8 +173,9 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in postop.SetEGridFunction(E); postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp(), spaceop.GetWavePortOp(), omega); - Mpi::Print(" Sol. ||E|| = {:.6e} (||RHS|| = {:.6e})\n", linalg::Norml2(A->GetComm(), E), - linalg::Norml2(A->GetComm(), RHS)); + Mpi::Print(" Sol. ||E|| = {:.6e} (||RHS|| = {:.6e})\n", + linalg::Norml2(spaceop.GetComm(), E), + linalg::Norml2(spaceop.GetComm(), RHS)); if (!iodata.solver.driven.only_port_post) { E_elec = postop.GetEFieldEnergy(); @@ -238,7 +232,7 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i // Allocate negative curl matrix for postprocessing the B-field and vectors for the // high-dimensional field solution. - std::unique_ptr Curl = spaceop.GetComplexCurlMatrix(); + auto Curl = spaceop.GetComplexCurlMatrix(); ComplexVector E(Curl->Width()), B(Curl->Height()); E = std::complex(0.0, 0.0); B = std::complex(0.0, 0.0); @@ -339,8 +333,7 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i postop.SetEGridFunction(E); postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp(), spaceop.GetWavePortOp(), omega); - // Mpi::Print(" Sol. 
||E|| = {:.6e}\n", linalg::Norml2(A->GetComm(), E)); //XX TODO - // PROM + Mpi::Print(" Sol. ||E|| = {:.6e}\n", linalg::Norml2(spaceop.GetComm(), E)); if (!iodata.solver.driven.only_port_post) { E_elec = postop.GetEFieldEnergy(); diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp index a8392d5bc..9a40a78ae 100644 --- a/palace/drivers/eigensolver.cpp +++ b/palace/drivers/eigensolver.cpp @@ -5,11 +5,11 @@ #include #include "linalg/arpack.hpp" -#include "linalg/complex.hpp" #include "linalg/divfree.hpp" #include "linalg/ksp.hpp" #include "linalg/operator.hpp" #include "linalg/slepc.hpp" +#include "linalg/vector.hpp" #include "models/lumpedportoperator.hpp" #include "models/postoperator.hpp" #include "models/spaceoperator.hpp" @@ -30,13 +30,10 @@ void EigenSolver::Solve(std::vector> &mesh, // computational range. The damping matrix may be nullptr. timer.Lap(); SpaceOperator spaceop(iodata, mesh); - std::unique_ptr K = spaceop.GetComplexSystemMatrix( - SpaceOperator::OperatorType::STIFFNESS, Operator::DIAG_ONE); - std::unique_ptr C = spaceop.GetComplexSystemMatrix( - SpaceOperator::OperatorType::DAMPING, Operator::DIAG_ZERO); - std::unique_ptr M = spaceop.GetComplexSystemMatrix( - SpaceOperator::OperatorType::MASS, Operator::DIAG_ZERO); - std::unique_ptr Curl = spaceop.GetComplexCurlMatrix(); + auto K = spaceop.GetComplexStiffnessMatrix(Operator::DIAG_ONE); + auto C = spaceop.GetComplexDampingMatrix(Operator::DIAG_ZERO); + auto M = spaceop.GetComplexMassMatrix(Operator::DIAG_ZERO); + auto Curl = spaceop.GetComplexCurlMatrix(); SaveMetadata(spaceop.GetNDSpace()); // Configure objects for postprocessing. 
@@ -82,13 +79,13 @@ void EigenSolver::Solve(std::vector> &mesh, Mpi::Print("\nConfiguring ARPACK eigenvalue solver\n"); if (C) { - eigen = - std::make_unique(K->GetComm(), iodata.problem.verbose); + eigen = std::make_unique(spaceop.GetComm(), + iodata.problem.verbose); } else { - eigen = - std::make_unique(K->GetComm(), iodata.problem.verbose); + eigen = std::make_unique(spaceop.GetComm(), + iodata.problem.verbose); } #endif } @@ -101,25 +98,29 @@ void EigenSolver::Solve(std::vector> &mesh, { if (!iodata.solver.eigenmode.pep_linear) { - slepc = - std::make_unique(K->GetComm(), iodata.problem.verbose); + slepc = std::make_unique(spaceop.GetComm(), + iodata.problem.verbose); slepc->SetType(slepc::SlepcEigenSolver::Type::TOAR); } else { - slepc = std::make_unique(K->GetComm(), + slepc = std::make_unique(spaceop.GetComm(), iodata.problem.verbose); slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); } } else { - slepc = std::make_unique(K->GetComm(), iodata.problem.verbose); + slepc = std::make_unique(spaceop.GetComm(), + iodata.problem.verbose); slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); } slepc->SetProblemType(slepc::SlepcEigenSolver::ProblemType::GEN_NON_HERMITIAN); - slepc->SetOrthogonalization(iodata.solver.linear.orthog_mgs, - iodata.solver.linear.orthog_cgs2); + slepc->SetOrthogonalization( + iodata.solver.linear.orthog_type == config::LinearSolverData::OrthogType::MGS || + iodata.solver.linear.orthog_type == + config::LinearSolverData::OrthogType::DEFAULT, + iodata.solver.linear.orthog_type == config::LinearSolverData::OrthogType::CGS2); eigen = std::move(slepc); #endif } @@ -143,20 +144,16 @@ void EigenSolver::Solve(std::vector> &mesh, // If desired, use an M-inner product for orthogonalizing the eigenvalue subspace. The // constructed matrix just references the real SPD part of the mass matrix (no copy is // performed). Boundary conditions don't need to be eliminated here. 
- std::unique_ptr Mr; + std::unique_ptr KM; if (iodata.solver.eigenmode.mass_orthog) { // Mpi::Print(" Basis uses M-inner product\n"); - // Mr = std::make_unique( - // std::make_unique(M->LocalOperator().Real(), 1.0), - // M->GetFESpace()); - // eigen->SetBMat(*Mr); + // KM = spaceop.GetInnerProductMatrix(0.0, 1.0, nullptr, M.get()); + // eigen->SetBMat(*KM); Mpi::Print(" Basis uses (K + M)-inner product\n"); - auto KM = std::make_unique(M->LocalOperator().Real(), 1.0); - KM->AddOperator(K->LocalOperator().Real(), 1.0); - Mr = std::make_unique(std::move(KM), M->GetFESpace()); - eigen->SetBMat(*Mr); + KM = spaceop.GetInnerProductMatrix(1.0, 1.0, K.get(), M.get()); + eigen->SetBMat(*KM); } // Construct a divergence-free projector so the eigenvalue solve is performed in the space @@ -204,8 +201,7 @@ void EigenSolver::Solve(std::vector> &mesh, // closest to the specified target, σ. const double target = iodata.solver.eigenmode.target; const double f_target = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, target); - std::unique_ptr A; - std::vector> P, AuxP; + std::unique_ptr A, P; std::unique_ptr ksp; { Mpi::Print(" Shift-and-invert σ = {:.3e} GHz ({:.3e})\n", f_target, target); @@ -246,14 +242,13 @@ void EigenSolver::Solve(std::vector> &mesh, // (K - σ² M) or P(iσ) = (K + iσ C - σ² M) during the eigenvalue solve. The // preconditioner for complex linear systems is constructed from a real approximation // to the complex system matrix. 
- A = spaceop.GetComplexSystemMatrix(1.0, 1i * target, -target * target, K.get(), C.get(), - M.get(), nullptr); - - spaceop.GetPreconditionerMatrix(1.0, target, -target * target, target, P, AuxP); + A = spaceop.GetSystemMatrix(1.0, 1i * target, -target * target, K.get(), C.get(), + M.get()); + P = spaceop.GetPreconditionerMatrix(1.0, target, -target * target, target); ksp = std::make_unique(iodata, spaceop.GetNDSpaces(), &spaceop.GetH1Spaces()); - ksp->SetOperator(*A, P, &AuxP); + ksp->SetOperators(*A, *P); eigen->SetLinearSolver(*ksp); } timer.construct_time += timer.Lap(); diff --git a/palace/drivers/electrostaticsolver.cpp b/palace/drivers/electrostaticsolver.cpp index 15b8e879a..b04ba1006 100644 --- a/palace/drivers/electrostaticsolver.cpp +++ b/palace/drivers/electrostaticsolver.cpp @@ -23,14 +23,13 @@ void ElectrostaticSolver::Solve(std::vector> &mes // dofs. The eliminated matrix is stored in order to construct the RHS vector for nonzero // prescribed BC values. timer.Lap(); - std::vector> K; LaplaceOperator laplaceop(iodata, mesh); - laplaceop.GetStiffnessMatrix(K); + auto K = laplaceop.GetStiffnessMatrix(K); SaveMetadata(laplaceop.GetH1Space()); // Set up the linear solver. KspSolver ksp(iodata, laplaceop.GetH1Spaces()); - ksp.SetOperator(*K.back(), K); + ksp.SetOperators(K, K); // Terminal indices are the set of boundaries over which to compute the capacitance // matrix. Terminal boundaries are aliases for ports. @@ -39,7 +38,7 @@ void ElectrostaticSolver::Solve(std::vector> &mes MFEM_VERIFY(nstep > 0, "No terminal boundaries specified for electrostatic simulation!"); // Right-hand side term and solution vector storage. - Vector RHS(K.back()->Height()); + Vector RHS(K->Height()); std::vector V(nstep); timer.construct_time += timer.Lap(); @@ -63,8 +62,8 @@ void ElectrostaticSolver::Solve(std::vector> &mes timer.solve_time += timer.Lap(); Mpi::Print(" Sol. 
||V|| = {:.6e} (||RHS|| = {:.6e})\n", - linalg::Norml2(K.back()->GetComm(), V[step]), - linalg::Norml2(K.back()->GetComm(), RHS)); + linalg::Norml2(laplaceop.GetComm(), V[step]), + linalg::Norml2(laplaceop.GetComm(), RHS)); timer.postpro_time += timer.Lap(); // Next terminal. @@ -87,7 +86,7 @@ void ElectrostaticSolver::Postprocess(LaplaceOperator &laplaceop, PostOperator & // charges from the prescribed voltage to get C directly as: // Q_i = ∫ ρ dV = ∫ ∇ ⋅ (ε E) dV = ∫ (ε E) ⋅ n dS // and C_ij = Q_i/V_j. The energy formulation avoids having to locally integrate E = -∇V. - std::unique_ptr Grad = laplaceop.GetGradMatrix(); + auto Grad = laplaceop.GetGradMatrix(); const std::map> &terminal_sources = laplaceop.GetSources(); int nstep = static_cast(terminal_sources.size()); mfem::DenseMatrix C(nstep), Cm(nstep); @@ -136,7 +135,7 @@ void ElectrostaticSolver::Postprocess(LaplaceOperator &laplaceop, PostOperator & } else if (j > i) { - add(V[i], V[j], Vij); + Vector::add(V[i], V[j], Vij); E = 0.0; Grad->AddMult(Vij, E, -1.0); postop.SetEGridFunction(E); diff --git a/palace/drivers/magnetostaticsolver.cpp b/palace/drivers/magnetostaticsolver.cpp index b16609bb0..e52d62f60 100644 --- a/palace/drivers/magnetostaticsolver.cpp +++ b/palace/drivers/magnetostaticsolver.cpp @@ -23,14 +23,13 @@ void MagnetostaticSolver::Solve(std::vector> &mes // handled eliminating the rows and columns of the system matrix for the corresponding // dofs. timer.Lap(); - std::vector> K; CurlCurlOperator curlcurlop(iodata, mesh); - curlcurlop.GetStiffnessMatrix(K); + auto K = curlcurlop.GetStiffnessMatrix(); SaveMetadata(curlcurlop.GetNDSpace()); // Set up the linear solver. KspSolver ksp(iodata, curlcurlop.GetNDSpaces(), &curlcurlop.GetH1Spaces()); - ksp.SetOperator(*K.back(), K); + ksp.SetOperators(K, K); // Terminal indices are the set of boundaries over which to compute the inductance matrix. 
PostOperator postop(iodata, curlcurlop, "magnetostatic"); @@ -39,7 +38,7 @@ void MagnetostaticSolver::Solve(std::vector> &mes "No surface current boundaries specified for magnetostatic simulation!"); // Source term and solution vector storage. - Vector RHS(K.back()->Height()); + Vector RHS(K->Height()); std::vector A(nstep); timer.construct_time += timer.Lap(); @@ -64,8 +63,8 @@ void MagnetostaticSolver::Solve(std::vector> &mes timer.solve_time += timer.Lap(); Mpi::Print(" Sol. ||A|| = {:.6e} (||RHS|| = {:.6e})\n", - linalg::Norml2(K.back()->GetComm(), A[step]), - linalg::Norml2(K.back()->GetComm(), RHS)); + linalg::Norml2(curlcurlop.GetComm(), A[step]), + linalg::Norml2(curlcurlop.GetComm(), RHS)); timer.postpro_time += timer.Lap(); // Next source. @@ -89,7 +88,7 @@ void MagnetostaticSolver::Postprocess(CurlCurlOperator &curlcurlop, PostOperator // Φ_i = ∫ B ⋅ n_j dS // and M_ij = Φ_i/I_j. The energy formulation avoids having to locally integrate B = // ∇ x A. - std::unique_ptr Curl = curlcurlop.GetCurlMatrix(); + auto Curl = curlcurlop.GetCurlMatrix(); const SurfaceCurrentOperator &surf_j_op = curlcurlop.GetSurfaceCurrentOp(); int nstep = static_cast(surf_j_op.Size()); mfem::DenseMatrix M(nstep), Mm(nstep); @@ -143,7 +142,7 @@ void MagnetostaticSolver::Postprocess(CurlCurlOperator &curlcurlop, PostOperator } else if (j > i) { - add(A[i], A[j], Aij); + Vector::add(A[i], A[j], Aij); Curl->Mult(Aij, B); postop.SetBGridFunction(B); double Um = postop.GetHFieldEnergy(); diff --git a/palace/drivers/transientsolver.cpp b/palace/drivers/transientsolver.cpp index bcd821b51..8dc1c814d 100644 --- a/palace/drivers/transientsolver.cpp +++ b/palace/drivers/transientsolver.cpp @@ -106,8 +106,7 @@ void TransientSolver::Solve(std::vector> &mesh, postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp()); Mpi::Print(" Sol. 
||E|| = {:.6e}, ||B|| = {:.6e}\n", - linalg::Norml2(mesh.back()->GetComm(), E), - linalg::Norml2(mesh.back()->GetComm(), B)); + linalg::Norml2(spaceop.GetComm(), E), linalg::Norml2(spaceop.GetComm(), B)); if (!iodata.solver.transient.only_port_post) { E_elec = postop.GetEFieldEnergy(); diff --git a/palace/linalg/curlcurl.cpp b/palace/linalg/curlcurl.cpp index 7cb7ae78c..2235aca7f 100644 --- a/palace/linalg/curlcurl.cpp +++ b/palace/linalg/curlcurl.cpp @@ -3,9 +3,11 @@ #include "curlcurl.hpp" +#include #include "fem/coefficient.hpp" #include "linalg/ams.hpp" #include "linalg/gmg.hpp" +#include "linalg/iterative.hpp" #include "models/materialoperator.hpp" namespace palace @@ -17,40 +19,47 @@ CurlCurlMassSolver::CurlCurlMassSolver( const std::vector> &nd_dbc_tdof_lists, const std::vector> &h1_dbc_tdof_lists, double tol, int max_it, int print) - : mfem::Solver(nd_fespaces.GetFinestFESpace().GetTrueVSize()) { constexpr MaterialPropertyType MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY; constexpr MaterialPropertyType MatTypeEps = MaterialPropertyType::PERMITTIVITY_REAL; MaterialPropertyCoefficient muinv_func(mat_op); MaterialPropertyCoefficient epsilon_func(mat_op); - for (int s = 0; s < 2; s++) { - auto &A_ = (s == 0) ? A : AuxA; - auto &fespaces = (s == 0) ? nd_fespaces : h1_fespaces; - auto &dbc_tdof_lists = (s == 0) ? nd_dbc_tdof_lists : h1_dbc_tdof_lists; - A_.clear(); - A_.reserve(fespaces.GetNumLevels()); - for (int l = 0; l < fespaces.GetNumLevels(); l++) + auto A_mg = std::make_unique(nd_fespaces.GetNumLevels()); + for (int s = 0; s < 2; s++) { - auto &fespace_l = fespaces.GetFESpaceAtLevel(l); - auto a = std::make_unique(&fespace_l); - if (s == 0) + auto &fespaces = (s == 0) ? nd_fespaces : h1_fespaces; + auto &dbc_tdof_lists = (s == 0) ? 
nd_dbc_tdof_lists : h1_dbc_tdof_lists; + for (int l = 0; l < fespaces.GetNumLevels(); l++) { - a->AddDomainIntegrator(new mfem::CurlCurlIntegrator(muinv_func)); - a->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func)); + auto &fespace_l = fespaces.GetFESpaceAtLevel(l); + auto a = std::make_unique(&fespace_l); + if (s == 0) + { + a->AddDomainIntegrator(new mfem::CurlCurlIntegrator(muinv_func)); + a->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func)); + } + else + { + a->AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); + } + // XX TODO: Partial assembly option? + a->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + a->Assemble(0); + a->Finalize(0); + auto A_l = std::make_unique(std::move(a), fespace_l); + A_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); + if (s == 0) + { + A_mg.AddOperator(std::move(A_l)); + } + else + { + A_mg.AddAuxiliaryOperator(std::move(A_l)); + } } - else - { - a->AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); - } - // XX TODO: Partial assembly option? - a->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); - a->Assemble(0); - a->Finalize(0); - A_.push_back(std::make_unique(std::move(a), fespace_l)); - A_.back()->SetEssentialTrueDofs(dbc_tdof_lists[l], - Operator::DiagonalPolicy::DIAG_ONE); } + A = std::move(A_mg); } // The system matrix K + M is real and SPD. 
We use Hypre's AMS solver as the coarse-level @@ -58,17 +67,17 @@ CurlCurlMassSolver::CurlCurlMassSolver( auto ams = std::make_unique(nd_fespaces.GetFESpaceAtLevel(0), h1_fespaces.GetFESpaceAtLevel(0), 1, 1, 1, false, false, 0); - auto gmg = std::make_unique(std::move(ams), nd_fespaces, - &h1_fespaces, 1, 1, 2); + auto gmg = std::make_unique>( + std::move(ams), nd_fespaces, &h1_fespaces, 1, 1, 2); - auto pcg = std::make_unique(nd_fespaces.GetFinestFESpace().GetComm()); - pcg->iterative_mode = false; + auto pcg = + std::make_unique>(nd_fespaces.GetFinestFESpace().GetComm(), print); + pcg->SetInitialGuess(false); pcg->SetRelTol(tol); pcg->SetMaxIter(max_it); - pcg->SetPrintLevel(print); ksp = std::make_unique(std::move(pcg), std::move(gmg)); - ksp->SetOperator(*A.back(), A, &AuxA); + ksp->SetOperators(*A, *A); } } // namespace palace diff --git a/palace/linalg/curlcurl.hpp b/palace/linalg/curlcurl.hpp index 684546581..e07513932 100644 --- a/palace/linalg/curlcurl.hpp +++ b/palace/linalg/curlcurl.hpp @@ -6,26 +6,32 @@ #include #include -#include -#include "linalg/complex.hpp" #include "linalg/ksp.hpp" #include "linalg/operator.hpp" #include "linalg/vector.hpp" +namespace mfem +{ + +template +class Array; +class ParFiniteElementSpaceHierarchy; + +} // namespace mfem + namespace palace { class MaterialOperator; -class KspSolver; // // This solver implements a solver for the operator K + M in a Nedelec space. // -class CurlCurlMassSolver : public mfem::Solver +class CurlCurlMassSolver { private: // H(curl) norm operator A = K + M and its projection Gᵀ A G. 
- std::vector> A, AuxA; + std::unique_ptr A; // Linear solver for the linear system A y = x; std::unique_ptr ksp; @@ -38,9 +44,8 @@ class CurlCurlMassSolver : public mfem::Solver const std::vector> &h1_dbc_tdof_lists, double tol, int max_it, int print); - void SetOperator(const Operator &op) override {} + void Mult(const Vector &x, Vector &y) const { ksp->Mult(x, y); } - void Mult(const Vector &x, Vector &y) const override { ksp->Mult(x, y); } void Mult(const ComplexVector &x, ComplexVector &y) { Mult(x.Real(), y.Real()); diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp index d9118c022..33d85dac9 100644 --- a/palace/linalg/divfree.cpp +++ b/palace/linalg/divfree.cpp @@ -4,9 +4,11 @@ #include "divfree.hpp" #include +#include #include "fem/coefficient.hpp" #include "linalg/amg.hpp" #include "linalg/gmg.hpp" +#include "linalg/iterative.hpp" #include "models/materialoperator.hpp" namespace palace @@ -17,13 +19,11 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, const std::vector> &h1_bdr_tdof_lists, double tol, int max_it, int print) - : mfem::Solver(nd_fespace.GetTrueVSize()) { constexpr MaterialPropertyType MatType = MaterialPropertyType::PERMITTIVITY_REAL; MaterialPropertyCoefficient epsilon_func(mat_op); { - M.clear(); - M.reserve(h1_fespaces.GetNumLevels()); + auto M_mg = std::make_unique(h1_fespaces.GetNumLevels()); for (int l = 0; l < h1_fespaces.GetNumLevels(); l++) { auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l); @@ -33,10 +33,11 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, m->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); m->Assemble(0); m->Finalize(0); - M.push_back(std::make_unique(std::move(m), h1_fespace_l)); - M.back()->SetEssentialTrueDofs(h1_bdr_tdof_lists[l], - Operator::DiagonalPolicy::DIAG_ONE); + auto M_l = std::make_unique(std::move(m), h1_fespace_l); + M_l->SetEssentialTrueDofs(h1_bdr_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); + 
M_mg.AddOperator(std::move(M_l)); } + M = std::move(M_mg); } { // XX TODO: Partial assembly option? @@ -61,22 +62,23 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, Grad = std::make_unique(std::move(grad), h1_fespaces.GetFinestFESpace(), nd_fespace, true); } + dbc_tdof_list_M = &h1_dbc_tdof_lists.back(); // The system matrix for the projection is real and SPD. For the coarse-level AMG solve, // we don't use an exact solve on the coarsest level. auto amg = std::make_unique(1, 1, 0); - auto gmg = std::make_unique(std::move(amg), h1_fespaces, - nullptr, 1, 1, 2); + auto gmg = std::make_unique>( + std::move(amg), h1_fespaces, nullptr, 1, 1, 2); - auto pcg = std::make_unique(h1_fespaces.GetFinestFESpace().GetComm()); - pcg->iterative_mode = false; + auto pcg = + std::make_unique>(h1_fespaces.GetFinestFESpace().GetComm(), print); + pcg->SetInitialGuess(false); pcg->SetRelTol(tol); pcg->SetAbsTol(std::numeric_limits::epsilon()); pcg->SetMaxIter(max_it); - pcg->SetPrintLevel(print); ksp = std::make_unique(std::move(pcg), std::move(gmg)); - ksp->SetOperator(*M.back(), M); + ksp->SetOperators(*M, *M); psi.SetSize(h1_fespaces.GetFinestFESpace().GetTrueVSize()); rhs.SetSize(h1_fespaces.GetFinestFESpace().GetTrueVSize()); diff --git a/palace/linalg/divfree.hpp b/palace/linalg/divfree.hpp index bf400df6c..c74a95036 100644 --- a/palace/linalg/divfree.hpp +++ b/palace/linalg/divfree.hpp @@ -6,29 +6,35 @@ #include #include -#include -#include "linalg/complex.hpp" #include "linalg/ksp.hpp" #include "linalg/operator.hpp" #include "linalg/vector.hpp" +namespace mfem +{ + +template +class Array; +class ParFiniteElementSpaceHierarchy; + +} // namespace mfem + namespace palace { class MaterialOperator; -class KspSolver; // // This solver implements a projection onto a divergence-free space satisfying Gᵀ M x = 0, // where G represents the discrete gradient matrix with columns spanning the nullspace of // the curl-curl operator. 
// -class DivFreeSolver : public mfem::Solver +class DivFreeSolver { private: // Operators for the divergence-free projection. - std::unique_ptr WeakDiv, Grad; - std::vector> M; + std::unique_ptr WeakDiv, Grad, M; + const mfem::Array *dbc_tdof_list_M; // Linear solver for the projected linear system (Gᵀ M G) y = x. std::unique_ptr ksp; @@ -42,8 +48,6 @@ class DivFreeSolver : public mfem::Solver const std::vector> &h1_bdr_tdof_lists, double tol, int max_it, int print); - void SetOperator(const Operator &op) override {} - // Given a vector of Nedelec dofs for an arbitrary vector field, compute the Nedelec dofs // of the irrotational portion of this vector field. The resulting vector will satisfy // ∇ x y = 0. @@ -53,25 +57,28 @@ class DivFreeSolver : public mfem::Solver WeakDiv->Mult(y, rhs); // Apply essential BC and solve the linear system. - if (M.back()->GetEssentialTrueDofs()) + if (dbc_tdof_list_M) { - rhs.SetSubVector(*M.back()->GetEssentialTrueDofs(), 0.0); + rhs.SetSubVector(*dbc_tdof_list_M, 0.0); } ksp->Mult(rhs, psi); // Compute the irrotational portion of y and subtract. Grad->AddMult(psi, y, 1.0); } - void Mult(const Vector &x, Vector &y) const override + + void Mult(const Vector &x, Vector &y) const { y = x; Mult(y); } + void Mult(ComplexVector &y) const { Mult(y.Real()); Mult(y.Imag()); } + void Mult(const ComplexVector &x, ComplexVector &y) const { y = x; diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp index ff37ab041..6651a7cc7 100644 --- a/palace/models/timeoperator.cpp +++ b/palace/models/timeoperator.cpp @@ -4,8 +4,10 @@ #include "timeoperator.hpp" #include +#include "linalg/iterative.hpp" #include "linalg/jacobi.hpp" #include "linalg/ksp.hpp" +#include "linalg/solver.hpp" #include "models/spaceoperator.hpp" #include "utils/communication.hpp" #include "utils/iodata.hpp" @@ -19,8 +21,11 @@ namespace class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOperator { public: + // MPI communicator. 
+ MPI_comm comm; + // System matrices and excitation RHS. - std::unique_ptr K, M, C; + std::unique_ptr K, M, C; Vector NegJ; // Time dependence of current pulse for excitation: -J'(t) = -g'(t) J. This function @@ -30,29 +35,27 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera // Internal objects for solution of linear systems during time stepping. double a0_, a1_; std::unique_ptr kspM, kspA; - std::unique_ptr A; - std::vector> B, AuxB; + std::unique_ptr A, B; mutable Vector RHS; // Bindings to SpaceOperator functions to get the system matrix and preconditioner, and // construct the linear solver. - std::function(double a0, double a1)> ConfigureLinearSolver; + std::function ConfigureLinearSolver; public: TimeDependentCurlCurlOperator(const IoData &iodata, SpaceOperator &spaceop, std::function &djcoef, double t0, mfem::TimeDependentOperator::Type type) : mfem::SecondOrderTimeDependentOperator(spaceop.GetNDSpace().GetTrueVSize(), t0, type), - dJcoef(djcoef) + comm(spaceop.GetComm()), dJcoef(djcoef) { // Construct the system matrices defining the linear operator. PEC boundaries are // handled simply by setting diagonal entries of the mass matrix for the corresponding // dofs. Because the Dirichlet BC is always homogenous, no special elimination is // required on the RHS. Diagonal entries are set in M (so M is non-singular). - K = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::STIFFNESS, - Operator::DIAG_ZERO); - C = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::DAMPING, Operator::DIAG_ZERO); - M = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::MASS, Operator::DIAG_ONE); + K = spaceop.GetStiffnessMatrix(Operator::DIAG_ZERO); + C = spaceop.GetDampingMatrix(Operator::DIAG_ZERO); + M = spaceop.GetMassMatrix(Operator::DIAG_ONE); // Set up RHS vector for the current source term: -g'(t) J, where g(t) handles the time // dependence. 
@@ -61,33 +64,33 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera // Set up linear solvers. { - // PCG with a simple Jacobi preconditioner for mass matrix systems. - auto pcg = std::make_unique(M->GetComm()); - pcg->iterative_mode = iodata.solver.linear.initial_guess; + auto pcg = std::make_unique>(comm, 0); + pcg->SetInitialGuess(iodata.solver.linear.initial_guess); pcg->SetRelTol(iodata.solver.linear.tol); pcg->SetMaxIter(iodata.solver.linear.max_it); - pcg->SetPrintLevel(0); - kspM = - std::make_unique(std::move(pcg), std::make_unique()); - kspM->SetOperator(*M, *M); + auto jac = + std::make_unique>(std::make_unique()); + kspM = std::make_unique(std::move(pcg), std::move(jac)); + kspM->SetOperators(*M, *M); } { // For explicit schemes, recommended to just use cheaper preconditioners. Otherwise, // use AMS or a direct solver. The system matrix is formed as a sequence of matrix // vector products, and is only assembled for preconditioning. - ConfigureLinearSolver = [this, &iodata, - &spaceop](double a0, double a1) -> std::unique_ptr + ConfigureLinearSolver = [this, &iodata, &spaceop](double a0, double a1) { // Configure the system matrix and also the matrix (matrices) from which the // preconditioner will be constructed. A = spaceop.GetSystemMatrix(a0, a1, 1.0, K.get(), C.get(), M.get()); - spaceop.GetPreconditionerMatrix(a0, a1, 1.0, 0.0, B, AuxB); + B = spaceop.GetPreconditionerMatrix(a0, a1, 1.0, 0.0); // Configure the solver. 
- auto ksp = std::make_unique(iodata, spaceop.GetNDSpaces(), - &spaceop.GetH1Spaces()); - ksp->SetOperator(*A, B, &AuxB); - return ksp; + if (!kspA) + { + kspA = std::make_unique(iodata, spaceop.GetNDSpaces(), + &spaceop.GetH1Spaces()); + } + ksp->SetOperators(*A, *B); }; } } @@ -100,7 +103,7 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera { C->AddMult(du, rhs, 1.0); } - add(-1.0, rhs, dJcoef(t), NegJ, rhs); + Vector::add(-1.0, rhs, dJcoef(t), NegJ, rhs); } void Mult(const Vector &u, const Vector &du, Vector &ddu) const override @@ -125,7 +128,7 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera { // Configure the linear solver, including the system matrix and also the matrix // (matrices) from which the preconditioner will be constructed. - kspA = ConfigureLinearSolver(a0, a1); + ConfigureLinearSolver(a0, a1); a0_ = a0; a1_ = a1; k = 0.0; @@ -196,13 +199,14 @@ const KspSolver &TimeOperator::GetLinearSolver() const double TimeOperator::GetMaxTimeStep() const { const auto &curlcurl = dynamic_cast(*op); - const ParOperator &M = *curlcurl.M; - const ParOperator &K = *curlcurl.K; + MPI_comm comm = curlcurl.comm; + const Operator &M = *curlcurl.M; + const Operator &K = *curlcurl.K; // Solver for M⁻¹. constexpr double lin_tol = 1.0e-9; constexpr int max_lin_it = 500; - mfem::CGSolver pcg(M.GetComm()); + mfem::CGSolver pcg(comm); pcg.SetRelTol(lin_tol); pcg.SetMaxIter(max_lin_it); pcg.SetPrintLevel(0); @@ -213,8 +217,8 @@ double TimeOperator::GetMaxTimeStep() const pcg.SetPreconditioner(jac); // Power iteration to estimate largest eigenvalue of undamped system matrix M⁻¹ K. 
- SymmetricProductOperator op(pcg, K); - double lam = linalg::SpectralNorm(M.GetComm(), op, false); + ProductOperator op(pcg, K); + double lam = linalg::SpectralNorm(comm, op, false); MFEM_VERIFY(lam > 0.0, "Error during power iteration, λ = " << lam << "!"); return 2.0 / std::sqrt(lam); } diff --git a/palace/models/timeoperator.hpp b/palace/models/timeoperator.hpp index ede63b4d1..ee6ad5c21 100644 --- a/palace/models/timeoperator.hpp +++ b/palace/models/timeoperator.hpp @@ -33,7 +33,7 @@ class TimeOperator std::unique_ptr op; // Discrete curl for B-field time integration. - std::unique_ptr Curl; + std::unique_ptr Curl; public: TimeOperator(const IoData &iodata, SpaceOperator &spaceop, diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp index 601978ef0..c8398dc59 100644 --- a/palace/models/waveportoperator.cpp +++ b/palace/models/waveportoperator.cpp @@ -8,12 +8,12 @@ #include "fem/coefficient.hpp" #include "fem/integrator.hpp" #include "linalg/arpack.hpp" +#include "linalg/iterative.hpp" #include "linalg/mumps.hpp" -#include "linalg/operator.hpp" #include "linalg/slepc.hpp" +#include "linalg/solver.hpp" #include "linalg/strumpack.hpp" #include "linalg/superlu.hpp" -#include "linalg/vector.hpp" #include "models/materialoperator.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" @@ -463,7 +463,8 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera // Allocate storage for the eigenvalue problem operators. We have sparsity(A2) = // sparsity(B3) = sparsity(B4) ⊆ sparsity(A1). Precompute the frequency independent // contributions to A and B. 
- P = std::make_unique(*A1); + P = std::make_unique( + std::make_unique(*A1), nullptr); if (A2i) { A = std::make_unique( @@ -510,12 +511,13 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera constexpr int ksp_print = 0; constexpr double ksp_tol = 1.0e-8; constexpr double ksp_max_it = 100; - auto gmres = std::make_unique(nd_fespace.GetComm()); - gmres->iterative_mode = false; + auto gmres = + std::make_unique>(nd_fespace.GetComm(), ksp_print); + gmres->SetInitialGuess(false); gmres->SetRelTol(ksp_tol); gmres->SetMaxIter(ksp_max_it); - gmres->SetKDim(ksp_max_it); - gmres->SetPrintLevel(ksp_print); + gmres->SetRestartDim(ksp_max_it); + // gmres->SetPrecSide(GmresSolver::PrecSide::RIGHT); config::LinearSolverData::Type pc_type; #if defined(MFEM_USE_SUPERLU) @@ -527,14 +529,14 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera #else #error "Wave port solver requires building with SuperLU_DIST, STRUMPACK, or MUMPS!" #endif - std::unique_ptr pc; + std::unique_ptr> pc; if (pc_type == config::LinearSolverData::Type::SUPERLU) { #if defined(MFEM_USE_SUPERLU) auto slu = std::make_unique(nd_fespace.GetComm(), 0, false, ksp_print - 1); slu->GetSolver().SetColumnPermutation(mfem::superlu::NATURAL); - pc = std::move(slu); + pc = std::make_unique>(std::move(slu)); #endif } else if (pc_type == config::LinearSolverData::Type::STRUMPACK) @@ -544,7 +546,7 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera strumpack::CompressionType::NONE, 0.0, 0, 0, ksp_print - 1); strumpack->SetReorderingStrategy(strumpack::ReorderingStrategy::NATURAL); - pc = std::move(strumpack); + pc = std::make_unique>(std::move(strumpack)); #endif } else // config::LinearSolverData::Type::MUMPS @@ -554,7 +556,7 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera mfem::MUMPSSolver::SYMMETRIC_INDEFINITE, 0, 0.0, ksp_print - 1); mumps->SetReorderingStrategy(mfem::MUMPSSolver::AMD); - pc 
= std::move(mumps); + pc = std::make_unique>(std::move(mumps)); #endif } ksp = std::make_unique(std::move(gmres), std::move(pc)); @@ -620,28 +622,29 @@ void WavePortData::Initialize(double omega) // the desired wave port mode. double theta2 = mu_eps_max * omega * omega; { - *P *= 0.0; + auto &Pr = *static_cast(P->Real()); + Pr *= 0.0; - auto &Ar = *static_cast(&A->Real()); - auto &Br = *static_cast(&B->Real()); + auto &Ar = *static_cast(A->Real()); + auto &Br = *static_cast(B->Real()); Ar.Add(-omega * omega + omega0 * omega0, *A2r); Br.Add(-omega * omega + omega0 * omega0, *A2r); Br.Add(1.0 / theta2 - (omega0 == 0.0 ? 0.0 : 1.0 / (mu_eps_max * omega0 * omega0)), *B3); - P->Add(1.0, Br); + Pr.Add(1.0, Br); if (A2i) { - auto &Ai = *static_cast(&A->Imag()); - auto &Bi = *static_cast(&B->Imag()); + auto &Ai = *static_cast(A->Imag()); + auto &Bi = *static_cast(B->Imag()); Ai.Add(-omega * omega + omega0 * omega0, *A2i); Bi.Add(-omega * omega + omega0 * omega0, *A2i); - P->Add(1.0, Bi); + Pr.Add(1.0, Bi); } } // Configure and solve the eigenvalue problem for the desired boundary mode. - ksp->SetOperator(*B, *P); + ksp->SetOperators(*B, *P); eigen->SetOperators(*A, *B, EigenvalueSolver::ScaleType::NONE); eigen->SetInitialSpace(v0); int num_conv = eigen->Solve(); diff --git a/palace/models/waveportoperator.hpp b/palace/models/waveportoperator.hpp index f95891730..f2a1fc6fe 100644 --- a/palace/models/waveportoperator.hpp +++ b/palace/models/waveportoperator.hpp @@ -8,9 +8,10 @@ #include #include #include -#include "linalg/complex.hpp" #include "linalg/eps.hpp" #include "linalg/ksp.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { @@ -40,12 +41,12 @@ class WavePortData // Marker for all boundary attributes making up this port boundary. Mutable because // some MFEM API calls are not const correct. mutable mfem::Array attr_marker; + HYPRE_BigInt attr_tdof_sizes[2]; // Operator storage for repeated boundary mode eigenvalue problem solves. 
double mu_eps_max; - HYPRE_BigInt attr_tdof_sizes[2]; - std::unique_ptr A2r, A2i, B3, P; - std::unique_ptr A, B; + std::unique_ptr A2r, A2i, B3; + std::unique_ptr A, B, P; ComplexVector v0, e0, e0t, e0n; // Eigenvalue solver for boundary modes. From 09c9ad59bea05fff4411a50739a3e8b6df9f5fc6 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Thu, 25 May 2023 13:48:12 -0700 Subject: [PATCH 17/41] Debugging: Build after complex-valued linear solver updates --- palace/drivers/basesolver.cpp | 6 +- palace/drivers/basesolver.hpp | 4 +- palace/drivers/drivensolver.cpp | 24 +- palace/drivers/eigensolver.cpp | 8 +- palace/drivers/electrostaticsolver.cpp | 8 +- palace/drivers/magnetostaticsolver.cpp | 4 +- palace/fem/coefficient.hpp | 55 +++-- palace/fem/integrator.hpp | 2 +- palace/fem/interpolation.hpp | 129 ----------- palace/fem/interpolator.hpp | 6 +- palace/fem/multigrid.hpp | 1 + palace/linalg/amg.cpp | 2 + palace/linalg/ams.cpp | 2 + palace/linalg/arpack.cpp | 1 - palace/linalg/arpack.hpp | 2 +- palace/linalg/chebyshev.cpp | 13 +- palace/linalg/chebyshev.hpp | 3 + palace/linalg/curlcurl.cpp | 11 +- palace/linalg/distrelaxation.cpp | 8 +- palace/linalg/distrelaxation.hpp | 3 + palace/linalg/divfree.cpp | 8 +- palace/linalg/divfree.hpp | 6 +- palace/linalg/eps.hpp | 2 +- palace/linalg/gmg.cpp | 28 +-- palace/linalg/gmg.hpp | 3 + palace/linalg/iterative.cpp | 79 ++++--- palace/linalg/iterative.hpp | 57 ++++- palace/linalg/ksp.cpp | 49 ++-- palace/linalg/ksp.hpp | 15 +- palace/linalg/mumps.cpp | 2 + palace/linalg/operator.cpp | 251 ++++++++------------ palace/linalg/operator.hpp | 241 ++++++++----------- palace/linalg/rap.cpp | 145 +++++++----- palace/linalg/rap.hpp | 103 ++++----- palace/linalg/slepc.cpp | 1 - palace/linalg/slepc.hpp | 2 +- palace/linalg/solver.hpp | 5 +- palace/linalg/strumpack.cpp | 2 + palace/linalg/superlu.cpp | 1 + palace/linalg/vector.cpp | 57 ++++- palace/linalg/vector.hpp | 18 +- palace/models/curlcurloperator.cpp | 3 +- 
palace/models/curlcurloperator.hpp | 3 + palace/models/laplaceoperator.cpp | 7 +- palace/models/laplaceoperator.hpp | 2 + palace/models/postoperator.cpp | 18 +- palace/models/postoperator.hpp | 2 +- palace/models/spaceoperator.cpp | 307 +++++++++++++++++-------- palace/models/spaceoperator.hpp | 3 + palace/models/timeoperator.cpp | 11 +- palace/models/timeoperator.hpp | 2 +- palace/models/waveportoperator.cpp | 27 ++- 52 files changed, 905 insertions(+), 847 deletions(-) delete mode 100644 palace/fem/interpolation.hpp diff --git a/palace/drivers/basesolver.cpp b/palace/drivers/basesolver.cpp index 5baaea553..89fa8433c 100644 --- a/palace/drivers/basesolver.cpp +++ b/palace/drivers/basesolver.cpp @@ -100,7 +100,8 @@ void BaseSolver::SaveMetadata(const mfem::ParFiniteElementSpace &fespace) const } } -void BaseSolver::SaveMetadata(const KspSolver &ksp) const +template +void BaseSolver::SaveMetadata(const SolverType &ksp) const { if (post_dir.length() == 0) { @@ -556,4 +557,7 @@ void BaseSolver::PostprocessFields(const PostOperator &postop, int step, double Mpi::Barrier(); } +template void BaseSolver::SaveMetadata(const KspSolver &) const; +template void BaseSolver::SaveMetadata(const ComplexKspSolver &) const; + } // namespace palace diff --git a/palace/drivers/basesolver.hpp b/palace/drivers/basesolver.hpp index 5ffc3a5de..b7cbe6c8d 100644 --- a/palace/drivers/basesolver.hpp +++ b/palace/drivers/basesolver.hpp @@ -21,7 +21,6 @@ namespace palace { class IoData; -class KspSolver; class PostOperator; class Timer; @@ -79,7 +78,8 @@ class BaseSolver // These methods write different simulation metadata to a JSON file in post_dir. 
void SaveMetadata(const mfem::ParFiniteElementSpace &fespace) const; - void SaveMetadata(const KspSolver &ksp) const; + template + void SaveMetadata(const SolverType &ksp) const; void SaveMetadata(const Timer &timer) const; }; diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp index 76cb5f422..ca2a72d00 100644 --- a/palace/drivers/drivensolver.cpp +++ b/palace/drivers/drivensolver.cpp @@ -124,9 +124,11 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in // Set up the linear solver and set operators for the first frequency step. The // preconditioner for the complex linear system is constructed from a real approximation // to the complex system matrix. - auto A = spaceop.GetSystemMatrix(1.0, 1i * omega0, -omega0 * omega0, K.get(), C.get(), - M.get(), A2.get()); - auto P = spaceop.GetPreconditionerMatrix(1.0, omega0, -omega0 * omega0, omega0); + auto A = spaceop.GetSystemMatrix(std::complex(1.0, 0.0), 1i * omega0, + std::complex(-omega0 * omega0, 0.0), K.get(), + C.get(), M.get(), A2.get()); + auto P = spaceop.GetPreconditionerMatrix(1.0, omega0, -omega0 * omega0, + omega0); ComplexKspSolver ksp(iodata, spaceop.GetNDSpaces(), &spaceop.GetH1Spaces()); ksp.SetOperators(*A, *P); @@ -134,8 +136,8 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in // Set up RHS vector for the incident field at port boundaries, and the vector for the // first frequency step. ComplexVector RHS(Curl->Width()), E(Curl->Width()), B(Curl->Height()); - E = std::complex(0.0, 0.0); - B = std::complex(0.0, 0.0); + E = 0.0; + B = 0.0; timer.construct_time += timer.Lap(); // Main frequency sweep loop. @@ -153,9 +155,11 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in { // Update frequency-dependent excitation and operators. 
A2 = spaceop.GetComplexExtraSystemMatrix(omega, Operator::DIAG_ZERO); - A = spaceop.GetSystemMatrix(1.0, 1i * omega, -omega * omega, K.get(), C.get(), - M.get(), A2.get()); - P = spaceop.GetPreconditionerMatrix(1.0, omega, -omega * omega, omega); + A = spaceop.GetSystemMatrix(std::complex(1.0, 0.0), 1i * omega, + std::complex(-omega * omega, 0.0), K.get(), + C.get(), M.get(), A2.get()); + P = spaceop.GetPreconditionerMatrix(1.0, omega, -omega * omega, + omega); ksp.SetOperators(*A, *P); } spaceop.GetExcitationVector(omega, RHS); @@ -234,8 +238,8 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i // high-dimensional field solution. auto Curl = spaceop.GetComplexCurlMatrix(); ComplexVector E(Curl->Width()), B(Curl->Height()); - E = std::complex(0.0, 0.0); - B = std::complex(0.0, 0.0); + E = 0.0; + B = 0.0; // Configure the PROM operator which performs the parameter space sampling and basis // construction during the offline phase as well as the PROM solution during the online diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp index 9a40a78ae..0174f9f1b 100644 --- a/palace/drivers/eigensolver.cpp +++ b/palace/drivers/eigensolver.cpp @@ -242,9 +242,11 @@ void EigenSolver::Solve(std::vector> &mesh, // (K - σ² M) or P(iσ) = (K + iσ C - σ² M) during the eigenvalue solve. The // preconditioner for complex linear systems is constructed from a real approximation // to the complex system matrix. 
- A = spaceop.GetSystemMatrix(1.0, 1i * target, -target * target, K.get(), C.get(), - M.get()); - P = spaceop.GetPreconditionerMatrix(1.0, target, -target * target, target); + A = spaceop.GetSystemMatrix(std::complex(1.0, 0.0), 1i * target, + std::complex(-target * target, 0.0), K.get(), + C.get(), M.get()); + P = spaceop.GetPreconditionerMatrix(1.0, target, -target * target, + target); ksp = std::make_unique(iodata, spaceop.GetNDSpaces(), &spaceop.GetH1Spaces()); diff --git a/palace/drivers/electrostaticsolver.cpp b/palace/drivers/electrostaticsolver.cpp index b04ba1006..fd4888f0e 100644 --- a/palace/drivers/electrostaticsolver.cpp +++ b/palace/drivers/electrostaticsolver.cpp @@ -24,12 +24,12 @@ void ElectrostaticSolver::Solve(std::vector> &mes // prescribed BC values. timer.Lap(); LaplaceOperator laplaceop(iodata, mesh); - auto K = laplaceop.GetStiffnessMatrix(K); + auto K = laplaceop.GetStiffnessMatrix(); SaveMetadata(laplaceop.GetH1Space()); // Set up the linear solver. KspSolver ksp(iodata, laplaceop.GetH1Spaces()); - ksp.SetOperators(K, K); + ksp.SetOperators(*K, *K); // Terminal indices are the set of boundaries over which to compute the capacitance // matrix. Terminal boundaries are aliases for ports. @@ -55,7 +55,7 @@ void ElectrostaticSolver::Solve(std::vector> &mes // Form and solve the linear system for a prescribed nonzero voltage on the specified // terminal. 
Mpi::Print("\n"); - laplaceop.GetExcitationVector(idx, *K.back(), V[step], RHS); + laplaceop.GetExcitationVector(idx, *K, V[step], RHS); timer.construct_time += timer.Lap(); ksp.Mult(RHS, V[step]); @@ -135,7 +135,7 @@ void ElectrostaticSolver::Postprocess(LaplaceOperator &laplaceop, PostOperator & } else if (j > i) { - Vector::add(V[i], V[j], Vij); + linalg::AXPBYPCZ(1.0, V[i], 1.0, V[j], 0.0, Vij); E = 0.0; Grad->AddMult(Vij, E, -1.0); postop.SetEGridFunction(E); diff --git a/palace/drivers/magnetostaticsolver.cpp b/palace/drivers/magnetostaticsolver.cpp index e52d62f60..38be4ae47 100644 --- a/palace/drivers/magnetostaticsolver.cpp +++ b/palace/drivers/magnetostaticsolver.cpp @@ -29,7 +29,7 @@ void MagnetostaticSolver::Solve(std::vector> &mes // Set up the linear solver. KspSolver ksp(iodata, curlcurlop.GetNDSpaces(), &curlcurlop.GetH1Spaces()); - ksp.SetOperators(K, K); + ksp.SetOperators(*K, *K); // Terminal indices are the set of boundaries over which to compute the inductance matrix. PostOperator postop(iodata, curlcurlop, "magnetostatic"); @@ -142,7 +142,7 @@ void MagnetostaticSolver::Postprocess(CurlCurlOperator &curlcurlop, PostOperator } else if (j > i) { - Vector::add(A[i], A[j], Aij); + linalg::AXPBYPCZ(1.0, A[i], 1.0, A[j], 0.0, Aij); Curl->Mult(Aij, B); postop.SetBGridFunction(B); double Um = postop.GetHFieldEnergy(); diff --git a/palace/fem/coefficient.hpp b/palace/fem/coefficient.hpp index c5626267f..5b3ede0dc 100644 --- a/palace/fem/coefficient.hpp +++ b/palace/fem/coefficient.hpp @@ -309,7 +309,7 @@ inline double DielectricInterfaceCoefficient::Eval( // Substrate-air interface: 0.5 * t * (ϵ_SA * |E_t|² + 1 / ϵ_MS * |E_n|²) . 
double Vn = V * nor; - mfem::Vector::add(V, -Vn, nor, V); + V.Add(-Vn, nor); return 0.5 * ts * (epsilon * (V * V) + (Vn * Vn) / epsilon); } @@ -465,8 +465,6 @@ class EnergyDensityCoefficient : public mfem::Coefficient, public BdrGridFunctio const MaterialOperator &mat_op; mutable mfem::Vector V; - const mfem::DenseMatrix &GetMaterialProperty(int attr) const; - double GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, int attr); @@ -508,41 +506,50 @@ class EnergyDensityCoefficient : public mfem::Coefficient, public BdrGridFunctio } }; -template -inline const mfem::DenseMatrix & -EnergyDensityCoefficient::GetMaterialProperty(int attr) const +template <> +inline double +EnergyDensityCoefficient:: + GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, + int attr) { - return mat_op.GetPermittivityReal(attr); + // Only the real part of the permittivity contributes to the energy (imaginary part + // cancels out in the inner product due to symmetry). 
+ U.real().GetVectorValue(T, ip, V); + double res = mat_op.GetPermittivityReal(attr).InnerProduct(V, V); + U.imag().GetVectorValue(T, ip, V); + res += mat_op.GetPermittivityReal(attr).InnerProduct(V, V); + return 0.5 * res; } -template -inline const mfem::DenseMatrix & -EnergyDensityCoefficient::GetMaterialProperty(int attr) const +template <> +inline double EnergyDensityCoefficient:: + GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, + int attr) { - return mat_op.GetInvPermeability(attr); + U.GetVectorValue(T, ip, V); + return 0.5 * mat_op.GetPermittivityReal(attr).InnerProduct(V, V); } -template -inline double EnergyDensityCoefficient::GetLocalEnergyDensity( - mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, int attr) +template <> +inline double +EnergyDensityCoefficient:: + GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, + int attr) { - // Only the real part of the permittivity contributes to the energy (imaginary part - // cancels out in the inner product due to symmetry). U.real().GetVectorValue(T, ip, V); - double res = GetMaterialProperty(attr).InnerProduct(V, V); + double res = mat_op.GetInvPermeability(attr).InnerProduct(V, V); U.imag().GetVectorValue(T, ip, V); - res += GetMaterialProperty(attr).InnerProduct(V, V); + res += mat_op.GetInvPermeability(attr).InnerProduct(V, V); return 0.5 * res; } -template -inline double EnergyDensityCoefficient::GetLocalEnergyDensity( - mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, int attr) +template <> +inline double EnergyDensityCoefficient:: + GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, + int attr) { U.GetVectorValue(T, ip, V); - return 0.5 * GetMaterialProperty(attr).InnerProduct(V, V); + return 0.5 * mat_op.GetInvPermeability(attr).InnerProduct(V, V); } // Returns the local field evaluated on a boundary element. 
For internal boundary elements, diff --git a/palace/fem/integrator.hpp b/palace/fem/integrator.hpp index 546c71f84..b96938d9f 100644 --- a/palace/fem/integrator.hpp +++ b/palace/fem/integrator.hpp @@ -104,7 +104,7 @@ class BoundaryLFIntegrator : public mfem::LinearFormIntegrator, fe.CalcShape(ip, shape); double val = ip.weight * Tr.Weight() * Q.Eval(Tr, ip); - mfem::Vector::add(elvect, val, shape, elvect); + elvect.Add(val, shape); } } }; diff --git a/palace/fem/interpolation.hpp b/palace/fem/interpolation.hpp deleted file mode 100644 index f0a9f7fbb..000000000 --- a/palace/fem/interpolation.hpp +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_FEM_INTERPOLATION_HPP -#define PALACE_FEM_INTERPOLATION_HPP - -#include -#include -#include -#include -#include "utils/communication.hpp" -#include "utils/iodata.hpp" - -namespace palace -{ - -// -// A class which wraps MFEM's GSLIB interface for high-order field interpolation. -// -class InterpolationOperator -{ -private: -#if defined(MFEM_USE_GSLIB) - mfem::FindPointsGSLIB op; -#endif - std::vector op_idx; - -public: -#if defined(MFEM_USE_GSLIB) - InterpolationOperator(const IoData &iodata, mfem::ParMesh &mesh) : op(mesh.GetComm()) -#else - InterpolationOperator(const IoData &iodata, mfem::ParMesh &mesh) -#endif - { -#if defined(MFEM_USE_GSLIB) - // Set up probes interpolation. All processes search for all points. 
- if (iodata.domains.postpro.probe.empty()) - { - return; - } - const double bb_t = 0.1; // MFEM defaults - const double newton_tol = 1.0e-12; - const int npts = static_cast(iodata.domains.postpro.probe.size()); - MFEM_VERIFY( - mesh.Dimension() == mesh.SpaceDimension(), - "Probe postprocessing functionality requires mesh dimension == space dimension!"); - mfem::Vector xyz(npts * mesh.SpaceDimension()); - op_idx.resize(npts); - int i = 0; - for (const auto &[idx, data] : iodata.domains.postpro.probe) - { - // Use default ordering byNODES. - xyz(i) = data.x; - xyz(npts + i) = data.y; - if (mesh.SpaceDimension() == 3) - { - xyz(2 * npts + i) = data.z; - } - op_idx[i++] = idx; - } - op.Setup(mesh, bb_t, newton_tol, npts); - op.FindPoints(xyz, mfem::Ordering::byNODES); - op.SetDefaultInterpolationValue(0.0); - i = 0; - for (const auto &[idx, data] : iodata.domains.postpro.probe) - { - if (op.GetCode()[i++] == 2) - { - Mpi::Warning("Probe {:d} at ({:.3e}, {:.3e}, {:.3e}) m could not be found!\n" - "Using default value 0.0!\n", - idx, iodata.DimensionalizeValue(IoData::ValueType::LENGTH, data.x), - iodata.DimensionalizeValue(IoData::ValueType::LENGTH, data.y), - iodata.DimensionalizeValue(IoData::ValueType::LENGTH, data.z)); - } - } -#else - MFEM_VERIFY(iodata.domains.postpro.probe.empty(), - "InterpolationOperator class requires MFEM_USE_GSLIB!"); -#endif - } - - std::vector ProbeField(const mfem::ParGridFunction &U) - { -#if defined(MFEM_USE_GSLIB) - // Interpolated vector values are returned from GSLIB interpolator byNODES, which we - // transform to byVDIM for output. 
- const int npts = op.GetCode().Size(); - const int dim = U.VectorDim(); - std::vector vals(npts * dim); - mfem::Vector v(npts * dim); - op.Interpolate(U, v); - for (int d = 0; d < dim; d++) - { - for (int i = 0; i < npts; i++) - { - vals[i * dim + d] = v(d * npts + i); - } - } - return vals; -#else - MFEM_ABORT("InterpolationOperator class requires MFEM_USE_GSLIB!"); - return {}; -#endif - } - - std::vector> ProbeField(const mfem::ParComplexGridFunction &U, - bool has_imaginary) - { - std::vector vr = ProbeField(U.real()); - if (has_imaginary) - { - std::vector vi = ProbeField(U.imag()); - std::vector> vals(vr.size()); - std::transform(vr.begin(), vr.end(), vi.begin(), vals.begin(), - [](double xr, double xi) { return std::complex(xr, xi); }); - return vals; - } - else - { - return std::vector>(vr.begin(), vr.end()); - } - } - - const auto &GetProbes() const { return op_idx; } -}; - -} // namespace palace - -#endif // PALACE_FEM_INTERPOLATION_HPP diff --git a/palace/fem/interpolator.hpp b/palace/fem/interpolator.hpp index f0a9f7fbb..e3d736950 100644 --- a/palace/fem/interpolator.hpp +++ b/palace/fem/interpolator.hpp @@ -1,8 +1,8 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 -#ifndef PALACE_FEM_INTERPOLATION_HPP -#define PALACE_FEM_INTERPOLATION_HPP +#ifndef PALACE_FEM_INTERPOLATOR_HPP +#define PALACE_FEM_INTERPOLATOR_HPP #include #include @@ -126,4 +126,4 @@ class InterpolationOperator } // namespace palace -#endif // PALACE_FEM_INTERPOLATION_HPP +#endif // PALACE_FEM_INTERPOLATOR_HPP diff --git a/palace/fem/multigrid.hpp b/palace/fem/multigrid.hpp index f9a70dfaf..78edd63c8 100644 --- a/palace/fem/multigrid.hpp +++ b/palace/fem/multigrid.hpp @@ -8,6 +8,7 @@ #include #include #include "linalg/operator.hpp" +#include "linalg/rap.hpp" namespace palace::utils { diff --git a/palace/linalg/amg.cpp b/palace/linalg/amg.cpp index e303ad0b5..2b8ef4203 100644 --- a/palace/linalg/amg.cpp +++ b/palace/linalg/amg.cpp @@ -3,6 +3,8 @@ #include "amg.hpp" +#include "linalg/rap.hpp" + namespace palace { diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp index e8f408eca..72f72d209 100644 --- a/palace/linalg/ams.cpp +++ b/palace/linalg/ams.cpp @@ -3,6 +3,8 @@ #include "ams.hpp" +#include "linalg/rap.hpp" + namespace palace { diff --git a/palace/linalg/arpack.cpp b/palace/linalg/arpack.cpp index aa951b589..2c6bec49c 100644 --- a/palace/linalg/arpack.cpp +++ b/palace/linalg/arpack.cpp @@ -19,7 +19,6 @@ #include // clang-format on #include "linalg/divfree.hpp" -#include "linalg/ksp.hpp" #include "utils/communication.hpp" namespace diff --git a/palace/linalg/arpack.hpp b/palace/linalg/arpack.hpp index fc88f14ea..053e0b046 100644 --- a/palace/linalg/arpack.hpp +++ b/palace/linalg/arpack.hpp @@ -10,13 +10,13 @@ #include #include #include "linalg/eps.hpp" +#include "linalg/ksp.hpp" #include "linalg/operator.hpp" #include "linalg/vector.hpp" namespace palace { -class ComplexKspSolver; class DivFreeSolver; namespace arpack diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp index e6269e82a..34dff2d40 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ -5,6 +5,7 @@ #include 
#include +#include "linalg/rap.hpp" namespace palace { @@ -26,11 +27,11 @@ void GetDiagonal(const ComplexParOperator &A, ComplexVector &diag) diag = 0.0; if (A.HasReal()) { - A.Real().AssembleDiagonal(diag.Real()); + A.Real()->AssembleDiagonal(diag.Real()); } if (A.HasImag()) { - A.Imag().AssembleDiagonal(diag.Imag()); + A.Imag()->AssembleDiagonal(diag.Imag()); } } @@ -61,8 +62,8 @@ void ChebyshevSmoother::SetOperator(const OperType &op) // Set up Chebyshev coefficients using the computed maximum eigenvalue estimate. See // mfem::OperatorChebyshevSmoother or Adams et al., Parallel multigrid smoothing: // polynomial versus Gauss-Seidel, JCP (2003). - DiagonalOperator Dinv(dinv); - ProductOperator DinvA(Dinv, *A); + BaseDiagonalOperator Dinv(dinv); + BaseProductOperator DinvA(Dinv, *A); lambda_max = 1.1 * linalg::SpectralNorm(PtAP->GetComm(), DinvA, false); } @@ -122,7 +123,7 @@ inline void ApplyOrderK(const double sd, const double sr, const ComplexVector &d { const double t = DII[i] * RR[i] + DIR[i] * RI[i]; DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] - DII[i] * RI[i]); - DI[i] = sd * DI[i] + sr * t + DI[i] = sd * DI[i] + sr * t; }); } @@ -134,7 +135,7 @@ void ChebyshevSmoother::Mult(const VecType &x, VecType &y) const // Apply smoother: y = y + p(A) (x - A y) . for (int it = 0; it < pc_it; it++) { - if (initial_guess || it > 0) + if (this->initial_guess || it > 0) { A->Mult(y, r); linalg::AXPBY(1.0, x, -1.0, r); diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp index 8b4945142..2f2240715 100644 --- a/palace/linalg/chebyshev.hpp +++ b/palace/linalg/chebyshev.hpp @@ -21,6 +21,9 @@ namespace palace template class ChebyshevSmoother : public Solver { +protected: + typedef typename Solver::VecType VecType; + private: // Number of smoother iterations and polynomial order. 
const int pc_it, order; diff --git a/palace/linalg/curlcurl.cpp b/palace/linalg/curlcurl.cpp index 2235aca7f..184325870 100644 --- a/palace/linalg/curlcurl.cpp +++ b/palace/linalg/curlcurl.cpp @@ -8,6 +8,7 @@ #include "linalg/ams.hpp" #include "linalg/gmg.hpp" #include "linalg/iterative.hpp" +#include "linalg/rap.hpp" #include "models/materialoperator.hpp" namespace palace @@ -51,11 +52,11 @@ CurlCurlMassSolver::CurlCurlMassSolver( A_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); if (s == 0) { - A_mg.AddOperator(std::move(A_l)); + A_mg->AddOperator(std::move(A_l)); } else { - A_mg.AddAuxiliaryOperator(std::move(A_l)); + A_mg->AddAuxiliaryOperator(std::move(A_l)); } } } @@ -64,9 +65,9 @@ CurlCurlMassSolver::CurlCurlMassSolver( // The system matrix K + M is real and SPD. We use Hypre's AMS solver as the coarse-level // multigrid solve. - auto ams = std::make_unique(nd_fespaces.GetFESpaceAtLevel(0), - h1_fespaces.GetFESpaceAtLevel(0), 1, 1, 1, - false, false, 0); + auto ams = std::make_unique>(std::make_unique( + nd_fespaces.GetFESpaceAtLevel(0), h1_fespaces.GetFESpaceAtLevel(0), 1, 1, 1, false, + false, 0)); auto gmg = std::make_unique>( std::move(ams), nd_fespaces, &h1_fespaces, 1, 1, 2); diff --git a/palace/linalg/distrelaxation.cpp b/palace/linalg/distrelaxation.cpp index 862ca2371..ffeb7966c 100644 --- a/palace/linalg/distrelaxation.cpp +++ b/palace/linalg/distrelaxation.cpp @@ -6,6 +6,7 @@ #include #include #include "linalg/chebyshev.hpp" +#include "linalg/rap.hpp" namespace palace { @@ -34,7 +35,8 @@ DistRelaxationSmoother::DistRelaxationSmoother( } template -void DistRelaxationSmoother::SetOperator(const Operator &op, const Operator &op_G) +void DistRelaxationSmoother::SetOperators(const OperType &op, + const OperType &op_G) { typedef typename std::conditional::value, ComplexParOperator, ParOperator>::type ParOperType; @@ -93,7 +95,7 @@ void DistRelaxationSmoother::Mult(const VecType &x, VecType &y) const for (int it = 0; it < 
pc_it; it++) { // y = y + B (x - A y) - B->SetInitialGuess(initial_guess || it > 0); + B->SetInitialGuess(this->initial_guess || it > 0); B->Mult(x, y); // y = y + G B_G Gᵀ (x - A y) @@ -117,7 +119,7 @@ void DistRelaxationSmoother::MultTranspose(const VecType &x, VecType & for (int it = 0; it < pc_it; it++) { // y = y + G B_Gᵀ Gᵀ (x - A y) - if (initial_guess || it > 0) + if (this->initial_guess || it > 0) { A->Mult(y, r); linalg::AXPBY(1.0, x, -1.0, r); diff --git a/palace/linalg/distrelaxation.hpp b/palace/linalg/distrelaxation.hpp index 1b670c797..792fb8b88 100644 --- a/palace/linalg/distrelaxation.hpp +++ b/palace/linalg/distrelaxation.hpp @@ -30,6 +30,9 @@ namespace palace template class DistRelaxationSmoother : public Solver { +protected: + typedef typename Solver::VecType VecType; + private: // Number of smoother iterations. const int pc_it; diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp index 33d85dac9..3bc7a4530 100644 --- a/palace/linalg/divfree.cpp +++ b/palace/linalg/divfree.cpp @@ -9,6 +9,7 @@ #include "linalg/amg.hpp" #include "linalg/gmg.hpp" #include "linalg/iterative.hpp" +#include "linalg/rap.hpp" #include "models/materialoperator.hpp" namespace palace @@ -35,7 +36,7 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, m->Finalize(0); auto M_l = std::make_unique(std::move(m), h1_fespace_l); M_l->SetEssentialTrueDofs(h1_bdr_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); - M_mg.AddOperator(std::move(M_l)); + M_mg->AddOperator(std::move(M_l)); } M = std::move(M_mg); } @@ -62,11 +63,12 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, Grad = std::make_unique(std::move(grad), h1_fespaces.GetFinestFESpace(), nd_fespace, true); } - dbc_tdof_list_M = &h1_dbc_tdof_lists.back(); + bdr_tdof_list_M = &h1_bdr_tdof_lists.back(); // The system matrix for the projection is real and SPD. For the coarse-level AMG solve, // we don't use an exact solve on the coarsest level. 
- auto amg = std::make_unique(1, 1, 0); + auto amg = + std::make_unique>(std::make_unique(1, 1, 0)); auto gmg = std::make_unique>( std::move(amg), h1_fespaces, nullptr, 1, 1, 2); diff --git a/palace/linalg/divfree.hpp b/palace/linalg/divfree.hpp index c74a95036..0f721b0d5 100644 --- a/palace/linalg/divfree.hpp +++ b/palace/linalg/divfree.hpp @@ -34,7 +34,7 @@ class DivFreeSolver private: // Operators for the divergence-free projection. std::unique_ptr WeakDiv, Grad, M; - const mfem::Array *dbc_tdof_list_M; + const mfem::Array *bdr_tdof_list_M; // Linear solver for the projected linear system (Gᵀ M G) y = x. std::unique_ptr ksp; @@ -57,9 +57,9 @@ class DivFreeSolver WeakDiv->Mult(y, rhs); // Apply essential BC and solve the linear system. - if (dbc_tdof_list_M) + if (bdr_tdof_list_M) { - rhs.SetSubVector(*dbc_tdof_list_M, 0.0); + rhs.SetSubVector(*bdr_tdof_list_M, 0.0); } ksp->Mult(rhs, psi); diff --git a/palace/linalg/eps.hpp b/palace/linalg/eps.hpp index ff0229e2a..1754b19d8 100644 --- a/palace/linalg/eps.hpp +++ b/palace/linalg/eps.hpp @@ -5,13 +5,13 @@ #define PALACE_LINALG_EPS_HPP #include +#include "linalg/ksp.hpp" #include "linalg/operator.hpp" #include "linalg/vector.hpp" namespace palace { -class ComplexKspSolver; class DivFreeSolver; // diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index 2c537be08..b03151d99 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -6,6 +6,7 @@ #include #include "linalg/chebyshev.hpp" #include "linalg/distrelaxation.hpp" +#include "linalg/rap.hpp" namespace palace { @@ -26,14 +27,14 @@ GeometricMultigridSolver::GeometricMultigridSolver( A.resize(n_levels, nullptr); P.resize(n_levels - 1, nullptr); dbc_tdof_lists.resize(n_levels - 1, nullptr); - X.resize(n_levels, Vector()); - Y.resize(n_levels, Vector()); - R.resize(n_levels, Vector()); + X.resize(n_levels, VecType()); + Y.resize(n_levels, VecType()); + R.resize(n_levels, VecType()); // Configure prolongation operators. 
for (int l = 0; l < n_levels - 1; l++) { - P_[l] = fespaces.GetProlongationAtLevel(l); + P[l] = fespaces.GetProlongationAtLevel(l); } // Use the supplied level 0 (coarse) solver. @@ -66,9 +67,9 @@ void GeometricMultigridSolver::SetOperator(const OperType &op) typedef typename std::conditional::value, ComplexParOperator, ParOperator>::type ParOperType; - const auto *mg_op = dynamic_cast *>(&op); - MFEM_VERIFY(mg_op, "GeometricMultigridSolver requires a MultigridOperator argument " - "provided to SetOperator!"); + const auto *mg_op = dynamic_cast *>(&op); + MFEM_VERIFY(mg_op, "GeometricMultigridSolver requires a MultigridOperator or " + "ComplexMultigridOperator argument provided to SetOperator!"); const int n_levels = static_cast(A.size()); MFEM_VERIFY( @@ -114,7 +115,8 @@ void GeometricMultigridSolver::Mult(const VecType &x, VecType &y) cons { // Initialize. const int n_levels = static_cast(A.size()); - MFEM_ASSERT(!initial_guess, "Geometric multigrid solver does not use initial guess!"); + MFEM_ASSERT(!this->initial_guess, + "Geometric multigrid solver does not use initial guess!"); MFEM_ASSERT(n_levels > 1 || pc_it == 1, "Single-level geometric multigrid will not work with multiple iterations!"); @@ -130,23 +132,23 @@ void GeometricMultigridSolver::Mult(const VecType &x, VecType &y) cons namespace { -inline void RealMult(Operator &op, const Vector &x, Vector &y) +inline void RealMult(const Operator &op, const Vector &x, Vector &y) { op.Mult(x, y); } -inline void RealMult(Operator &op, const Complex &x, Complex &y) +inline void RealMult(const Operator &op, const ComplexVector &x, ComplexVector &y) { op.Mult(x.Real(), y.Real()); op.Mult(x.Imag(), y.Imag()); } -inline void RealMultTranspose(Operator &op, const Vector &x, Vector &y) +inline void RealMultTranspose(const Operator &op, const Vector &x, Vector &y) { op.MultTranspose(x, y); } -inline void RealMultTranspose(Operator &op, const Complex &x, Complex &y) +inline void RealMultTranspose(const Operator &op, const 
ComplexVector &x, ComplexVector &y) { op.MultTranspose(x.Real(), y.Real()); op.MultTranspose(x.Imag(), y.Imag()); @@ -175,7 +177,7 @@ void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const RealMultTranspose(*P[l - 1], R[l], X[l - 1]); if (dbc_tdof_lists[l - 1]) { - X[l - 1]->SetSubVector(*dbc_tdof_lists[l - 1], 0.0); + X[l - 1].SetSubVector(*dbc_tdof_lists[l - 1], 0.0); } VCycle(l - 1, false); diff --git a/palace/linalg/gmg.hpp b/palace/linalg/gmg.hpp index 768369d2f..480c46576 100644 --- a/palace/linalg/gmg.hpp +++ b/palace/linalg/gmg.hpp @@ -31,6 +31,9 @@ namespace palace template class GeometricMultigridSolver : public Solver { +protected: + typedef typename Solver::VecType VecType; + private: // Number of V-cycles per preconditioner application. const int pc_it; diff --git a/palace/linalg/iterative.cpp b/palace/linalg/iterative.cpp index 3fbbe0b1b..3ff5fe9aa 100644 --- a/palace/linalg/iterative.cpp +++ b/palace/linalg/iterative.cpp @@ -17,16 +17,16 @@ namespace { template -inline void CheckDot(T dot, std::string msg) +inline void CheckDot(T dot, const char *msg) { - MFEM_ASSERT(std::isfinite(dot) && dot >= 0.0, msg); + MFEM_ASSERT(std::isfinite(dot) && dot >= 0.0, msg << dot << "!"); } template -inline void CheckDot(std::complex dot, std::string msg) +inline void CheckDot(std::complex dot, const char *msg) { - MFEM_ASSERT(std::isfinite(dot.real()) && std::is_finite(dot.imag()) && dot.real() >= 0.0, - msg); + MFEM_ASSERT(std::isfinite(dot.real()) && std::isfinite(dot.imag()) && dot.real() >= 0.0, + msg << dot << "!"); } template @@ -43,6 +43,8 @@ inline constexpr T SafeMin() constexpr int fradix = std::numeric_limits::radix; constexpr int expm = std::numeric_limits::min_exponent; constexpr int expM = std::numeric_limits::max_exponent; + // Note: pow is not constexpr in C++17 so this actually might not return a constexpr for + // all compilers. 
return std::max(std::pow(fradix, T(expm - 1)), std::pow(fradix, T(1 - expM))); } @@ -60,6 +62,8 @@ inline constexpr T SafeMax() constexpr int fradix = std::numeric_limits::radix; constexpr int expm = std::numeric_limits::min_exponent; constexpr int expM = std::numeric_limits::max_exponent; + // Note: pow is not constexpr in C++17 so this actually might not return a constexpr for + // all compilers. return std::min(std::pow(fradix, T(1 - expm)), std::pow(fradix, T(expM - 1))); } @@ -67,6 +71,10 @@ template inline void GeneratePlaneRotation(const T dx, const T dy, T &cs, T &sn) { // See LAPACK's s/dlartg. + const T safmin = SafeMin(); + const T safmax = SafeMax(); + const T root_min = std::sqrt(safmin); + const T root_max = std::sqrt(safmax / 2); if (dy == 0.0) { cs = 1.0; @@ -79,8 +87,6 @@ inline void GeneratePlaneRotation(const T dx, const T dy, T &cs, T &sn) sn = std::copysign(1.0, dy); return; } - const T root_min = std::sqrt(SafeMin()); - const T root_max = std::sqrt(SafeMax() / 2); T dx1 = std::abs(dx); T dy1 = std::abs(dy); if (dx1 > root_min && dx1 < root_max && dy1 > root_min && dy1 < root_max) @@ -91,7 +97,7 @@ inline void GeneratePlaneRotation(const T dx, const T dy, T &cs, T &sn) } else { - T u = std::min(SafeMax(), std::max(SafeMin(), std::max(dx1, dy1))); + T u = std::min(safmax, std::max(safmin, std::max(dx1, dy1))); T dxs = dx / u; T dys = dy / u; T d = std::sqrt(dxs * dxs + dys * dys); @@ -105,9 +111,11 @@ inline void GeneratePlaneRotation(const std::complex dx, const std::complex &sn) { // Generates a plane rotation so that: - // [ cs sn ] . [ dx ] = [ r ] - // [ -conj(sn) cs ] [ dy ] [ 0 ] + // [ cs sn ] [ dx ] = [ r ] + // [ -conj(sn) cs ] [ dy ] [ 0 ] // where cs is real and cs² + |sn|² = 1. See LAPACK's c/zlartg. 
+ const T safmin = SafeMin(); + const T safmax = SafeMax(); if (dy == 0.0) { cs = 1.0; @@ -127,8 +135,8 @@ inline void GeneratePlaneRotation(const std::complex dx, const std::complex root_min && dy1 < root_max) { @@ -136,15 +144,15 @@ inline void GeneratePlaneRotation(const std::complex dx, const std::complex dys = dy / u; sn = std::conj(dys) / std::sqrt(dys.real() * dys.real() + dys.imag() * dys.imag()); } } return; } - const T root_min = std::sqrt(SafeMin()); - const T root_max = std::sqrt(SafeMax() / 4); + const T root_min = std::sqrt(safmin); + const T root_max = std::sqrt(safmax / 4); T dx1 = std::max(std::abs(dx.real()), std::abs(dx.imag())); T dy1 = std::max(std::abs(dy.real()), std::abs(dy.imag())); if (dx1 > root_min && dx1 < root_max && dy1 > root_min && dy1 < root_max) @@ -152,7 +160,7 @@ inline void GeneratePlaneRotation(const std::complex dx, const std::complex= dz2 * SafeMin()) + if (dx2 >= dz2 * safmin) { cs = std::sqrt(dx2 / dz2); if (dx2 > root_min && dz2 < root_max * 2) @@ -173,12 +181,12 @@ inline void GeneratePlaneRotation(const std::complex dx, const std::complex dys = dy / u, dxs; T dy2 = dys.real() * dys.real() + dys.imag() * dys.imag(), dx2, dz2; if (dx1 / u < root_min) { - T v = std::min(SafeMax(), std::max(SafeMin(), dx1)); + T v = std::min(safmax, std::max(safmin, dx1)); w = v / u; dxs = dx / v; dx2 = dxs.real() * dxs.real() + dxs.imag() * dxs.imag(); @@ -191,7 +199,7 @@ inline void GeneratePlaneRotation(const std::complex dx, const std::complex= dz2 * SafeMin()) + if (dx2 >= dz2 * safmin) { cs = std::sqrt(dx2 / dz2); if (dx2 > root_min && dz2 < root_max * 2) @@ -233,7 +241,7 @@ inline void ApplyPlaneRotation(std::complex &dx, std::complex &dy, const T } // namespace template -IterativeSolver::IterativeSolver(MPI_comm comm, int print) +IterativeSolver::IterativeSolver(MPI_Comm comm, int print) : Solver(), comm(comm), A(nullptr), B(nullptr) { print_opts.Warnings(); @@ -274,7 +282,7 @@ void CgSolver::Mult(const VecType &b, VecType &x) const 
p.SetSize(A->Height()); // Initialize. - if (initial_guess) + if (this->initial_guess) { A->Mult(x, r); linalg::AXPBY(1.0, b, -1.0, r); @@ -293,7 +301,7 @@ void CgSolver::Mult(const VecType &b, VecType &x) const z = r; } beta = linalg::Dot(comm, z, r); - CheckDot(beta, "PCG preconditioner is not positive definite: (Br, r) = " << beta << "!"); + CheckDot(beta, "PCG preconditioner is not positive definite: (Br, r) = "); res = initial_res = std::sqrt(std::abs(beta)); eps = std::max(rel_tol * res, abs_tol); converged = (res < eps); @@ -309,8 +317,8 @@ void CgSolver::Mult(const VecType &b, VecType &x) const { if (print_opts.iterations) { - Mpi::Print(comm, "{}{:{}d} iteration, residual (B r, r) = {:.6e}\n", - std::string(tab_width, ' '), it, int_width, beta); + Mpi::Print(comm, "{}{:{}d} iteration, residual ||r||_B = {:.6e}\n", + std::string(tab_width, ' '), it, int_width, res); } if (!it) { @@ -318,12 +326,12 @@ void CgSolver::Mult(const VecType &b, VecType &x) const } else { - linalg::AXPBY(1.0, z, beta / beta_prev, p); + linalg::AXPBY(ScalarType(1.0), z, beta / beta_prev, p); } A->Mult(p, z); denom = linalg::Dot(comm, z, p); - CheckDot(denom, "PCG operator is not positive definite: (Ap, p) = " << denom << "!"); + CheckDot(denom, "PCG operator is not positive definite: (Ap, p) = "); alpha = beta / denom; x.Add(alpha, p); @@ -339,15 +347,14 @@ void CgSolver::Mult(const VecType &b, VecType &x) const z = r; } beta = linalg::Dot(comm, z, r); - CheckDot(beta, - "PCG preconditioner is not positive definite: (Br, r) = " << beta << "!"); + CheckDot(beta, "PCG preconditioner is not positive definite: (Br, r) = "); res = std::sqrt(std::abs(beta)); converged = (res < eps); } if (print_opts.iterations) { - Mpi::Print(comm, "{}{:{}d} iteration, residual (B r, r) = {:.6e}\n", - std::string(tab_width, ' '), it, int_width, beta); + Mpi::Print(comm, "{}{:{}d} iteration, residual ||r||_B = {:.6e}\n", + std::string(tab_width, ' '), it, int_width, res); } if (print_opts.summary || 
(print_opts.warnings && !converged)) { @@ -372,7 +379,7 @@ void GmresSolver::Initialize() const { if (!V.empty()) { - MFEM_ASSERT(V.Size() == max_dim + 1 && V[0].Size() == A->Height(), + MFEM_ASSERT(V.size() == max_dim + 1 && V[0].Size() == A->Height(), "Repeated solves with GmresSolver should not modify the operator size or " "restart dimension!"); return; @@ -405,7 +412,7 @@ void GmresSolver::Initialize() const } template -void GmresSolver::Mult(const VecType &x, VecType &y) const +void GmresSolver::Mult(const VecType &b, VecType &x) const { // Set up workspace. RealType beta = 0.0, true_beta, eps; @@ -427,7 +434,7 @@ void GmresSolver::Mult(const VecType &x, VecType &y) const // Initialize. if (B && pc_side == PrecSide::LEFT) { - if (initial_guess || restart > 0) + if (this->initial_guess || restart > 0) { A->Mult(x, V[0]); linalg::AXPBY(1.0, b, -1.0, V[0]); @@ -441,7 +448,7 @@ void GmresSolver::Mult(const VecType &x, VecType &y) const } else // !B || pc_side == PrecSide::RIGHT { - if (initial_guess || restart > 0) + if (this->initial_guess || restart > 0) { A->Mult(x, r); linalg::AXPBY(1.0, b, -1.0, r); @@ -453,7 +460,7 @@ void GmresSolver::Mult(const VecType &x, VecType &y) const } } true_beta = linalg::Norml2(comm, r); - CheckDot(true_beta, "GMRES residual norm is not valid: ||Br|| = " << true_beta << "!"); + CheckDot(true_beta, "GMRES residual norm is not valid: beta = "); if (it == 0) { initial_res = true_beta; @@ -552,7 +559,7 @@ void GmresSolver::Mult(const VecType &x, VecType &y) const ApplyPlaneRotation(s[j], s[j + 1], cs[j], sn[j]); beta = std::abs(s[j + 1]); - CheckDot(beta, "GMRES residual norm is not valid: ||Br|| = " << beta << "!"); + CheckDot(beta, "GMRES residual norm is not valid: beta = "); if (beta < eps) { converged = true; diff --git a/palace/linalg/iterative.hpp b/palace/linalg/iterative.hpp index 48beac434..0a17d8da7 100644 --- a/palace/linalg/iterative.hpp +++ b/palace/linalg/iterative.hpp @@ -25,7 +25,7 @@ template class IterativeSolver 
: public Solver { protected: - typedef typename double RealType; + typedef double RealType; typedef typename std::conditional::value, std::complex, RealType>::type ScalarType; @@ -53,7 +53,7 @@ class IterativeSolver : public Solver mutable int final_it; public: - IterativeSolver(MPI_comm comm, int print); + IterativeSolver(MPI_Comm comm, int print); // Set an indentation for all log printing. void SetTabWidth(int width) { tab_width = width; } @@ -100,11 +100,32 @@ template class CgSolver : public IterativeSolver { protected: + typedef typename Solver::VecType VecType; + typedef typename IterativeSolver::RealType RealType; + typedef typename IterativeSolver::ScalarType ScalarType; + + using IterativeSolver::comm; + using IterativeSolver::print_opts; + using IterativeSolver::int_width; + using IterativeSolver::tab_width; + + using IterativeSolver::rel_tol; + using IterativeSolver::abs_tol; + using IterativeSolver::max_it; + + using IterativeSolver::A; + using IterativeSolver::B; + + using IterativeSolver::converged; + using IterativeSolver::initial_res; + using IterativeSolver::final_res; + using IterativeSolver::final_it; + // Temporary workspace for solve. 
mutable VecType r, z, p; public: - CgSolver(MPI_comm comm, int print) : IterativeSolver(comm, print) {} + CgSolver(MPI_Comm comm, int print) : IterativeSolver(comm, print) {} void Mult(const VecType &b, VecType &x) const override; }; @@ -129,6 +150,27 @@ class GmresSolver : public IterativeSolver }; protected: + typedef typename Solver::VecType VecType; + typedef typename IterativeSolver::RealType RealType; + typedef typename IterativeSolver::ScalarType ScalarType; + + using IterativeSolver::comm; + using IterativeSolver::print_opts; + using IterativeSolver::int_width; + using IterativeSolver::tab_width; + + using IterativeSolver::rel_tol; + using IterativeSolver::abs_tol; + using IterativeSolver::max_it; + + using IterativeSolver::A; + using IterativeSolver::B; + + using IterativeSolver::converged; + using IterativeSolver::initial_res; + using IterativeSolver::final_res; + using IterativeSolver::final_it; + // Maximum subspace dimension for restarted GMRES. mutable int max_dim; @@ -152,14 +194,14 @@ class GmresSolver : public IterativeSolver // Allocate storage for solve. void Initialize() const; - GmresSolver(MPI_comm comm, int print, bool fgmres) + GmresSolver(MPI_Comm comm, int print, bool fgmres) : IterativeSolver(comm, print), max_dim(-1), orthog_type(OrthogType::MGS), pc_side(fgmres ? PrecSide::RIGHT : PrecSide::LEFT), flexible(fgmres) { } public: - GmresSolver(MPI_comm comm, int print) : GmresSolver(comm, print, false) {} + GmresSolver(MPI_Comm comm, int print) : GmresSolver(comm, print, false) {} // Set the dimension for restart. 
void SetRestartDim(int dim) { max_dim = dim; } @@ -179,7 +221,10 @@ template class FgmresSolver : public GmresSolver { public: - FgmresSolver(MPI_comm comm, int print) : GmresSolver(comm, print, true) {} + typedef typename GmresSolver::PrecSide PrecSide; + +public: + FgmresSolver(MPI_Comm comm, int print) : GmresSolver(comm, print, true) {} void SetPrecSide(PrecSide side) override { diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp index f5721d415..e11b25132 100644 --- a/palace/linalg/ksp.cpp +++ b/palace/linalg/ksp.cpp @@ -56,7 +56,7 @@ std::unique_ptr> ConfigureKrylovSolver(MPI_Comm comm, case config::LinearSolverData::KspType::FGMRES: { auto fgmres = - std::make_unique>(comm, iodata.problem.verbose); + std::make_unique>(comm, iodata.problem.verbose); fgmres->SetRestartDim(iodata.solver.linear.max_size); ksp = std::move(fgmres); } @@ -81,7 +81,7 @@ std::unique_ptr> ConfigureKrylovSolver(MPI_Comm comm, } else { - auto *gmres = static_cast>(ksp.get()); + auto *gmres = static_cast *>(ksp.get()); switch (iodata.solver.linear.pc_side_type) { case config::LinearSolverData::SideType::LEFT: @@ -97,7 +97,7 @@ std::unique_ptr> ConfigureKrylovSolver(MPI_Comm comm, // Configure orthogonalization method for GMRES/FMGRES. if (iodata.solver.linear.orthog_type != config::LinearSolverData::OrthogType::DEFAULT) { - if (type != config::LinearSolverData::KspType::GMRES || + if (type != config::LinearSolverData::KspType::GMRES && type != config::LinearSolverData::KspType::FGMRES) { Mpi::Warning(comm, "Orthogonalization method will be ignored for non-GMRES/FGMRES " @@ -106,7 +106,7 @@ std::unique_ptr> ConfigureKrylovSolver(MPI_Comm comm, else { // Because FGMRES inherits from GMRES, this is OK. 
- auto *gmres = static_cast>(ksp.get()); + auto *gmres = static_cast *>(ksp.get()); switch (iodata.solver.linear.orthog_type) { case config::LinearSolverData::OrthogType::MGS: @@ -163,7 +163,7 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, int print = iodata.problem.verbose - 1; // Create the real-valued solver first. - std::unique_ptr pc; + std::unique_ptr pc0; switch (type) { case config::LinearSolverData::Type::AMS: @@ -171,15 +171,15 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, // space (in which case fespaces.GetNumLevels() == 1). MFEM_VERIFY(aux_fespaces, "AMS solver relies on both primary space " "and auxiliary spaces for construction!"); - pc = std::make_unique(iodata, fespaces.GetFESpaceAtLevel(0), - aux_fespaces->GetFESpaceAtLevel(0), print); + pc0 = std::make_unique(iodata, fespaces.GetFESpaceAtLevel(0), + aux_fespaces->GetFESpaceAtLevel(0), print); break; case config::LinearSolverData::Type::BOOMER_AMG: - pc = std::make_unique(iodata, print); + pc0 = std::make_unique(iodata, print); break; case config::LinearSolverData::Type::SUPERLU: #if defined(MFEM_USE_SUPERLU) - pc = std::make_unique(comm, iodata, print); + pc0 = std::make_unique(comm, iodata, print); #else MFEM_ABORT("Solver was not built with SuperLU_DIST support, please choose a " "different solver!"); @@ -187,7 +187,7 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, break; case config::LinearSolverData::Type::STRUMPACK: #if defined(MFEM_USE_STRUMPACK) - pc = std::make_unique(comm, iodata, print); + pc0 = std::make_unique(comm, iodata, print); #else MFEM_ABORT("Solver was not built with STRUMPACK support, please choose a " "different solver!"); @@ -195,7 +195,7 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, break; case config::LinearSolverData::Type::STRUMPACK_MP: #if defined(MFEM_USE_STRUMPACK) - pc = std::make_unique(comm, iodata, print); + pc0 = std::make_unique(comm, iodata, print); #else 
MFEM_ABORT("Solver was not built with STRUMPACK support, please choose a " "different solver!"); @@ -203,7 +203,7 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, break; case config::LinearSolverData::Type::MUMPS: #if defined(MFEM_USE_MUMPS) - pc = std::make_unique(comm, iodata, print); + pc0 = std::make_unique(comm, iodata, print); #else MFEM_ABORT( "Solver was not built with MUMPS support, please choose a different solver!"); @@ -216,6 +216,7 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, } // Construct the actual solver, which has the right value type. + auto pc = std::make_unique>(std::move(pc0)); if (iodata.solver.linear.pc_mg) { // This will construct the multigrid hierarchy using pc as the coarse solver @@ -237,17 +238,17 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, } else { - return std::make_unique>(std::move(pc)); + return pc; } } } // namespace template -KspSolver::KspSolver(const IoData &iodata, - mfem::ParFiniteElementSpaceHierarchy &fespaces, - mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) - : KspSolver( +BaseKspSolver::BaseKspSolver(const IoData &iodata, + mfem::ParFiniteElementSpaceHierarchy &fespaces, + mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) + : BaseKspSolver( ConfigureKrylovSolver(fespaces.GetFinestFESpace().GetComm(), iodata), ConfigurePreconditionerSolver(fespaces.GetFinestFESpace().GetComm(), iodata, fespaces, aux_fespaces)) @@ -255,17 +256,17 @@ KspSolver::KspSolver(const IoData &iodata, } template -KspSolver::KspSolver(std::unique_ptr> &&ksp, - std::unique_ptr> &&pc) +BaseKspSolver::BaseKspSolver(std::unique_ptr> &&ksp, + std::unique_ptr> &&pc) : ksp(std::move(ksp)), pc(std::move(pc)), ksp_mult(0), ksp_mult_it(0) { } template -void KspSolver::SetOperators(const OperType &op, const OperType &pc_op) +void BaseKspSolver::SetOperators(const OperType &op, const OperType &pc_op) { ksp->SetOperator(op); - const auto *mg_op = dynamic_cast *>(&pc_op); + const auto 
*mg_op = dynamic_cast *>(&pc_op); const auto *mg_pc = dynamic_cast *>(pc.get()); if (mg_op && !mg_pc) { @@ -278,7 +279,7 @@ void KspSolver::SetOperators(const OperType &op, const OperType &pc_op } template -void KspSolver::Mult(const VecType &x, VecType &y) const +void BaseKspSolver::Mult(const VecType &x, VecType &y) const { ksp->Mult(x, y); if (!ksp->GetConverged()) @@ -292,7 +293,7 @@ void KspSolver::Mult(const VecType &x, VecType &y) const ksp_mult_it += ksp->GetNumIterations(); } -template class KspSolver; -template class KspSolver; +template class BaseKspSolver; +template class BaseKspSolver; } // namespace palace diff --git a/palace/linalg/ksp.hpp b/palace/linalg/ksp.hpp index 83196db9f..efa5e8a21 100644 --- a/palace/linalg/ksp.hpp +++ b/palace/linalg/ksp.hpp @@ -25,8 +25,8 @@ class IoData; // // Linear solver class composing an iterative solver and preconditioner object. // -template -class KspSolver +template +class BaseKspSolver { static_assert(std::is_same::value || std::is_same::value, @@ -45,10 +45,10 @@ class KspSolver mutable int ksp_mult, ksp_mult_it; public: - KspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, - mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr); - KspSolver(std::unique_ptr> &&ksp, - std::unique_ptr> &&pc); + BaseKspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, + mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr); + BaseKspSolver(std::unique_ptr> &&ksp, + std::unique_ptr> &&pc); int NumTotalMult() const { return ksp_mult; } int NumTotalMultIterations() const { return ksp_mult_it; } @@ -58,7 +58,8 @@ class KspSolver void Mult(const VecType &x, VecType &y) const; }; -using ComplexKspSolver = KspSolver; +using KspSolver = BaseKspSolver; +using ComplexKspSolver = BaseKspSolver; } // namespace palace diff --git a/palace/linalg/mumps.cpp b/palace/linalg/mumps.cpp index a63422b40..5614c729a 100644 --- a/palace/linalg/mumps.cpp +++ b/palace/linalg/mumps.cpp 
@@ -5,6 +5,8 @@ #if defined(MFEM_USE_MUMPS) +#include "linalg/rap.hpp" + namespace palace { diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp index 17af1dc69..01d5f4fc5 100644 --- a/palace/linalg/operator.cpp +++ b/palace/linalg/operator.cpp @@ -58,53 +58,46 @@ Operator *ComplexOperator::Imag() return nullptr; } -void ComplexOperator::MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, bool zero_real, bool zero_imag) const +void ComplexOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) const { MFEM_ABORT("Base class ComplexOperator does not implement MultTranspose!"); } -void ComplexOperator::MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, bool zero_real, - bool zero_imag) const +void ComplexOperator::MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const { MFEM_ABORT("Base class ComplexOperator does not implement MultHermitianTranspose!"); } -void ComplexOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a, bool zero_real, - bool zero_imag) const +void ComplexOperator::AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a) const { MFEM_ABORT("Base class ComplexOperator does not implement AddMult!"); } -void ComplexOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, const std::complex a, - bool zero_real, bool zero_imag) const +void ComplexOperator::AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a) const { MFEM_ABORT("Base class ComplexOperator does not implement AddMultTranspose!"); } -void ComplexOperator::AddMultHermitianTranspose(const Vector &xr, const Vector &xi, - Vector &yr, Vector &yi, - const std::complex a, - bool zero_real, bool zero_imag) const +void ComplexOperator::AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a) const { MFEM_ABORT("Base class ComplexOperator does 
not implement AddMultHermitianTranspose!"); } -ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&data_Ar, - std::unique_ptr &&data_Ai, - Operator *Ar, Operator *Ai) - : ComplexOperator(Ar ? Ar->Height() : (Ai ? Ai->Height() : 0), - Ar ? Ar->Width() : (Ai ? Ai->Width() : 0)), - data_Ar(std::move(data_Ar)), data_Ai(std::move(data_Ai)), - Ar(this->data_Ar ? this->data_Ar.get() : Ar), - Ai(this->data_Ai ? this->data_Ai.get() : Ai) +ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&dAr, + std::unique_ptr &&dAi, + Operator *pAr, Operator *pAi) + : ComplexOperator(), data_Ar(std::move(dAr)), data_Ai(std::move(dAi)), + Ar((data_Ar != nullptr) ? data_Ar.get() : pAr), + Ai((data_Ai != nullptr) ? data_Ai.get() : pAi) { MFEM_VERIFY(Ar || Ai, "Cannot construct ComplexWrapperOperator from an empty matrix!"); MFEM_VERIFY((!Ar || !Ai) || (Ar->Height() == Ai->Height() && Ar->Width() == Ai->Width()), "Mismatch in dimension of real and imaginary matrix parts!"); + height = Ar ? Ar->Height() : Ai->Height(); + width = Ar ? 
Ar->Width() : Ai->Width(); } ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&Ar, @@ -118,9 +111,14 @@ ComplexWrapperOperator::ComplexWrapperOperator(Operator *Ar, Operator *Ai) { } -void ComplexWrapperOperator::Mult(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, bool zero_real, bool zero_imag) const +void ComplexWrapperOperator::Mult(const ComplexVector &x, ComplexVector &y) const { + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); if (Ar) { if (!zero_real) @@ -150,9 +148,14 @@ void ComplexWrapperOperator::Mult(const Vector &xr, const Vector &xi, Vector &yr } } -void ComplexWrapperOperator::MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, bool zero_real, bool zero_imag) const +void ComplexWrapperOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) const { + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); if (Ar) { if (!zero_real) @@ -182,10 +185,15 @@ void ComplexWrapperOperator::MultTranspose(const Vector &xr, const Vector &xi, V } } -void ComplexWrapperOperator::MultHermitianTranspose(const Vector &xr, const Vector &xi, - Vector &yr, Vector &yi, bool zero_real, - bool zero_imag) const +void ComplexWrapperOperator::MultHermitianTranspose(const ComplexVector &x, + ComplexVector &y) const { + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); if (Ar) { if (!zero_real) @@ -215,14 +223,19 @@ void ComplexWrapperOperator::MultHermitianTranspose(const Vector &xr, const Vect } } -void ComplexWrapperOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, const std::complex a, - bool 
zero_real, bool zero_imag) const +void ComplexWrapperOperator::AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a) const { + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); if (a.real() != 0.0 && a.imag() != 0.0) { ty.SetSize(height); - Mult(xr, xi, ty.Real(), ty.Imag(), zero_real, zero_imag); + Mult(x, ty); const int N = height; const double ar = a.real(); const double ai = a.imag(); @@ -289,15 +302,19 @@ void ComplexWrapperOperator::AddMult(const Vector &xr, const Vector &xi, Vector } } -void ComplexWrapperOperator::AddMultTranspose(const Vector &xr, const Vector &xi, - Vector &yr, Vector &yi, - const std::complex a, bool zero_real, - bool zero_imag) const +void ComplexWrapperOperator::AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a) const { + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); if (a.real() != 0.0 && a.imag() != 0.0) { tx.SetSize(width); - MultTranspose(xr, xi, tx.Real(), tx.Imag(), zero_real, zero_imag); + MultTranspose(x, tx); const int N = width; const double ar = a.real(); const double ai = a.imag(); @@ -364,15 +381,20 @@ void ComplexWrapperOperator::AddMultTranspose(const Vector &xr, const Vector &xi } } -void ComplexWrapperOperator::AddMultHermitianTranspose(const Vector &xr, const Vector &xi, - Vector &yr, Vector &yi, - const std::complex a, - bool zero_real, bool zero_imag) const +void ComplexWrapperOperator::AddMultHermitianTranspose(const ComplexVector &x, + ComplexVector &y, + const std::complex a) const { + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); if (a.real() != 0.0 && a.imag() != 
0.0) { tx.SetSize(width); - MultHermitianTranspose(xr, xi, tx.Real(), tx.Imag(), zero_real, zero_imag); + MultHermitianTranspose(x, tx); const int N = width; const double ar = a.real(); const double ai = a.imag(); @@ -453,9 +475,13 @@ void SumOperator::AddOperator(const Operator &op, double c) void SumOperator::Mult(const Vector &x, Vector &y) const { - if (ops.size() == 1 && ops[0].second == 1.0) + if (ops.size() == 1) { - return ops[0].first->Mult(x, y); + ops.front().first->Mult(x, y); + if (ops.front().second != 1.0) + { + y *= ops.front().second; + } } y = 0.0; AddMult(x, y); @@ -463,9 +489,13 @@ void SumOperator::Mult(const Vector &x, Vector &y) const void SumOperator::MultTranspose(const Vector &x, Vector &y) const { - if (ops.size() == 1 && ops[0].second == 1.0) + if (ops.size() == 1) { - return ops[0].first->MultTranspose(x, y); + ops.front().first->MultTranspose(x, y); + if (ops.front().second != 1.0) + { + y *= ops.front().second; + } } y = 0.0; AddMultTranspose(x, y); @@ -487,115 +517,10 @@ void SumOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c } } -ComplexSumOperator::ComplexSumOperator(const ComplexOperator &op, std::complex c) - : ComplexOperator(op.Height(), op.Width()) -{ - AddOperator(op, c); -} - -void ComplexSumOperator::AddOperator(const ComplexOperator &op, std::complex c) -{ - MFEM_VERIFY(op.Height() == height && op.Width() == width, - "Invalid Operator dimensions for ComplexSumOperator!"); - ops.emplace_back(&op, c); -} - -bool ComplexSumOperator::IsReal() const -{ - for (const auto &[op, c] : ops) - { - if (!op->IsReal()) - { - return false; - } - } - return true; -} - -bool ComplexSumOperator::IsImag() const -{ - for (const auto &[op, c] : ops) - { - if (!op->IsImag()) - { - return false; - } - } - return true; -} - -void ComplexSumOperator::Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real, bool zero_imag) const -{ - if (ops.Size() == 1 && ops[0].second == 1.0) - { - return 
ops[0].first->Mult(xr, xi, yr, yi, zero_real, zero_imag); - } - yr = 0.0; - yi = 0.0; - AddMult(xr, xi, yr, yi, 1.0, zero_real, zero_imag); -} - -void ComplexSumOperator::MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, bool zero_real, bool zero_imag) const -{ - if (ops.Size() == 1 && ops[0].second == 1.0) - { - return ops[0].first->MultTranspose(xr, xi, yr, yi, zero_real, zero_imag); - } - yr = 0.0; - yi = 0.0; - AddMultTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); -} - -void ComplexSumOperator::MultHermitianTranspose(const Vector &xr, const Vector &xi, - Vector &yr, Vector &yi, bool zero_real, - bool zero_imag) const -{ - if (ops.Size() == 1 && ops[0].second == 1.0) - { - return ops[0].first->MultHermitianTranspose(xr, xi, yr, yi, zero_real, zero_imag); - } - yr = 0.0; - yi = 0.0; - AddMultHermitianTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); -} - -void ComplexSumOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a, bool zero_real, - bool zero_imag) const -{ - for (const auto &[op, c] : ops) - { - op->AddMult(xr, xi, yr, yi, a * c, zero_real, zero_imag); - } -} - -void ComplexSumOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, const std::complex a, - bool zero_real, bool zero_imag) const -{ - for (const auto &[op, c] : ops) - { - op->AddMultTranspose(xr, xi, yr, yi, a * c, zero_real, zero_imag); - } -} - -void ComplexSumOperator::AddMultHermitianTranspose(const Vector &xr, const Vector &xi, - Vector &yr, Vector &yi, - const std::complex a, - bool zero_real, bool zero_imag) const -{ - for (const auto &[op, c] : ops) - { - op->AddMultTranspose(xr, xi, yr, yi, a * c, zero_real, zero_imag); - } -} - template <> -void DiagonalOperator::Mult(const Vector &x, Vector &y) const +void BaseDiagonalOperator::Mult(const Vector &x, Vector &y) const { - const int N = height; + const int N = this->height; const auto *D = d.Read(); const auto *X = 
x.Read(); auto *Y = y.Write(); @@ -603,9 +528,10 @@ void DiagonalOperator::Mult(const Vector &x, Vector &y) const } template <> -void DiagonalOperator::Mult(const ComplexVector &x, ComplexVector &y) const +void BaseDiagonalOperator::Mult(const ComplexVector &x, + ComplexVector &y) const { - const int N = height; + const int N = this->height; const auto *DR = d.Real().Read(); const auto *DI = d.Imag().Read(); const auto *XR = x.Real().Read(); @@ -621,10 +547,13 @@ void DiagonalOperator::Mult(const ComplexVector &x, ComplexVect } template <> -void DiagonalOperator::MultHermitianTranspose(const ComplexVector &x, - ComplexVector &y) const +void DiagonalOperatorHelper, + ComplexOperator>::MultHermitianTranspose(const ComplexVector &x, + ComplexVector &y) const { - const int N = height; + const ComplexVector &d = + static_cast *>(this)->d; + const int N = this->height; const auto *DR = d.Real().Read(); const auto *DI = d.Imag().Read(); const auto *XR = x.Real().Read(); @@ -644,7 +573,7 @@ namespace linalg double SpectralNorm(MPI_Comm comm, const Operator &A, bool sym, double tol, int max_it) { - ComplexWrapperOperator Ar(&A, nullptr); // Non-owning constructor + ComplexWrapperOperator Ar(const_cast(&A), nullptr); // Non-owning constructor return SpectralNorm(comm, Ar, sym, tol, max_it); } diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp index ed8bcd346..c21c22924 100644 --- a/palace/linalg/operator.hpp +++ b/palace/linalg/operator.hpp @@ -26,8 +26,9 @@ class ComplexOperator int height, width; public: - ComplexOperator(int s) : height(s), width(s) {} + ComplexOperator(int s = 0) : height(s), width(s) {} ComplexOperator(int h, int w) : height(h), width(w) {} + virtual ~ComplexOperator() = default; // Get the height (size of output) of the operator. 
int Height() const { return height; } @@ -49,64 +50,26 @@ class ComplexOperator virtual const Operator *Imag() const; virtual Operator *Imag(); - virtual void Mult(const ComplexVector &x, ComplexVector &y) const - { - Mult(x.Real(), x.Imag(), y.Real(), y.Imag()); - } - - virtual void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const = 0; - - virtual void MultTranspose(const ComplexVector &x, ComplexVector &y) const - { - MultTranspose(x.Real(), x.Imag(), y.Real(), y.Imag()); - } + // Operator application. + virtual void Mult(const ComplexVector &x, ComplexVector &y) const = 0; - virtual void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const; - - virtual void MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const - { - MultHermitianTranspose(x.Real(), x.Imag(), y.Real(), y.Imag()); - } + virtual void MultTranspose(const ComplexVector &x, ComplexVector &y) const; - virtual void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, bool zero_real = false, - bool zero_imag = false) const; + virtual void MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const; virtual void AddMult(const ComplexVector &x, ComplexVector &y, - const std::complex a = 1.0) const - { - AddMult(x.Real(), x.Imag(), y.Real(), y.Imag(), a); - } - - virtual void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const; + const std::complex a = 1.0) const; virtual void AddMultTranspose(const ComplexVector &x, ComplexVector &y, - const std::complex a = 1.0) const - { - AddMultTranspose(x.Real(), x.Imag(), y.Real(), y.Imag(), a); - } - - virtual void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag 
= false) const; + const std::complex a = 1.0) const; virtual void AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y, - const std::complex a = 1.0) const - { - AddMultHermitianTranspose(x.Real(), x.Imag(), y.Real(), y.Imag(), a); - } - - virtual void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, const std::complex a = 1.0, - bool zero_real = false, - bool zero_imag = false) const; + const std::complex a = 1.0) const; }; -// A complex-valued operator represented using a block 2x2 equivalent-real formulation. +// A complex-valued operator represented using a block 2 x 2 equivalent-real formulation: +// [ yr ] = [ Ar -Ai ] [ xr ] +// [ yi ] [ Ai Ar ] [ xi ] . class ComplexWrapperOperator : public ComplexOperator { private: @@ -117,8 +80,8 @@ class ComplexWrapperOperator : public ComplexOperator // Temporary storage for operator application. mutable ComplexVector tx, ty; - ComplexWrapperOperator(std::unique_ptr &&data_Ar, - std::unique_ptr &&data_Ai, Operator *Ar, Operator *Ai); + ComplexWrapperOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, + Operator *pAr, Operator *pAi); public: // Construct a complex operator which inherits ownership of the input real and imaginary @@ -137,34 +100,20 @@ class ComplexWrapperOperator : public ComplexOperator const Operator *Imag() const override { return Ai; } Operator *Imag() override { return Ai; } - using ComplexOperator::AddMult; - using ComplexOperator::AddMultHermitianTranspose; - using ComplexOperator::AddMultTranspose; - using ComplexOperator::Mult; - using ComplexOperator::MultHermitianTranspose; - using ComplexOperator::MultTranspose; + void Mult(const ComplexVector &x, ComplexVector &y) const override; - void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override; + void MultTranspose(const ComplexVector &x, ComplexVector &y) const override; - void MultTranspose(const Vector &xr, const 
Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override; + void MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const override; - void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, - bool zero_imag = false) const override; + void AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const override; - void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; + void AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const override; - void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; - - void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; + void AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const override; }; // Wrap a sequence of operators of the same dimensions and optional coefficients. @@ -189,57 +138,41 @@ class SumOperator : public Operator void AddMultTranspose(const Vector &x, Vector &y, const double a = 1.0) const override; }; -// Wrap a sequence of operators of the same dimensions and optional coefficients. -class ComplexSumOperator : public ComplexOperator +// Wraps two operators such that: (AB)ᵀ = BᵀAᵀ and, for complex symmetric operators, the +// Hermitian transpose operation is (AB)ᴴ = BᴴAᴴ. 
+template +class ProductOperatorHelper : public OperType { -private: - std::vector>> ops; +}; +template +class ProductOperatorHelper : public Operator +{ public: - ComplexSumOperator(int s) : ComplexOperator(s) {} - ComplexSumOperator(int h, int w) : ComplexOperator(h, w) {} - ComplexSumOperator(const ComplexOperator &op, std::complex c = 1.0); - - void AddOperator(const ComplexOperator &op, std::complex c = 1.0); - - bool IsReal() const override; - bool IsImag() const override; - - using ComplexOperator::AddMult; - using ComplexOperator::AddMultHermitianTranspose; - using ComplexOperator::AddMultTranspose; - using ComplexOperator::Mult; - using ComplexOperator::MultHermitianTranspose; - using ComplexOperator::MultTranspose; - - void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override; - - void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override; - - void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, - bool zero_imag = false) const override; - - void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; - - void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; + ProductOperatorHelper(int h, int w) : Operator(h, w) {} +}; - void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; +template +class ProductOperatorHelper : public ComplexOperator +{ +public: + ProductOperatorHelper(int h, int w) : ComplexOperator(h, w) {} + void MultHermitianTranspose(const ComplexVector &x, ComplexVector 
&y) const override + { + const ComplexOperator &A = static_cast(this)->A; + const ComplexOperator &B = static_cast(this)->B; + ComplexVector &z = static_cast(this)->z; + A.MultHermitianTranspose(x, z); + B.MultHermitianTranspose(z, y); + } }; -// Wraps two operators such that: (AB)ᵀ = BᵀAᵀ and, for complex symmetric operators, the -// Hermitian transpose operation is (AB)ᴴ = BᴴAᴴ. -template -class ProductOperator : public OperType +template +class BaseProductOperator + : public ProductOperatorHelper, OperType> { + friend class ProductOperatorHelper, OperType>; + private: typedef typename std::conditional::value, ComplexVector, Vector>::type VecType; @@ -248,8 +181,9 @@ class ProductOperator : public OperType mutable VecType z; public: - ProductOperator(const OperType &A, const OperType &B) - : OperType(A.Height(), B.Width()), A(A), B(B), z(B.Height()) + BaseProductOperator(const OperType &A, const OperType &B) + : ProductOperatorHelper, OperType>(A.Height(), B.Width()), + A(A), B(B), z(B.Height()) { } @@ -264,22 +198,38 @@ class ProductOperator : public OperType A.MultTranspose(x, z); B.MultTranspose(z, y); } +}; - template ::value>> - void MultHermitianTranspose(const VecType &x, VecType &y) const override - { - A.MultHermitianTranspose(x, z); - B.MultHermitianTranspose(z, y); - } +using ProductOperator = BaseProductOperator; +using ComplexProductOperator = BaseProductOperator; + +// Applies the simple, symmetric but not necessarily Hermitian, operator: diag(d). 
+template +class DiagonalOperatorHelper : public OperType +{ +}; + +template +class DiagonalOperatorHelper : public Operator +{ +public: + DiagonalOperatorHelper(int s) : Operator(s) {} }; -using ComplexProductOperator = ProductOperator; +template +class DiagonalOperatorHelper : public ComplexOperator +{ +public: + DiagonalOperatorHelper(int s) : ComplexOperator(s) {} + void MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const override; +}; -// Applies the simple (symmetric) operator: diag(d). -template -class DiagonalOperator : public OperType +template +class BaseDiagonalOperator + : public DiagonalOperatorHelper, OperType> { + friend class DiagonalOperatorHelper, OperType>; + private: typedef typename std::conditional::value, ComplexVector, Vector>::type VecType; @@ -287,25 +237,25 @@ class DiagonalOperator : public OperType const VecType &d; public: - DiagonalOperator(const VecType &d) : OperType(d.Size()), d(d) {} + BaseDiagonalOperator(const VecType &d) + : DiagonalOperatorHelper, OperType>(d.Size()), d(d) + { + } void Mult(const VecType &x, VecType &y) const override; void MultTranspose(const VecType &x, VecType &y) const override { Mult(x, y); } - - template ::value>> - void MultHermitianTranspose(const VecType &x, VecType &y) const override; }; -using ComplexDiagonalOperator = DiagonalOperator; +using DiagonalOperator = BaseDiagonalOperator; +using ComplexDiagonalOperator = BaseDiagonalOperator; // A container for a sequence of operators corresponding to a multigrid hierarchy. // Optionally includes operators for the auxiliary space at each level as well. The // Operators are stored from coarsest to finest level. The height and width of this operator // are never set. 
-template -class MultigridOperator : public OperType +template +class BaseMultigridOperator : public OperType { private: typedef typename std::conditional::value, @@ -314,7 +264,7 @@ class MultigridOperator : public OperType std::vector> ops, aux_ops; public: - MultigridOperator(int l) : OperType(0) + BaseMultigridOperator(int l) : OperType(0) { ops.reserve(l); aux_ops.reserve(l); @@ -323,8 +273,8 @@ class MultigridOperator : public OperType void AddOperator(std::unique_ptr &&op) { ops.push_back(std::move(op)); - height = ops.back()->Height(); - width = ops.back()->Width(); + this->height = ops.back()->Height(); + this->width = ops.back()->Width(); } void AddAuxiliaryOperator(std::unique_ptr &&aux_op) @@ -344,9 +294,14 @@ class MultigridOperator : public OperType const OperType &GetAuxiliaryOperatorAtLevel(int l) const { return *aux_ops[l]; } void Mult(const VecType &x, VecType &y) const override { GetFinestOperator().Mult(x, y); } + void MultTranspose(const VecType &x, VecType &y) const override + { + GetFinestOperator().MultTranspose(x, y); + } }; -using ComplexMultigridOperator = MultigridOperator; +using MultigridOperator = BaseMultigridOperator; +using ComplexMultigridOperator = BaseMultigridOperator; namespace linalg { diff --git a/palace/linalg/rap.cpp b/palace/linalg/rap.cpp index 1412fda53..c64f5c797 100644 --- a/palace/linalg/rap.cpp +++ b/palace/linalg/rap.cpp @@ -8,12 +8,12 @@ namespace palace { -ParOperator::ParOperator(std::unique_ptr &&data_A, Operator *A, +ParOperator::ParOperator(std::unique_ptr &&dA, Operator *pA, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict) : Operator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), - data_A(std::move(data_A)), A(this->data_A ? this->data_A.get() : A), + data_A(std::move(dA)), A((data_A != nullptr) ? 
data_A.get() : pA), trial_fespace(trial_fespace), test_fespace(test_fespace), use_R(test_restrict), dbc_tdof_list(nullptr), diag_policy(DiagonalPolicy::DIAG_ONE), RAP(nullptr) { @@ -27,14 +27,14 @@ ParOperator::ParOperator(std::unique_ptr &&A, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict) - : ParOperator(std::move(A), nullptr, trial_fespace, test_fespace, test_restrict), + : ParOperator(std::move(A), nullptr, trial_fespace, test_fespace, test_restrict) { } -ParOperator::ParOperator(Operator *A, const mfem::ParFiniteElementSpace &trial_fespace, +ParOperator::ParOperator(Operator &A, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict) - : ParOperator(nullptr, A, trial_fespace, test_fespace, test_restrict), + : ParOperator(nullptr, &A, trial_fespace, test_fespace, test_restrict) { } @@ -44,8 +44,14 @@ const Operator &ParOperator::LocalOperator() const return *A; } +Operator &ParOperator::LocalOperator() +{ + MFEM_ASSERT(A, "No local matrix available for ParOperator::LocalOperator!"); + return *A; +} + void ParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, - DiagonalPolicy policy); + DiagonalPolicy policy) { MFEM_VERIFY(policy == DiagonalPolicy::DIAG_ONE || policy == DiagonalPolicy::DIAG_ZERO, "Essential boundary condition true dof elimination for ParOperator supports " @@ -56,18 +62,13 @@ void ParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, diag_policy = policy; } -const mfem::Array *ParOperator::GetEssentialTrueDofs() const; -{ - return dbc_tdof_list; -} - void ParOperator::AssembleDiagonal(Vector &diag) const { // For an AMR mesh, a convergent diagonal is assembled with |P|ᵀ dₗ, where |P| has // entry-wise absolute values of the conforming prolongation operator. 
MFEM_VERIFY(&trial_fespace == &test_fespace, "Diagonal assembly is only available for square ParOperator!"); - if (auto *bfA = dynamic_cast(A)) + if (auto *bfA = dynamic_cast(A)) { if (bfA->HasSpMat()) { @@ -82,7 +83,7 @@ void ParOperator::AssembleDiagonal(Vector &diag) const MFEM_ABORT("Unable to assemble the local operator diagonal of BilinearForm!"); } } - else if (auto *sA = dynamic_cast(A)) + else if (auto *sA = dynamic_cast(A)) { sA->GetDiag(ly); } @@ -428,15 +429,29 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c } } -ComplexParOperator::ComplexParOperator(std::unique_ptr &&A, +ComplexParOperator::ComplexParOperator(std::unique_ptr &&dAr, + std::unique_ptr &&dAi, Operator *pAr, + Operator *pAi, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict) : ComplexOperator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), - data_A(std::move(A)), A(data_A.get()), trial_fespace(trial_fespace), - test_fespace(test_fespace), use_R(test_restrict), dbc_tdof_list(nullptr), - diag_policy(DiagonalPolicy::DIAG_ONE) + data_A((dAr != nullptr || dAi != nullptr) + ? std::make_unique(std::move(dAr), std::move(dAi)) + : std::make_unique(pAr, pAi)), + A(data_A.get()), trial_fespace(trial_fespace), test_fespace(test_fespace), + use_R(test_restrict), dbc_tdof_list(nullptr), + diag_policy(Operator::DiagonalPolicy::DIAG_ONE), + RAPr(A->HasReal() + ? std::make_unique(*A->Real(), trial_fespace, test_fespace, use_R) + : nullptr), + RAPi(A->HasImag() + ? std::make_unique(*A->Imag(), trial_fespace, test_fespace, use_R) + : nullptr) { + // We use the non-owning constructors for real and imaginary part ParOperators. We know A + // is a ComplexWrapperOperator which has separate access to the real and imaginary + // components. 
lx.SetSize(A->Width()); ly.SetSize(A->Height()); ty.SetSize(width); @@ -447,20 +462,17 @@ ComplexParOperator::ComplexParOperator(std::unique_ptr &&Ar, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict) - : ComplexParOperator( - std::make_unique(std::move(Ar), std::move(Ai)), - trial_fespace, test_fespace, test_restrict); + : ComplexParOperator(std::move(Ar), std::move(Ai), nullptr, nullptr, trial_fespace, + test_fespace, test_restrict) +{ +} + +ComplexParOperator::ComplexParOperator(Operator *Ar, Operator *Ai, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, + bool test_restrict) + : ComplexParOperator(nullptr, nullptr, Ar, Ai, trial_fespace, test_fespace, test_restrict) { - // Non-owning constructors for real and imaginary part ParOperators. We know A is a - // ComplexWrapperOperator which has separate access to the real and imaginary components. - if (A->HasReal()) - { - RAPr = std::make_unique(A->Real(), trial_fespace, test_fespace, use_R); - } - if (A->HasImag()) - { - RAPi = std::make_unique(A->Imag(), trial_fespace, test_fespace, use_R); - } } const ComplexOperator &ComplexParOperator::LocalOperator() const @@ -469,14 +481,21 @@ const ComplexOperator &ComplexParOperator::LocalOperator() const return *A; } +ComplexOperator &ComplexParOperator::LocalOperator() +{ + MFEM_ASSERT(A, "No local matrix available for ComplexParOperator::LocalOperator!"); + return *A; +} + void ComplexParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, - DiagonalPolicy policy) + Operator::DiagonalPolicy policy) { - MFEM_VERIFY(policy == DiagonalPolicy::DIAG_ONE || policy == DiagonalPolicy::DIAG_ZERO, + MFEM_VERIFY(policy == Operator::DiagonalPolicy::DIAG_ONE || + policy == Operator::DiagonalPolicy::DIAG_ZERO, "Essential boundary condition true dof elimination for ComplexParOperator " "supports only DiagonalPolicy::DIAG_ONE or 
DiagonalPolicy::DIAG_ZERO!"); MFEM_VERIFY( - policy != DiagonalPolicy::DIAG_ONE || RAPr, + policy != Operator::DiagonalPolicy::DIAG_ONE || RAPr, "DiagonalPolicy::DIAG_ONE specified for ComplexParOperator with no real part!"); MFEM_VERIFY(height == width, "Set essential true dofs for both test and trial spaces " "for rectangular ComplexParOperator!"); @@ -488,19 +507,19 @@ void ComplexParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, } if (RAPi) { - RAPi->SetEssentialTrueDofs(tdof_list, DiagonalPolicy::DIAG_ZERO); + RAPi->SetEssentialTrueDofs(tdof_list, Operator::DiagonalPolicy::DIAG_ZERO); } } -const mfem::Array *ComplexParOperator::GetEssentialTrueDofs() const -{ - return dbc_tdof_list; -} - -void ComplexParOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a, bool zero_real, - bool zero_imag) const +void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a) const { + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); MFEM_ASSERT(xr.Size() == width && xi.Size() == width && yr.Size() == height && yi.Size() == height, "Incompatible dimensions for ComplexParOperator::AddMult!"); @@ -521,7 +540,7 @@ void ComplexParOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, // Apply the operator on the L-vector. 
ly = 0.0; - A->AddMult(lx.Real(), lx.Imag(), ly.Real(), ly.Imag(), a, zero_real, zero_imag); + A->AddMult(lx, ly, a); if (dbc_tdof_list) { @@ -535,7 +554,7 @@ void ComplexParOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, test_fespace.GetRestrictionMatrix()->Mult(ly.Real(), ty.Real()); test_fespace.GetRestrictionMatrix()->Mult(ly.Imag(), ty.Imag()); } - if (diag_policy == DiagonalPolicy::DIAG_ONE && height == width) + if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE && height == width) { const int N = dbc_tdof_list->Size(); const auto *idx = dbc_tdof_list->Read(); @@ -551,7 +570,7 @@ void ComplexParOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, TYI[id] = XI[id]; }); } - else if (diag_policy == DiagonalPolicy::DIAG_ZERO || height != width) + else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO || height != width) { ty.SetSubVector(*dbc_tdof_list, 0.0); } @@ -577,10 +596,15 @@ void ComplexParOperator::AddMult(const Vector &xr, const Vector &xi, Vector &yr, } } -void ComplexParOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, - Vector &yi, const std::complex a, - bool zero_real, bool zero_imag) const +void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a) const { + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); MFEM_ASSERT(xr.Size() == height && xi.Size() == height && yr.Size() == width && yi.Size() == width, "Incompatible dimensions for ComplexParOperator::AddMultTranspose!"); @@ -617,13 +641,13 @@ void ComplexParOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Ve // Apply the operator on the L-vector. 
lx = 0.0; - A->AddMultTranspose(ly.Real(), ly.Imag(), lx.Real(), lx.Imag(), a, zero_real, zero_imag); + A->AddMultTranspose(ly, lx, a); if (dbc_tdof_list) { trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real()); trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag()); - if (diag_policy == DiagonalPolicy::DIAG_ONE && height == width) + if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE && height == width) { const int N = dbc_tdof_list->Size(); const auto *idx = dbc_tdof_list->Read(); @@ -639,7 +663,7 @@ void ComplexParOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Ve TYI[id] = XI[id]; }); } - else if (diag_policy == DiagonalPolicy::DIAG_ZERO || height != width) + else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO || height != width) { ty.SetSubVector(*dbc_tdof_list, 0.0); } @@ -657,11 +681,15 @@ void ComplexParOperator::AddMultTranspose(const Vector &xr, const Vector &xi, Ve } } -void ComplexParOperator::AddMultHermitianTranspose(const Vector &xr, const Vector &xi, - Vector &yr, Vector &yi, - const std::complex a, - bool zero_real, bool zero_imag) const +void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a) const { + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); MFEM_ASSERT(xr.Size() == height && xi.Size() == height && yr.Size() == width && yi.Size() == width, "Incompatible dimensions for ComplexParOperator::AddMultHermitianTranspose!"); @@ -698,14 +726,13 @@ void ComplexParOperator::AddMultHermitianTranspose(const Vector &xr, const Vecto // Apply the operator on the L-vector. 
lx = 0.0; - A->AddMultHermitianTranspose(ly.Real(), ly.Imag(), lx.Real(), lx.Imag(), a, zero_real, - zero_imag); + A->AddMultHermitianTranspose(ly, lx, a); if (dbc_tdof_list) { trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real()); trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag()); - if (diag_policy == DiagonalPolicy::DIAG_ONE && height == width) + if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE && height == width) { const int N = dbc_tdof_list->Size(); const auto *idx = dbc_tdof_list->Read(); @@ -721,7 +748,7 @@ void ComplexParOperator::AddMultHermitianTranspose(const Vector &xr, const Vecto TYI[id] = XI[id]; }); } - else if (diag_policy == DiagonalPolicy::DIAG_ZERO || height != width) + else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO || height != width) { ty.SetSubVector(*dbc_tdof_list, 0.0); } diff --git a/palace/linalg/rap.hpp b/palace/linalg/rap.hpp index 491364712..43ded445f 100644 --- a/palace/linalg/rap.hpp +++ b/palace/linalg/rap.hpp @@ -42,7 +42,7 @@ class ParOperator : public Operator // Temporary storage for operator application. mutable Vector lx, ly, ty; - ParOperator(std::unique_ptr &&data_A, Operator *A, + ParOperator(std::unique_ptr &&dA, Operator *pA, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); @@ -57,25 +57,26 @@ class ParOperator : public Operator } // Non-owning constructors. - ParOperator(Operator *A, const mfem::ParFiniteElementSpace &trial_fespace, + ParOperator(Operator &A, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); - ParOperator(Operator *A, const mfem::ParFiniteElementSpace &fespace) + ParOperator(Operator &A, const mfem::ParFiniteElementSpace &fespace) : ParOperator(A, fespace, fespace, false) { } // Get access to the underlying local (L-vector) operator. 
const Operator &LocalOperator() const; + Operator &LocalOperator(); + + // Get the associated MPI communicator. + MPI_Comm GetComm() const { return trial_fespace.GetComm(); } // Set essential boundary condition true dofs for square operators. void SetEssentialTrueDofs(const mfem::Array &tdof_list, DiagonalPolicy policy); // Get the essential boundary condition true dofs associated with the operator. May be // nullptr. - const mfem::Array *GetEssentialTrueDofs() const; - - // Get the associated MPI communicator. - MPI_Comm GetComm() const { return trial_fespace.GetComm(); } + const mfem::Array *GetEssentialTrueDofs() const { return dbc_tdof_list; } // Assemble the diagonal for the parallel operator. void AssembleDiagonal(Vector &diag) const override; @@ -117,8 +118,8 @@ class ComplexParOperator : public ComplexOperator { private: // Storage and access for the local operator. - std::unique_ptr data_A; - ComplexOperator *A; + std::unique_ptr data_A; + ComplexWrapperOperator *A; // Finite element spaces for parallel prolongation and restriction. const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace; @@ -128,7 +129,7 @@ class ComplexParOperator : public ComplexOperator mutable const mfem::Array *dbc_tdof_list; // Diagonal policy for constrained true dofs. - DiagonalPolicy diag_policy; + Operator::DiagonalPolicy diag_policy; // Real and imaginary parts of the operator as non-owning ParOperator objects. std::unique_ptr RAPr, RAPi; @@ -136,17 +137,14 @@ class ComplexParOperator : public ComplexOperator // Temporary storage for operator application. mutable ComplexVector lx, ly, ty; -public: - // Construct the complex-valued parallel operator, inheriting ownership of the local - // operator. 
- ComplexParOperator(std::unique_ptr &&A, + ComplexParOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, + Operator *pAr, Operator *pAi, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); - ComplexParOperator(std::unique_ptr &&A, - const mfem::ParFiniteElementSpace &fespace) - : ComplexParOperator(std::move(Ar), std::move(A), fespace, fespace, false) - { - } + +public: + // Construct the complex-valued parallel operator from the separate real and imaginary + // parts, inheriting ownership of the local operator. ComplexParOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); @@ -156,18 +154,29 @@ class ComplexParOperator : public ComplexOperator { } + // Non-owning constructors. + ComplexParOperator(Operator *Ar, Operator *Ai, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + ComplexParOperator(Operator *Ar, Operator *Ai, const mfem::ParFiniteElementSpace &fespace) + : ComplexParOperator(Ar, Ai, fespace, fespace, false) + { + } + // Get access to the underlying local (L-vector) operator. const ComplexOperator &LocalOperator() const; + ComplexOperator &LocalOperator(); + + // Get the associated MPI communicator. + MPI_Comm GetComm() const { return trial_fespace.GetComm(); } // Set essential boundary condition true dofs for square operators. - void SetEssentialTrueDofs(const mfem::Array &tdof_list, DiagonalPolicy policy); + void SetEssentialTrueDofs(const mfem::Array &tdof_list, + Operator::DiagonalPolicy policy); // Get the essential boundary condition true dofs associated with the operator. May be // nullptr. - const mfem::Array *GetEssentialTrueDofs() const; - - // Get the associated MPI communicator. 
- MPI_Comm GetComm() const { return trial_fespace.GetComm(); } + const mfem::Array *GetEssentialTrueDofs() const { return dbc_tdof_list; } bool IsReal() const override { return A->IsReal(); } bool IsImag() const override { return A->IsImag(); } @@ -178,48 +187,32 @@ class ComplexParOperator : public ComplexOperator const Operator *Imag() const override { return RAPi.get(); } Operator *Imag() override { return RAPi.get(); } - using ComplexOperator::AddMult; - using ComplexOperator::AddMultHermitianTranspose; - using ComplexOperator::AddMultTranspose; - using ComplexOperator::Mult; - using ComplexOperator::MultHermitianTranspose; - using ComplexOperator::MultTranspose; - - void Mult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override + void Mult(const ComplexVector &x, ComplexVector &y) const override { - yr = 0.0; - yi = 0.0; - AddMult(xr, xi, yr, yi, 1.0, zero_real, zero_imag); + y = 0.0; + AddMult(x, y); } - void MultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override + void MultTranspose(const ComplexVector &x, ComplexVector &y) const override { - yr = 0.0; - yi = 0.0; - AddMultTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); + y = 0.0; + AddMultTranspose(x, y); } - void MultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - bool zero_real = false, bool zero_imag = false) const override + void MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const override { - yr = 0.0; - yi = 0.0; - AddMultHermitianTranspose(xr, xi, yr, yi, 1.0, zero_real, zero_imag); + y = 0.0; + AddMultHermitianTranspose(x, y); } - void AddMult(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; + void AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const 
override; - void AddMultTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; + void AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const override; - void AddMultHermitianTranspose(const Vector &xr, const Vector &xi, Vector &yr, Vector &yi, - const std::complex a = 1.0, bool zero_real = false, - bool zero_imag = false) const override; + void AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const override; }; } // namespace palace diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp index a9770efd7..9807825df 100644 --- a/palace/linalg/slepc.cpp +++ b/palace/linalg/slepc.cpp @@ -10,7 +10,6 @@ #include #include #include "linalg/divfree.hpp" -#include "linalg/ksp.hpp" #include "utils/communication.hpp" static PetscErrorCode __mat_apply_EPS_A0(Mat, Vec, Vec); diff --git a/palace/linalg/slepc.hpp b/palace/linalg/slepc.hpp index e63de5054..d5ba211d2 100644 --- a/palace/linalg/slepc.hpp +++ b/palace/linalg/slepc.hpp @@ -16,6 +16,7 @@ #include #include #include "linalg/eps.hpp" +#include "linalg/ksp.hpp" #include "linalg/operator.hpp" #include "linalg/vector.hpp" @@ -29,7 +30,6 @@ typedef struct _p_RG *RG; namespace palace { -class ComplexKspSolver; class DivFreeSolver; namespace slepc diff --git a/palace/linalg/solver.hpp b/palace/linalg/solver.hpp index 22841295a..4146956cf 100644 --- a/palace/linalg/solver.hpp +++ b/palace/linalg/solver.hpp @@ -34,6 +34,7 @@ class Solver public: Solver(bool initial_guess = false) : initial_guess(initial_guess) {} + virtual ~Solver() = default; // Configure whether or not to use an initial guess when applying the solver. 
virtual void SetInitialGuess(bool guess) { initial_guess = guess; } @@ -56,7 +57,9 @@ class Solver template class WrapperSolver : public Solver { -private: +protected: + typedef typename Solver::VecType VecType; + std::unique_ptr pc; public: diff --git a/palace/linalg/strumpack.cpp b/palace/linalg/strumpack.cpp index 4ffeca81e..72f5f66bd 100644 --- a/palace/linalg/strumpack.cpp +++ b/palace/linalg/strumpack.cpp @@ -5,6 +5,8 @@ #if defined(MFEM_USE_STRUMPACK) +#include "linalg/rap.hpp" + namespace palace { diff --git a/palace/linalg/superlu.cpp b/palace/linalg/superlu.cpp index d4b803d09..486c14ce3 100644 --- a/palace/linalg/superlu.cpp +++ b/palace/linalg/superlu.cpp @@ -5,6 +5,7 @@ #if defined(MFEM_USE_SUPERLU) +#include "linalg/rap.hpp" #include "utils/communication.hpp" namespace palace diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp index 1fd893796..eb56921b4 100644 --- a/palace/linalg/vector.cpp +++ b/palace/linalg/vector.cpp @@ -10,20 +10,20 @@ namespace palace { -ComplexVector::ComplexVector(int n) : x(2 * n); +ComplexVector::ComplexVector(int n) : x(2 * n) { xr.MakeRef(x, 0, n); xi.MakeRef(x, n, n); } -ComplexVector::ComplexVector(const ComplexVector &y) : x(2 * x.Size()) +ComplexVector::ComplexVector(const ComplexVector &y) : x(2 * y.Size()) { xr.MakeRef(x, 0, y.Size()); xi.MakeRef(x, y.Size(), y.Size()); Set(y.Real(), y.Imag()); } -ComplexVector::ComplexVector(const Vector &xr, const Vector &xi) : x(2 * xr.Size()) +ComplexVector::ComplexVector(const Vector &yr, const Vector &yi) : x(2 * yr.Size()) { MFEM_VERIFY(yr.Size() == yi.Size(), "Mismatch in dimension of real and imaginary matrix parts in ComplexVector!"); @@ -161,10 +161,10 @@ void ComplexVector::Reciprocal(bool abs) void ComplexVector::SetSubVector(const mfem::Array &rows, std::complex s) { - const int N = dofs.Size(); + const int N = rows.Size(); const double sr = s.real(); const double si = s.imag(); - const auto *idx = dofs.Read(); + const auto *idx = rows.Read(); auto *XR = 
Real().ReadWrite(); auto *XI = Imag().ReadWrite(); mfem::forall(N, @@ -286,8 +286,8 @@ void ComplexVector::AXPBYPCZ(std::complex alpha, const ComplexVector &x, { if (ai == 0.0 && bi == 0.0) { - mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { ZR[i] = ar * XR[i] + br * YR[i] }); - mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { ZI[i] = ar * XI[i] + br * YI[i] }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { ZR[i] = ar * XR[i] + br * YR[i]; }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { ZI[i] = ar * XI[i] + br * YI[i]; }); } else { @@ -419,6 +419,12 @@ void AXPY(double alpha, const Vector &x, Vector &y) } } +template <> +void AXPY(double alpha, const ComplexVector &x, ComplexVector &y) +{ + y.AXPY(alpha, x); +} + template <> void AXPY(std::complex alpha, const ComplexVector &x, ComplexVector &y) { @@ -428,7 +434,7 @@ void AXPY(std::complex alpha, const ComplexVector &x, ComplexVector &y) template <> void AXPBY(double alpha, const Vector &x, double beta, Vector &y) { - Vector::add(alpha, x, beta, y, y); + add(alpha, x, beta, y, y); } template <> @@ -438,6 +444,41 @@ void AXPBY(std::complex alpha, const ComplexVector &x, std::complex +void AXPBY(double alpha, const ComplexVector &x, double beta, ComplexVector &y) +{ + y.AXPBY(alpha, x, beta); +} + +template <> +void AXPBYPCZ(double alpha, const Vector &x, double beta, const Vector &y, double gamma, + Vector &z) +{ + if (gamma == 0.0) + { + add(alpha, x, beta, y, z); + } + else + { + AXPBY(alpha, x, gamma, z); + z.Add(beta, y); + } +} + +template <> +void AXPBYPCZ(std::complex alpha, const ComplexVector &x, std::complex beta, + const ComplexVector &y, std::complex gamma, ComplexVector &z) +{ + z.AXPBYPCZ(alpha, x, beta, y, gamma); +} + +template <> +void AXPBYPCZ(double alpha, const ComplexVector &x, double beta, const ComplexVector &y, + double gamma, ComplexVector &z) +{ + z.AXPBYPCZ(alpha, x, beta, y, gamma); +} + } // namespace linalg } // namespace palace diff --git a/palace/linalg/vector.hpp 
b/palace/linalg/vector.hpp index 3d5b5d4e9..a8d91df21 100644 --- a/palace/linalg/vector.hpp +++ b/palace/linalg/vector.hpp @@ -69,6 +69,11 @@ class ComplexVector // Set all entries equal to s. ComplexVector &operator=(std::complex s); + ComplexVector &operator=(double s) + { + *this = std::complex(s, 0.0); + return *this; + } // Scale all entries by s. ComplexVector &operator*=(std::complex s); @@ -78,7 +83,7 @@ class ComplexVector // Set the entries listed the given array to value. All entries in the list should be // non-negative. - void SetSubVector(const Array &rows, std::complex s); + void SetSubVector(const mfem::Array &rows, std::complex s); // Vector dot product (yᴴ x) or indefinite dot product (yᵀ x) for complex vectors. std::complex Dot(const ComplexVector &y) const; @@ -123,10 +128,10 @@ template void SetRandomSign(MPI_Comm comm, VecType &x, int seed = 0); // Calculate the inner product yᴴ x or yᵀ x. -template -inline ScalarType Dot(MPI_Comm comm, const VecType &x, const VecType &y) +template +inline auto Dot(MPI_Comm comm, const VecType &x, const VecType &y) -> decltype(x * y) { - ScalarType dot = x * y; + auto dot = x * y; Mpi::GlobalSum(1, &dot, comm); return dot; } @@ -158,6 +163,11 @@ void AXPY(ScalarType alpha, const VecType &x, VecType &y); template void AXPBY(ScalarType alpha, const VecType &x, ScalarType beta, VecType &y); +// Addition z = alpha * x + beta * y + gamma * z. 
+template +void AXPBYPCZ(ScalarType alpha, const VecType &x, ScalarType beta, const VecType &y, + ScalarType gamma, VecType &z); + } // namespace linalg } // namespace palace diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp index b0689fb93..508301ff4 100644 --- a/palace/models/curlcurloperator.cpp +++ b/palace/models/curlcurloperator.cpp @@ -6,6 +6,7 @@ #include "fem/coefficient.hpp" #include "fem/integrator.hpp" #include "fem/multigrid.hpp" +#include "linalg/rap.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" #include "utils/iodata.hpp" @@ -149,7 +150,7 @@ std::unique_ptr CurlCurlOperator::GetStiffnessMatrix() } auto K_l = std::make_unique(std::move(k), nd_fespace_l); K_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); - K.AddOperator(std::move(K_l)); + K->AddOperator(std::move(K_l)); } print_hdr = false; return K; diff --git a/palace/models/curlcurloperator.hpp b/palace/models/curlcurloperator.hpp index 9976c7d49..d9577131f 100644 --- a/palace/models/curlcurloperator.hpp +++ b/palace/models/curlcurloperator.hpp @@ -63,9 +63,12 @@ class CurlCurlOperator // Return the parallel finite element space objects. auto &GetNDSpaces() { return nd_fespaces; } auto &GetNDSpace() { return nd_fespaces.GetFinestFESpace(); } + const auto &GetNDSpace() const { return nd_fespaces.GetFinestFESpace(); } auto &GetH1Spaces() { return h1_fespaces; } auto &GetH1Space() { return h1_fespaces.GetFinestFESpace(); } + const auto &GetH1Space() const { return h1_fespaces.GetFinestFESpace(); } auto &GetRTSpace() { return rt_fespace; } + const auto &GetRTSpace() const { return rt_fespace; } // Construct and return system matrix representing discretized curl-curl operator for // Ampere's law. 
diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp index 64ba4898c..2fcb5934c 100644 --- a/palace/models/laplaceoperator.cpp +++ b/palace/models/laplaceoperator.cpp @@ -5,6 +5,7 @@ #include "fem/coefficient.hpp" #include "fem/multigrid.hpp" +#include "linalg/rap.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" #include "utils/iodata.hpp" @@ -171,7 +172,7 @@ std::unique_ptr LaplaceOperator::GetStiffnessMatrix() } auto K_l = std::make_unique(std::move(k), h1_fespace_l); K_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); - K.AddOperator(std::move(K_l)); + K->AddOperator(std::move(K_l)); } print_hdr = false; return K; @@ -209,8 +210,8 @@ void LaplaceOperator::GetExcitationVector(int idx, const Operator &K, Vector &X, RHS = 0.0; x.ParallelProject(X); // Restrict to the true dofs const auto *mg_K = dynamic_cast(&K); - MFEM_VERIFY(mg_K, "LaplaceOperator requires MultigridOperator for RHS elimination!"); - const auto *PtAP_K = dynamic_cast(&mg_K->GetFinestOperator()); + const auto *PtAP_K = mg_K ? dynamic_cast(&mg_K->GetFinestOperator()) + : dynamic_cast(&K); MFEM_VERIFY(PtAP_K, "LaplaceOperator requires ParOperator for RHS elimination!"); PtAP_K->EliminateRHS(X, RHS); } diff --git a/palace/models/laplaceoperator.hpp b/palace/models/laplaceoperator.hpp index 76dd5cd21..c97e57014 100644 --- a/palace/models/laplaceoperator.hpp +++ b/palace/models/laplaceoperator.hpp @@ -60,7 +60,9 @@ class LaplaceOperator // Return the parallel finite element space objects. auto &GetH1Spaces() { return h1_fespaces; } auto &GetH1Space() { return h1_fespaces.GetFinestFESpace(); } + const auto &GetH1Space() const { return h1_fespaces.GetFinestFESpace(); } auto &GetNDSpace() { return nd_fespace; } + const auto &GetNDSpace() const { return nd_fespace; } // Construct and return system matrix representing discretized Laplace operator for // Gauss's law. 
diff --git a/palace/models/postoperator.cpp b/palace/models/postoperator.cpp index 8c9f87797..94de56e80 100644 --- a/palace/models/postoperator.cpp +++ b/palace/models/postoperator.cpp @@ -73,16 +73,20 @@ PostOperator::PostOperator(const IoData &iodata, SpaceOperator &spaceop, Bsi = std::make_unique(B->imag(), mat_op, local_to_shared); Jsi = std::make_unique(B->imag(), mat_op, local_to_shared); Qsi = std::make_unique(E->imag(), mat_op, local_to_shared); - Ue = std::make_unique>( + Ue = std::make_unique>( *E, mat_op, local_to_shared); - Um = std::make_unique>( + Um = std::make_unique>( *B, mat_op, local_to_shared); } else { - Ue = std::make_unique>( + Ue = std::make_unique< + EnergyDensityCoefficient>( E->real(), mat_op, local_to_shared); - Um = std::make_unique>( + Um = std::make_unique< + EnergyDensityCoefficient>( B->real(), mat_op, local_to_shared); } @@ -118,7 +122,8 @@ PostOperator::PostOperator(const IoData &iodata, LaplaceOperator &laplaceop, // etc.), since only V and E fields are supplied. Esr = std::make_unique(E->real(), mat_op, local_to_shared); Vs = std::make_unique(*V, mat_op, local_to_shared); - Ue = std::make_unique>( + Ue = std::make_unique< + EnergyDensityCoefficient>( E->real(), mat_op, local_to_shared); Qsr = std::make_unique(E->real(), mat_op, local_to_shared); @@ -145,7 +150,8 @@ PostOperator::PostOperator(const IoData &iodata, CurlCurlOperator &curlcurlop, // etc.), since only the B field is supplied. 
Bsr = std::make_unique(B->real(), mat_op, local_to_shared); As = std::make_unique(*A, mat_op, local_to_shared); - Um = std::make_unique>( + Um = std::make_unique< + EnergyDensityCoefficient>( B->real(), mat_op, local_to_shared); Jsr = std::make_unique(B->real(), mat_op, local_to_shared); diff --git a/palace/models/postoperator.hpp b/palace/models/postoperator.hpp index bbd807a2b..9b03f2a74 100644 --- a/palace/models/postoperator.hpp +++ b/palace/models/postoperator.hpp @@ -11,7 +11,7 @@ #include #include #include -#include "fem/interpolation.hpp" +#include "fem/interpolator.hpp" #include "linalg/operator.hpp" #include "linalg/vector.hpp" #include "models/domainpostoperator.hpp" diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index 4b0cb586c..45ec178dd 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -6,6 +6,7 @@ #include #include "fem/integrator.hpp" #include "fem/multigrid.hpp" +#include "linalg/rap.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" #include "utils/iodata.hpp" @@ -227,11 +228,11 @@ auto BuildAuxOperator(mfem::ParFiniteElementSpace &fespace, T1 *f, T2 *fb, auto a = std::make_unique(&fespace); if (f && !f->empty()) { - a.AddDomainIntegrator(new mfem::MixedGradGradIntegrator(*f)); + a->AddDomainIntegrator(new mfem::MixedGradGradIntegrator(*f)); } if (fb && !fb->empty()) { - a.AddBoundaryIntegrator(new mfem::MixedGradGradIntegrator(*fb)); + a->AddBoundaryIntegrator(new mfem::MixedGradGradIntegrator(*fb)); } if (!no_assembly) { @@ -251,14 +252,15 @@ SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy) const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); AddStiffnessCoefficients(1.0, df, f); - AddBdrStiffnessCoefficients(1.0, fb); + AddStiffnessBdrCoefficients(1.0, fb); if (df.empty() && f.empty() && fb.empty()) { return {}; } - auto K = std::make_unique( - BuildOperator(GetNDSpace(), &df, 
&f, nullptr, &fb, assembly_level, skip_zeros), - GetNDSpace()); + auto K = std::make_unique(BuildOperator(GetNDSpace(), &df, &f, + (SumCoefficient *)nullptr, &fb, + assembly_level, skip_zeros), + GetNDSpace()); K->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); return K; } @@ -270,13 +272,14 @@ SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy) const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); SumMatrixCoefficient f(sdim), fb(sdim); AddDampingCoefficients(1.0, f); - AddBdrDampingCoefficients(1.0, fb); + AddDampingBdrCoefficients(1.0, fb); if (f.empty() && fb.empty()) { return {}; } auto C = std::make_unique( - BuildOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, assembly_level, skip_zeros), + BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &f, (SumCoefficient *)nullptr, + &fb, assembly_level, skip_zeros), GetNDSpace()); C->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); return C; @@ -294,7 +297,8 @@ std::unique_ptr SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy return {}; } auto M = std::make_unique( - BuildOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, assembly_level, skip_zeros), + BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &f, (SumCoefficient *)nullptr, + &fb, assembly_level, skip_zeros), GetNDSpace()); M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); return M; @@ -307,13 +311,14 @@ SpaceOperator::GetComplexStiffnessMatrix(Operator::DiagonalPolicy diag_policy) const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); AddStiffnessCoefficients(1.0, df, f); - AddBdrStiffnessCoefficients(1.0, fb); + AddStiffnessBdrCoefficients(1.0, fb); if (df.empty() && f.empty() && fb.empty()) { return {}; } auto K = std::make_unique( - BuildOperator(GetNDSpace(), &df, &f, nullptr, &fb, assembly_level, skip_zeros), + BuildOperator(GetNDSpace(), &df, &f, (SumCoefficient *)nullptr, &fb, assembly_level, + skip_zeros), nullptr, 
GetNDSpace()); K->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); return K; @@ -326,13 +331,14 @@ SpaceOperator::GetComplexDampingMatrix(Operator::DiagonalPolicy diag_policy) const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); SumMatrixCoefficient f(sdim), fb(sdim); AddDampingCoefficients(1.0, f); - AddBdrDampingCoefficients(1.0, fb); + AddDampingBdrCoefficients(1.0, fb); if (f.empty() && fb.empty()) { return {}; } auto C = std::make_unique( - BuildOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, assembly_level, skip_zeros), + BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &f, (SumCoefficient *)nullptr, + &fb, assembly_level, skip_zeros), nullptr, GetNDSpace()); C->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); return C; @@ -350,19 +356,20 @@ SpaceOperator::GetComplexMassMatrix(Operator::DiagonalPolicy diag_policy) std::unique_ptr mr, mi; if (!fr.empty() || !fbr.empty()) { - mr = BuildOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, assembly_level, - skip_zeros); + mr = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &fr, + (SumCoefficient *)nullptr, &fbr, assembly_level, skip_zeros); } if (!fi.empty()) { - mi = BuildOperator(GetNDSpace(), nullptr, &fi, nullptr, nullptr, assembly_level, + mi = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &fi, + (SumCoefficient *)nullptr, (SumCoefficient *)nullptr, assembly_level, skip_zeros); } if (!mr && !mi) { return {}; } - auto M = std::make_unique(std::move(mr), std::move(mi)), GetNDSpace(); + auto M = std::make_unique(std::move(mr), std::move(mi), GetNDSpace()); M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); return M; } @@ -379,23 +386,152 @@ SpaceOperator::GetComplexExtraSystemMatrix(double omega, std::unique_ptr ar, ai; if (!dfbr.empty() || !fbr.empty()) { - ar = BuildOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, assembly_level, - skip_zeros); + ar = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, (SumCoefficient *)nullptr, + 
&dfbr, &fbr, assembly_level, skip_zeros); } - if (!fi.empty()) + if (!dfbi.empty() || !fbi.empty()) { - ai = BuildOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, assembly_level, - skip_zeros); + ai = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, (SumCoefficient *)nullptr, + &dfbi, &fbi, assembly_level, skip_zeros); } if (!ar && !ai) { return {}; } - auto A = std::make_unique(std::move(ar), std::move(ai)), GetNDSpace(); + auto A = std::make_unique(std::move(ar), std::move(ai), GetNDSpace()); A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); return A; } +namespace +{ + +auto BuildParSumOperator(int h, int w, double a0, double a1, double a2, + const ParOperator *K, const ParOperator *C, const ParOperator *M, + const ParOperator *A2, const mfem::ParFiniteElementSpace &fespace) +{ + auto sum = std::make_unique(h, w); + if (K && a0 != 0.0) + { + sum->AddOperator(K->LocalOperator(), a0); + } + if (C && a1 != 0.0) + { + sum->AddOperator(C->LocalOperator(), a1); + } + if (M && a2 != 0.0) + { + sum->AddOperator(M->LocalOperator(), a2); + } + if (A2) + { + sum->AddOperator(A2->LocalOperator(), 1.0); + } + return std::make_unique(std::move(sum), fespace); +} + +auto BuildParSumOperator(int h, int w, std::complex a0, std::complex a1, + std::complex a2, const ComplexParOperator *K, + const ComplexParOperator *C, const ComplexParOperator *M, + const ComplexParOperator *A2, + const mfem::ParFiniteElementSpace &fespace) +{ + // Block 2 x 2 equivalent-real formulation for each term in the sum: + // [ sumr ] += [ ar -ai ] [ Ar ] + // [ sumi ] [ ai ar ] [ Ai ] . 
+ auto sumr = std::make_unique(h, w); + auto sumi = std::make_unique(h, w); + if (K) + { + if (a0.real() != 0.0) + { + if (K->LocalOperator().HasReal()) + { + sumr->AddOperator(*K->LocalOperator().Real(), a0.real()); + } + if (K->LocalOperator().HasImag()) + { + sumi->AddOperator(*K->LocalOperator().Imag(), a0.real()); + } + } + if (a0.imag() != 0.0) + { + if (K->LocalOperator().HasImag()) + { + sumr->AddOperator(*K->LocalOperator().Imag(), -a0.imag()); + } + if (K->LocalOperator().HasReal()) + { + sumi->AddOperator(*K->LocalOperator().Real(), a0.imag()); + } + } + } + if (C && a1 != 0.0) + { + if (a1.real() != 0.0) + { + if (C->LocalOperator().HasReal()) + { + sumr->AddOperator(*C->LocalOperator().Real(), a1.real()); + } + if (C->LocalOperator().HasImag()) + { + sumi->AddOperator(*C->LocalOperator().Imag(), a1.real()); + } + } + if (a1.imag() != 0.0) + { + if (C->LocalOperator().HasImag()) + { + sumr->AddOperator(*C->LocalOperator().Imag(), -a1.imag()); + } + if (C->LocalOperator().HasReal()) + { + sumi->AddOperator(*C->LocalOperator().Real(), a1.imag()); + } + } + } + if (M && a2 != 0.0) + { + if (a2.real() != 0.0) + { + if (M->LocalOperator().HasReal()) + { + sumr->AddOperator(*M->LocalOperator().Real(), a2.real()); + } + if (M->LocalOperator().HasImag()) + { + sumi->AddOperator(*M->LocalOperator().Imag(), a2.real()); + } + } + if (a2.imag() != 0.0) + { + if (M->LocalOperator().HasImag()) + { + sumr->AddOperator(*M->LocalOperator().Imag(), -a2.imag()); + } + if (M->LocalOperator().HasReal()) + { + sumi->AddOperator(*M->LocalOperator().Real(), a2.imag()); + } + } + } + if (A2) + { + if (A2->LocalOperator().HasReal()) + { + sumr->AddOperator(*A2->LocalOperator().Real(), 1.0); + } + if (A2->LocalOperator().HasImag()) + { + sumi->AddOperator(*A2->LocalOperator().Imag(), 1.0); + } + } + return std::make_unique(std::move(sumr), std::move(sumi), fespace); +} + +} // namespace + template std::unique_ptr SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, 
ScalarType a2, @@ -404,8 +540,6 @@ SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, { typedef typename std::conditional::value, ComplexParOperator, ParOperator>::type ParOperType; - typedef typename std::conditional::value, - ComplexSumOperator, SumOperator>::type SumOperType; const auto *PtAP_K = (K) ? dynamic_cast(K) : nullptr; const auto *PtAP_C = (C) ? dynamic_cast(C) : nullptr; @@ -439,24 +573,8 @@ SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, MFEM_VERIFY(height >= 0 && width >= 0, "At least one argument to GetSystemMatrix must not be empty!"); - auto sum = std::make_unique(height, width); - if (PtAP_K && a0 != 0.0) - { - sum->AddOperator(PtAP_K->LocalOperator(), a0); - } - if (PtAP_C && a1 != 0.0) - { - sum->AddOperator(PtAP_C->LocalOperator(), a1); - } - if (PtAP_M && a2 != 0.0) - { - sum->AddOperator(PtAP_M->LocalOperator(), a2); - } - if (PtAP_A2) - { - sum->AddOperator(PtAP_A2->LocalOperator(), 1.0); - } - auto A = std::make_unique(std::move(sum), GetNDSpace()); + auto A = BuildParSumOperator(height, width, a0, a1, a2, PtAP_K, PtAP_C, PtAP_M, PtAP_A2, + GetNDSpace()); A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), Operator::DiagonalPolicy::DIAG_ONE); return A; } @@ -488,11 +606,16 @@ std::unique_ptr SpaceOperator::GetInnerProductMatrix(double a0, double auto sum = std::make_unique(height, width); if (PtAP_K && a0 != 0.0) { - sum->AddOperator(PtAP_K->LocalOperator().Real(), a0); + MFEM_VERIFY( + PtAP_K->LocalOperator().HasReal(), + "Missing real part of stiffness matrix for inner product matrix construction!"); + sum->AddOperator(*PtAP_K->LocalOperator().Real(), a0); } if (PtAP_M && a2 != 0.0) { - sum->AddOperator(PtAP_M->LocalOperator().Real(), a2); + MFEM_VERIFY(PtAP_M->LocalOperator().HasReal(), + "Missing real part of mass matrix for inner product matrix construction!"); + sum->AddOperator(*PtAP_M->LocalOperator().Real(), a2); } return std::make_unique(std::move(sum), GetNDSpace()); } @@ 
-500,15 +623,16 @@ std::unique_ptr SpaceOperator::GetInnerProductMatrix(double a0, double namespace { -auto GetLevelOperator(std::unique_ptr &B, std::unique_ptr &&br, - std::unique_ptr &&bi, mfem::FiniteElementSpace &fespace) +auto BuildLevelOperator(const MultigridOperator &B, std::unique_ptr &&br, + std::unique_ptr &&bi, + const mfem::ParFiniteElementSpace &fespace) { return std::make_unique(std::move(br), fespace); } -auto GetLevelOperator(std::unique_ptr &B, - std::unique_ptr &&br, std::unique_ptr &&bi, - mfem::FiniteElementSpace &fespace) +auto BuildLevelOperator(const ComplexMultigridOperator &B, std::unique_ptr &&br, + std::unique_ptr &&bi, + const mfem::ParFiniteElementSpace &fespace) { return std::make_unique(std::move(br), std::move(bi), fespace); } @@ -525,7 +649,7 @@ std::unique_ptr SpaceOperator::GetPreconditionerMatrix(double a0, doub } MFEM_VERIFY(h1_fespaces.GetNumLevels() == nd_fespaces.GetNumLevels(), "Multigrid hierarchy mismatch for auxiliary space preconditioning!"); - auto B = std::make_unique>(nd_fespaces.GetNumLevels()); + auto B = std::make_unique>(nd_fespaces.GetNumLevels()); for (int s = 0; s < 2; s++) { auto &fespaces = (s == 0) ? 
nd_fespaces : h1_fespaces; @@ -588,28 +712,28 @@ std::unique_ptr SpaceOperator::GetPreconditionerMatrix(double a0, doub } b_loc = std::move(b); } - auto B_l = GetLevelOperator(B, std::move(b_loc), nullptr, fespace_l); + auto B_l = BuildLevelOperator(*B, std::move(b_loc), nullptr, fespace_l); B_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); if (s == 0) { - B.AddOperator(std::move(B_l)); + B->AddOperator(std::move(B_l)); } else { - B.AddAuxiliaryOperator(std::move(B_l)); + B->AddAuxiliaryOperator(std::move(B_l)); } } } print_prec_hdr = false; + return B; } namespace { -std::unique_ptr GetCurl(mfem::FiniteElementSpace &nd_fespace, - mfem::FiniteElementSpace &rt_fespace, - mfem::AssemblyLevel assembly_level, - int skip_zeros = 1) +auto BuildCurl(mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &rt_fespace, mfem::AssemblyLevel assembly_level, + int skip_zeros = 1) { auto curl = std::make_unique(&nd_fespace, &rt_fespace); curl->AddDomainInterpolator(new mfem::CurlInterpolator); @@ -619,10 +743,9 @@ std::unique_ptr GetCurl(mfem::FiniteElementSpace & return curl; } -std::unique_ptr GetGrad(mfem::FiniteElementSpace &h1_fespace, - mfem::FiniteElementSpace &nd_fespace, - mfem::AssemblyLevel assembly_level, - int skip_zeros = 1) +auto BuildGrad(mfem::ParFiniteElementSpace &h1_fespace, + mfem::ParFiniteElementSpace &nd_fespace, mfem::AssemblyLevel assembly_level, + int skip_zeros = 1) { auto grad = std::make_unique(&h1_fespace, &nd_fespace); grad->AddDomainInterpolator(new mfem::GradientInterpolator); @@ -634,35 +757,36 @@ std::unique_ptr GetGrad(mfem::FiniteElementSpace & } // namespace -std::unique_ptr SpaceOperator::GetCurlMatrix() +std::unique_ptr SpaceOperator::GetCurlMatrix() { - return std::make_unique(GetCurl(GetNDSpace(), GetRTSpace(), assembly_level), - GetNDSpace(), GetRTSpace(), true); + return std::make_unique( + BuildCurl(GetNDSpace(), GetRTSpace(), assembly_level), GetNDSpace(), GetRTSpace(), + true); } 
-std::unique_ptr SpaceOperator::GetComplexCurlMatrix() +std::unique_ptr SpaceOperator::GetComplexCurlMatrix() { return std::make_unique( - GetCurl(GetNDSpace(), GetRTSpace(), assembly_level), nullptr, GetNDSpace(), + BuildCurl(GetNDSpace(), GetRTSpace(), assembly_level), nullptr, GetNDSpace(), GetRTSpace(), true); } -std::unique_ptr SpaceOperator::GetGradMatrix() +std::unique_ptr SpaceOperator::GetGradMatrix() { - return std::make_unique(GetGrad(GetH1Space(), GetNDSpace(), assembly_level), - GetH1Space(), GetNDSpace(), true); + return std::make_unique( + BuildGrad(GetH1Space(), GetNDSpace(), assembly_level), GetH1Space(), GetNDSpace(), + true); } -std::unique_ptr SpaceOperator::GetComplexGradMatrix() +std::unique_ptr SpaceOperator::GetComplexGradMatrix() { return std::make_unique( - GetGrad(GetH1Space(), GetNDSpace(), assembly_level), nullptr, GetH1Space(), + BuildGrad(GetH1Space(), GetNDSpace(), assembly_level), nullptr, GetH1Space(), GetNDSpace(), true); } void SpaceOperator::AddStiffnessCoefficients(double coef, SumMatrixCoefficient &df, - SumMatrixCoefficient &f, - SumMatrixCoefficient &fb) + SumMatrixCoefficient &f) { constexpr MaterialPropertyType MatType = MaterialPropertyType::INV_PERMEABILITY; df.AddCoefficient(std::make_unique>(mat_op, coef)); @@ -760,13 +884,12 @@ bool SpaceOperator::GetExcitationVector(double omega, ComplexVector &RHS) { // Frequency domain excitation vector: RHS = iω RHS1 + RHS2(ω). 
RHS.SetSize(2 * GetNDSpace().GetTrueVSize()); - RHS = std::complex(0.0, 0.0); + RHS = 0.0; bool nnz1 = AddExcitationVector1Internal(RHS.Real()); RHS *= 1i * omega; bool nnz2 = AddExcitationVector2Internal(omega, RHS); RHS.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); RHS.Imag().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); - RHS.SyncAlias(); return nnz1 || nnz2; } @@ -775,21 +898,19 @@ bool SpaceOperator::GetExcitationVector1(ComplexVector &RHS1) // Assemble the frequency domain excitation term with linear frequency dependence // (coefficient iω, see GetExcitationVector above, is accounted for later). RHS1.SetSize(2 * GetNDSpace().GetTrueVSize()); - RHS1 = std::complex(0.0, 0.0); + RHS1 = 0.0; bool nnz1 = AddExcitationVector1Internal(RHS1.Real()); RHS1.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); - RHS1.SyncAlias(); return nnz1; } bool SpaceOperator::GetExcitationVector2(double omega, ComplexVector &RHS2) { RHS2.SetSize(2 * GetNDSpace().GetTrueVSize()); - RHS2 = std::complex(0.0, 0.0); + RHS2 = 0.0; bool nnz2 = AddExcitationVector2Internal(omega, RHS2); RHS2.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); RHS2.Imag().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); - RHS2.SyncAlias(); return nnz2; } @@ -843,32 +964,30 @@ bool SpaceOperator::AddExcitationVector2Internal(double omega, ComplexVector &RH void SpaceOperator::GetConstantInitialVector(ComplexVector &v) { v.SetSize(2 * GetNDSpace().GetTrueVSize()); - v = std::complex(1.0, 0.0); + v = 1.0; v.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); - v.SyncAlias(); } void SpaceOperator::GetRandomInitialVector(ComplexVector &v) { v.SetSize(2 * GetNDSpace().GetTrueVSize()); linalg::SetRandom(GetNDSpace().GetComm(), v); - v.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); - v.Imag().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); - v.SyncAlias(); + v.SetSubVector(nd_dbc_tdof_lists.back(), 0.0); } template std::unique_ptr -GetSystemMatrix(double a0, double a1, double a2, const Operator *K, - const 
Operator *C, const Operator *M, const Operator *A2); +SpaceOperator::GetSystemMatrix(double, double, double, const Operator *, + const Operator *, const Operator *, + const Operator *); template std::unique_ptr -GetSystemMatrix>( - std::complex a0, std::complex a1, std::complex a2, - const ComplexOperator *K, const ComplexOperator *C, const ComplexOperator *M, - const ComplexOperator *A2); +SpaceOperator::GetSystemMatrix>( + std::complex, std::complex, std::complex, + const ComplexOperator *, const ComplexOperator *, const ComplexOperator *, + const ComplexOperator *); -template std::unique_ptr GetPreconditionerMatrix(double a0, double a1, - double a2, double a3); +template std::unique_ptr +SpaceOperator::GetPreconditionerMatrix(double, double, double, double); template std::unique_ptr -GetPreconditionerMatrix(double a0, double a1, double a2, double a3); +SpaceOperator::GetPreconditionerMatrix(double, double, double, double); } // namespace palace diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp index 24c042639..ef5c8123e 100644 --- a/palace/models/spaceoperator.hpp +++ b/palace/models/spaceoperator.hpp @@ -119,9 +119,12 @@ class SpaceOperator // Return the parallel finite element space objects. auto &GetNDSpaces() { return nd_fespaces; } auto &GetNDSpace() { return nd_fespaces.GetFinestFESpace(); } + const auto &GetNDSpace() const { return nd_fespaces.GetFinestFESpace(); } auto &GetH1Spaces() { return h1_fespaces; } auto &GetH1Space() { return h1_fespaces.GetFinestFESpace(); } + const auto &GetH1Space() const { return h1_fespaces.GetFinestFESpace(); } auto &GetRTSpace() { return rt_fespace; } + const auto &GetRTSpace() const { return rt_fespace; } // Construct any part of the frequency-dependent complex linear system matrix: // A = K + iω C - ω² (Mr + i Mi) + A2(ω) . 
diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp index 6651a7cc7..10cdcde13 100644 --- a/palace/models/timeoperator.cpp +++ b/palace/models/timeoperator.cpp @@ -6,7 +6,6 @@ #include #include "linalg/iterative.hpp" #include "linalg/jacobi.hpp" -#include "linalg/ksp.hpp" #include "linalg/solver.hpp" #include "models/spaceoperator.hpp" #include "utils/communication.hpp" @@ -22,7 +21,7 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera { public: // MPI communicator. - MPI_comm comm; + MPI_Comm comm; // System matrices and excitation RHS. std::unique_ptr K, M, C; @@ -82,7 +81,7 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera // Configure the system matrix and also the matrix (matrices) from which the // preconditioner will be constructed. A = spaceop.GetSystemMatrix(a0, a1, 1.0, K.get(), C.get(), M.get()); - B = spaceop.GetPreconditionerMatrix(a0, a1, 1.0, 0.0); + B = spaceop.GetPreconditionerMatrix(a0, a1, 1.0, 0.0); // Configure the solver. 
if (!kspA) @@ -90,7 +89,7 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera kspA = std::make_unique(iodata, spaceop.GetNDSpaces(), &spaceop.GetH1Spaces()); } - ksp->SetOperators(*A, *B); + kspA->SetOperators(*A, *B); }; } } @@ -103,7 +102,7 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera { C->AddMult(du, rhs, 1.0); } - Vector::add(-1.0, rhs, dJcoef(t), NegJ, rhs); + linalg::AXPBYPCZ(-1.0, rhs, dJcoef(t), NegJ, 0.0, rhs); } void Mult(const Vector &u, const Vector &du, Vector &ddu) const override @@ -199,7 +198,7 @@ const KspSolver &TimeOperator::GetLinearSolver() const double TimeOperator::GetMaxTimeStep() const { const auto &curlcurl = dynamic_cast(*op); - MPI_comm comm = curlcurl.comm; + MPI_Comm comm = curlcurl.comm; const Operator &M = *curlcurl.M; const Operator &K = *curlcurl.K; diff --git a/palace/models/timeoperator.hpp b/palace/models/timeoperator.hpp index ee6ad5c21..cdc4fc0c9 100644 --- a/palace/models/timeoperator.hpp +++ b/palace/models/timeoperator.hpp @@ -7,6 +7,7 @@ #include #include #include +#include "linalg/ksp.hpp" #include "linalg/operator.hpp" #include "linalg/vector.hpp" @@ -14,7 +15,6 @@ namespace palace { class IoData; -class KspSolver; class SpaceOperator; // diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp index c8398dc59..ae1d1fec8 100644 --- a/palace/models/waveportoperator.cpp +++ b/palace/models/waveportoperator.cpp @@ -10,6 +10,7 @@ #include "linalg/arpack.hpp" #include "linalg/iterative.hpp" #include "linalg/mumps.hpp" +#include "linalg/rap.hpp" #include "linalg/slepc.hpp" #include "linalg/solver.hpp" #include "linalg/strumpack.hpp" @@ -310,17 +311,15 @@ void GetInitialSpace(mfem::ParFiniteElementSpace &nd_fespace, // Note: When the eigenvalue solver uses a standard ℓ²-inner product instead of B-inner // product (since we use a general non-Hermitian solver due to complex symmetric B), then // we just use v0 = y0 directly. 
- v.SetSize(2 * (nd_fespace.GetTrueVSize() + h1_fespace.GetTrueVSize())); + v.SetSize(nd_fespace.GetTrueVSize() + h1_fespace.GetTrueVSize()); linalg::SetRandom(nd_fespace.GetComm(), v); // v = std::complex(1.0, 0.0); - v.Real().SetSubVector(nd_dbc_tdof_list, 0.0); - v.Imag().SetSubVector(nd_dbc_tdof_list, 0.0); + v.SetSubVector(nd_dbc_tdof_list, 0.0); for (int i = nd_fespace.GetTrueVSize(); i < nd_fespace.GetTrueVSize() + h1_fespace.GetTrueVSize(); i++) { v.Real()[i] = v.Imag()[i] = 0.0; } - v.SyncAlias(); } } // namespace @@ -474,11 +473,11 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera std::make_unique(*A1), std::make_unique(*A2i)); - auto &Br = *static_cast(&B->Real()); + auto &Br = *static_cast(B->Real()); Br.Add(-1.0 / mu_eps_max, *B4r); - auto &Ai = *static_cast(&A->Imag()); - auto &Bi = *static_cast(&B->Imag()); + auto &Ai = *static_cast(A->Imag()); + auto &Bi = *static_cast(B->Imag()); Ai *= 0.0; Bi *= 0.0; Bi.Add(-1.0 / mu_eps_max, *B4i); @@ -490,7 +489,7 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera B = std::make_unique( std::make_unique(*A1), nullptr); - auto &Br = *static_cast(&B->Real()); + auto &Br = *static_cast(B->Real()); Br.Add(-1.0 / mu_eps_max, *B4r); } } @@ -499,8 +498,8 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera // [0 0] ). GetInitialSpace(nd_fespace, h1_fespace, nd_dbc_tdof_list, h1_dbc_tdof_list, v0); e0.SetSize(v0.Size()); - e0t.SetSize(2 * nd_fespace.GetTrueVSize()); - e0n.SetSize(2 * h1_fespace.GetTrueVSize()); + e0t.SetSize(nd_fespace.GetTrueVSize()); + e0n.SetSize(h1_fespace.GetTrueVSize()); // Configure the eigenvalue problem solver. As for the full 3D case, the system matrices // are in general complex and symmetric. 
We supply the operators to the solver in @@ -669,10 +668,10 @@ void WavePortData::Initialize(double omega) eigen->GetEigenvector(mode_idx - 1, e0); { Vector e0tr, e0ti, e0nr, e0ni; - e0tr.MakeRef(e0, 0, e0t.Size() / 2); - e0nr.MakeRef(e0, e0t.Size() / 2, e0n.Size() / 2); - e0ti.MakeRef(e0, e0.Size() / 2, e0t.Size() / 2); - e0ni.MakeRef(e0, (e0.Size() + e0t.Size()) / 2, e0n.Size() / 2); + e0tr.MakeRef(e0.Real(), 0, e0t.Size()); + e0nr.MakeRef(e0.Real(), e0t.Size(), e0n.Size()); + e0ti.MakeRef(e0.Imag(), 0, e0t.Size()); + e0ni.MakeRef(e0.Imag(), e0t.Size(), e0n.Size()); e0t.Real() = e0tr; e0t.Imag() = e0ti; e0n.Real() = e0nr; From 49609b03862bf36e764d4eade45cb830331769e1 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Thu, 25 May 2023 16:35:51 -0700 Subject: [PATCH 18/41] Debugging: Real-valued test cases (electrostatics, magnetostatics, transient) --- palace/linalg/gmg.cpp | 4 +- palace/linalg/iterative.cpp | 329 ++++++++++++++++++++++++-------- palace/linalg/iterative.hpp | 62 ++++-- palace/linalg/ksp.cpp | 1 + palace/linalg/operator.hpp | 13 +- palace/models/spaceoperator.cpp | 6 +- 6 files changed, 315 insertions(+), 100 deletions(-) diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index b03151d99..a75419ea8 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -79,7 +79,9 @@ void GeometricMultigridSolver::SetOperator(const OperType &op) for (int l = 0; l < n_levels; l++) { A[l] = &mg_op->GetOperatorAtLevel(l); - MFEM_VERIFY(A[l]->Height() == P[l]->Width() && A[l]->Width() == P[l]->Width(), + MFEM_VERIFY(A[l]->Width() == A[l]->Height() && + A[l]->Height() == + ((l < n_levels - 1) ? 
P[l]->Width() : P[l - 1]->Height()), "Invalid operator sizes for GeometricMultigridSolver!"); const auto *PtAP_l = dynamic_cast(A[l]); diff --git a/palace/linalg/iterative.cpp b/palace/linalg/iterative.cpp index 3ff5fe9aa..8ee944997 100644 --- a/palace/linalg/iterative.cpp +++ b/palace/linalg/iterative.cpp @@ -264,7 +264,8 @@ IterativeSolver::IterativeSolver(MPI_Comm comm, int print) max_it = 100; converged = false; - initial_res = final_res = 0.0; + initial_res = 1.0; + final_res = 0.0; final_it = 0; } @@ -317,7 +318,7 @@ void CgSolver::Mult(const VecType &b, VecType &x) const { if (print_opts.iterations) { - Mpi::Print(comm, "{}{:{}d} iteration, residual ||r||_B = {:.6e}\n", + Mpi::Print(comm, "{}{:{}d} KSP residual norm ||r||_B = {:.6e}\n", std::string(tab_width, ' '), it, int_width, res); } if (!it) @@ -353,16 +354,16 @@ void CgSolver::Mult(const VecType &b, VecType &x) const } if (print_opts.iterations) { - Mpi::Print(comm, "{}{:{}d} iteration, residual ||r||_B = {:.6e}\n", + Mpi::Print(comm, "{}{:{}d} KSP residual norm ||r||_B = {:.6e}\n", std::string(tab_width, ' '), it, int_width, res); } if (print_opts.summary || (print_opts.warnings && !converged)) { - Mpi::Print(comm, "{}PCG solver {} with {:d} iteration{}", std::string(tab_width, ' '), + Mpi::Print(comm, "{}PCG solver {} in {:d} iteration{}", std::string(tab_width, ' '), converged ? "converged" : "did NOT converge", it, (it == 1) ? "" : "s"); if (it > 0) { - Mpi::Print(comm, " (avg. reduction factor: {:.6e})\n", + Mpi::Print(comm, " (avg. 
reduction factor: {:.3e})\n", std::pow(res / initial_res, 1.0 / it)); } else @@ -374,6 +375,31 @@ void CgSolver::Mult(const VecType &b, VecType &x) const final_it = it; } +namespace +{ + +template +inline void ApplyBA(PrecSide side, const OperType *A, const Solver *B, + const VecType &x, VecType &y, VecType &z) +{ + if (B && side == GmresSolver::PrecSide::LEFT) + { + A->Mult(x, z); + B->Mult(z, y); + } + else if (B && side == GmresSolver::PrecSide::RIGHT) + { + B->Mult(x, z); + A->Mult(z, y); + } + else + { + A->Mult(x, y); + } +} + +} // namespace + template void GmresSolver::Initialize() const { @@ -388,29 +414,29 @@ void GmresSolver::Initialize() const { max_dim = max_it; } + const int init_size = 5; V.resize(max_dim + 1); - for (int j = 0; j < std::min(5, max_dim + 1); j++) + for (int j = 0; j < std::min(init_size, max_dim + 1); j++) { V[j].SetSize(A->Height()); } - if (flexible) - { - Z.resize(max_dim + 1); - for (int j = 0; j < std::min(5, max_dim + 1); j++) - { - Z[j].SetSize(A->Height()); - } - } - else - { - r.SetSize(A->Height()); - } H.resize((max_dim + 1) * max_dim); s.resize(max_dim + 1); cs.resize(max_dim + 1); sn.resize(max_dim + 1); } +template +void GmresSolver::Update(int j) const +{ + // Add storage for basis vectors in increments. + const int add_size = 10; + for (int k = j + 1; k < std::min(j + 1 + add_size, max_dim + 1); k++) + { + V[k].SetSize(A->Height()); + } +} + template void GmresSolver::Mult(const VecType &b, VecType &x) const { @@ -419,6 +445,7 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const MFEM_VERIFY(A, "Operator must be set for GmresSolver::Mult!"); MFEM_ASSERT(A->Width() == x.Size() && A->Height() == b.Size(), "Size mismatch for GmresSolver::Mult!"); + r.SetSize(A->Height()); Initialize(); // Begin iterations. @@ -426,10 +453,10 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const int it = 0, restart = 0; if (print_opts.iterations) { - Mpi::Print(comm, "{}Residual norms for {}GMRES solve\n", flexible ? 
"F" : "", + Mpi::Print(comm, "{}Residual norms for GMRES solve\n", std::string(tab_width + int_width - 1, ' ')); } - for (; it < max_it && !converged; restart++) + for (; it < max_it; restart++) { // Initialize. if (B && pc_side == PrecSide::LEFT) @@ -466,14 +493,14 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const initial_res = true_beta; eps = std::max(rel_tol * true_beta, abs_tol); } - else if (beta > 0.0 && std::abs(beta - true_beta) > 0.1 * initial_res && + else if (beta > 0.0 && std::abs(beta - true_beta) > 0.1 * true_beta && print_opts.warnings) { Mpi::Print( comm, - "{}{}GMRES residual at restart ({:.6e}) is far from the residual norm estimate " - "from the recursion formula ({.6e}) (initial residual = {:.6e})\n", - std::string(tab_width, ' '), flexible ? "F" : "", true_beta, beta, initial_res); + "{}GMRES residual at restart ({:.6e}) is far from the residual norm estimate " + "from the recursion formula ({:.6e}) (initial residual = {:.6e})\n", + std::string(tab_width, ' '), true_beta, beta, initial_res); } beta = true_beta; if (beta < eps) @@ -488,51 +515,19 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const s[0] = beta; int j = 0; - for (; j < max_dim && it < max_it; j++, it++) + for (;; j++, it++) { if (print_opts.iterations) { - Mpi::Print(comm, "{}{:{}d} iteration ({:d} restarts), residual {:.6e}\n", it, - std::string(tab_width, ' '), int_width, restart, beta); + Mpi::Print(comm, "{}{:{}d} (restart {:d}) KSP residual norm {:.6e}\n", + std::string(tab_width, ' '), it, int_width, restart, beta); } VecType &w = V[j + 1]; if (w.Size() == 0) { - // Add storage for basis vectors in increments. 
- for (int k = j + 1; k < std::min(j + 11, max_dim + 1); k++) - { - V[k].SetSize(A->Height()); - } - if (flexible) - { - for (int k = j + 1; k < std::min(j + 11, max_dim + 1); k++) - { - Z[k].SetSize(A->Height()); - } - } - } - if (B && pc_side == PrecSide::LEFT) - { - A->Mult(V[j], r); - B->Mult(r, w); - } - else if (B && pc_side == PrecSide::RIGHT) - { - if (!flexible) - { - B->Mult(V[j], r); - A->Mult(r, w); - } - else - { - B->Mult(V[j], Z[j]); - A->Mult(Z[j], w); - } - } - else - { - A->Mult(V[j], w); + Update(j); } + ApplyBA(pc_side, A, B, V[j], w, r); ScalarType *Hj = H.data() + j * (max_dim + 1); switch (orthog_type) @@ -560,9 +555,10 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const beta = std::abs(s[j + 1]); CheckDot(beta, "GMRES residual norm is not valid: beta = "); - if (beta < eps) + converged = (beta < eps); + if (converged || j + 1 == max_dim || it + 1 == max_it) { - converged = true; + it++; break; } } @@ -572,7 +568,7 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const { ScalarType *Hi = H.data() + i * (max_dim + 1); s[i] /= Hi[i]; - for (int k = 0; k < i; k++) + for (int k = i - 1; k >= 0; k--) { s[k] -= Hi[k] * s[i]; } @@ -586,38 +582,205 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const } else // B && pc_side == PrecSide::RIGHT { - if (!flexible) + r = 0.0; + for (int k = 0; k <= j; k++) { - r = 0.0; - for (int k = 0; k <= j; k++) - { - r.Add(s[k], V[k]); - } - B->Mult(r, V[0]); - x += V[0]; + r.Add(s[k], V[k]); } - else + B->Mult(r, V[0]); + x += V[0]; + } + if (converged) + { + break; + } + } + if (print_opts.iterations) + { + Mpi::Print(comm, "{}{:{}d} (restart {:d}) KSP residual norm {:.6e}\n", + std::string(tab_width, ' '), it, int_width, restart, beta); + } + if (print_opts.summary || (print_opts.warnings && !converged)) + { + Mpi::Print(comm, "{}GMRES solver {} in {:d} iteration{}", std::string(tab_width, ' '), + converged ? "converged" : "did NOT converge", it, (it == 1) ? 
"" : "s"); + if (it > 0) + { + Mpi::Print(comm, " (avg. reduction factor: {:.3e})\n", + std::pow(beta / initial_res, 1.0 / it)); + } + else + { + Mpi::Print(comm, "\n"); + } + } + final_res = beta; + final_it = it; +} + +template +void FgmresSolver::Initialize() const +{ + GmresSolver::Initialize(); + const int init_size = 5; + Z.resize(max_dim + 1); + for (int j = 0; j < std::min(init_size, max_dim + 1); j++) + { + Z[j].SetSize(A->Height()); + } +} + +template +void FgmresSolver::Update(int j) const +{ + // Add storage for basis vectors in increments. + GmresSolver::Update(j); + const int add_size = 10; + for (int k = j + 1; k < std::min(j + 1 + add_size, max_dim + 1); k++) + { + Z[k].SetSize(A->Height()); + } +} + +template +void FgmresSolver::Mult(const VecType &b, VecType &x) const +{ + // Set up workspace. + RealType beta = 0.0, true_beta, eps; + MFEM_VERIFY(A && B, "Operator and preconditioner must be set for FgmresSolver::Mult!"); + MFEM_ASSERT(A->Width() == x.Size() && A->Height() == b.Size(), + "Size mismatch for FgmresSolver::Mult!"); + Initialize(); + + // Begin iterations. + converged = false; + int it = 0, restart = 0; + if (print_opts.iterations) + { + Mpi::Print(comm, "{}Residual norms for FGMRES solve\n", + std::string(tab_width + int_width - 1, ' ')); + } + for (; it < max_it; restart++) + { + // Initialize. 
+ if (this->initial_guess || restart > 0) + { + A->Mult(x, Z[0]); + linalg::AXPBY(1.0, b, -1.0, Z[0]); + } + else + { + Z[0] = b; + x = 0.0; + } + true_beta = linalg::Norml2(comm, Z[0]); + CheckDot(true_beta, "FGMRES residual norm is not valid: beta = "); + if (it == 0) + { + initial_res = true_beta; + eps = std::max(rel_tol * true_beta, abs_tol); + } + else if (beta > 0.0 && std::abs(beta - true_beta) > 0.1 * true_beta && + print_opts.warnings) + { + Mpi::Print( + comm, + "{}FGMRES residual at restart ({:.6e}) is far from the residual norm estimate " + "from the recursion formula ({:.6e}) (initial residual = {:.6e})\n", + std::string(tab_width, ' '), true_beta, beta, initial_res); + } + beta = true_beta; + if (beta < eps) + { + converged = true; + break; + } + + V[0] = 0.0; + V[0].Add(1.0 / beta, Z[0]); + std::fill(s.begin(), s.end(), 0.0); + s[0] = beta; + + int j = 0; + for (;; j++, it++) + { + if (print_opts.iterations) + { + Mpi::Print(comm, "{}{:{}d} (restart {:d}) KSP residual norm {:.6e}\n", + std::string(tab_width, ' '), it, int_width, restart, beta); + } + VecType &w = V[j + 1]; + if (w.Size() == 0) + { + Update(j); + } + ApplyBA(PrecSide::RIGHT, A, B, V[j], w, Z[j]); + + ScalarType *Hj = H.data() + j * (max_dim + 1); + switch (orthog_type) + { + case OrthogType::MGS: + linalg::OrthogonalizeColumnMGS(comm, V, w, Hj, j + 1); + break; + case OrthogType::CGS: + linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1); + break; + case OrthogType::CGS2: + linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1, true); + break; + } + Hj[j + 1] = linalg::Norml2(comm, w); + w *= 1.0 / Hj[j + 1]; + + for (int k = 0; k < j; k++) + { + ApplyPlaneRotation(Hj[k], Hj[k + 1], cs[k], sn[k]); + } + GeneratePlaneRotation(Hj[j], Hj[j + 1], cs[j], sn[j]); + ApplyPlaneRotation(Hj[j], Hj[j + 1], cs[j], sn[j]); + ApplyPlaneRotation(s[j], s[j + 1], cs[j], sn[j]); + + beta = std::abs(s[j + 1]); + CheckDot(beta, "FGMRES residual norm is not valid: beta = "); + converged = (beta < eps); 
+ if (converged || j + 1 == max_dim || it + 1 == max_it) { - for (int k = 0; k <= j; k++) - { - x.Add(s[k], Z[k]); - } + it++; + break; } } + + // Reconstruct the solution (for restart or due to convergence or maximum iterations). + for (int i = j; i >= 0; i--) + { + ScalarType *Hi = H.data() + i * (max_dim + 1); + s[i] /= Hi[i]; + for (int k = i - 1; k >= 0; k--) + { + s[k] -= Hi[k] * s[i]; + } + } + for (int k = 0; k <= j; k++) + { + x.Add(s[k], Z[k]); + } + if (converged) + { + break; + } } if (print_opts.iterations) { - Mpi::Print(comm, "{}{:{}d} iteration ({:d} restarts), residual {:.6e}\n", it, int_width, - std::string(tab_width, ' '), restart, beta); + Mpi::Print(comm, "{}{:{}d} (restart {:d}) KSP residual norm {:.6e}\n", + std::string(tab_width, ' '), it, int_width, restart, beta); } if (print_opts.summary || (print_opts.warnings && !converged)) { - Mpi::Print(comm, "{}{}GMRES solver {} with {:d} iteration{}", flexible ? "F" : "", - std::string(tab_width, ' '), converged ? "converged" : "did NOT converge", - it, (it == 1) ? "" : "s"); + Mpi::Print(comm, "{}FGMRES solver {} in {:d} iteration{}", std::string(tab_width, ' '), + converged ? "converged" : "did NOT converge", it, (it == 1) ? "" : "s"); if (it > 0) { - Mpi::Print(comm, " (avg. reduction factor: {:.6e})\n", + Mpi::Print(comm, " (avg. reduction factor: {:.3e})\n", std::pow(beta / initial_res, 1.0 / it)); } else diff --git a/palace/linalg/iterative.hpp b/palace/linalg/iterative.hpp index 0a17d8da7..113f723ac 100644 --- a/palace/linalg/iterative.hpp +++ b/palace/linalg/iterative.hpp @@ -181,28 +181,24 @@ class GmresSolver : public IterativeSolver // Use left or right preconditioning. PrecSide pc_side; - // Flag for flexible GMRES which stores and makes use of the preconditioned vectors. - const bool flexible; - // Temporary workspace for solve. 
- mutable std::vector V, Z; + mutable std::vector V; mutable VecType r; mutable std::vector H; mutable std::vector s, sn; mutable std::vector cs; // Allocate storage for solve. - void Initialize() const; + virtual void Initialize() const; + virtual void Update(int j) const; - GmresSolver(MPI_Comm comm, int print, bool fgmres) +public: + GmresSolver(MPI_Comm comm, int print) : IterativeSolver(comm, print), max_dim(-1), orthog_type(OrthogType::MGS), - pc_side(fgmres ? PrecSide::RIGHT : PrecSide::LEFT), flexible(fgmres) + pc_side(PrecSide::LEFT) { } -public: - GmresSolver(MPI_Comm comm, int print) : GmresSolver(comm, print, false) {} - // Set the dimension for restart. void SetRestartDim(int dim) { max_dim = dim; } @@ -221,16 +217,60 @@ template class FgmresSolver : public GmresSolver { public: + typedef typename GmresSolver::OrthogType OrthogType; typedef typename GmresSolver::PrecSide PrecSide; +protected: + typedef typename GmresSolver::VecType VecType; + typedef typename GmresSolver::RealType RealType; + typedef typename GmresSolver::ScalarType ScalarType; + + using GmresSolver::comm; + using GmresSolver::print_opts; + using GmresSolver::int_width; + using GmresSolver::tab_width; + + using GmresSolver::rel_tol; + using GmresSolver::abs_tol; + using GmresSolver::max_it; + + using GmresSolver::A; + using GmresSolver::B; + + using GmresSolver::converged; + using GmresSolver::initial_res; + using GmresSolver::final_res; + using GmresSolver::final_it; + + using GmresSolver::max_dim; + using GmresSolver::orthog_type; + using GmresSolver::pc_side; + using GmresSolver::V; + using GmresSolver::H; + using GmresSolver::s; + using GmresSolver::sn; + using GmresSolver::cs; + + // Temporary workspace for solve. + mutable std::vector Z; + + // Allocate storage for solve. 
+ void Initialize() const override; + void Update(int j) const override; + public: - FgmresSolver(MPI_Comm comm, int print) : GmresSolver(comm, print, true) {} + FgmresSolver(MPI_Comm comm, int print) : GmresSolver(comm, print) + { + pc_side = PrecSide::RIGHT; + } void SetPrecSide(PrecSide side) override { MFEM_VERIFY(side == PrecSide::RIGHT, "FGMRES solver only supports right preconditioning!"); } + + void Mult(const VecType &b, VecType &x) const override; }; } // namespace palace diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp index e11b25132..6608429e4 100644 --- a/palace/linalg/ksp.cpp +++ b/palace/linalg/ksp.cpp @@ -265,6 +265,7 @@ BaseKspSolver::BaseKspSolver(std::unique_ptr template void BaseKspSolver::SetOperators(const OperType &op, const OperType &pc_op) { + ksp->SetPreconditioner(*pc); ksp->SetOperator(op); const auto *mg_op = dynamic_cast *>(&pc_op); const auto *mg_pc = dynamic_cast *>(pc.get()); diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp index c21c22924..e655e0d9e 100644 --- a/palace/linalg/operator.hpp +++ b/palace/linalg/operator.hpp @@ -290,8 +290,17 @@ class BaseMultigridOperator : public OperType const OperType &GetFinestOperator() const { return *ops.back(); } const OperType &GetFinestAuxiliaryOperator() const { return *aux_ops.back(); } - const OperType &GetOperatorAtLevel(int l) const { return *ops[l]; } - const OperType &GetAuxiliaryOperatorAtLevel(int l) const { return *aux_ops[l]; } + const OperType &GetOperatorAtLevel(int l) const + { + MFEM_ASSERT(l < GetNumLevels(), "Out of bounds multigrid level operator requested!"); + return *ops[l]; + } + const OperType &GetAuxiliaryOperatorAtLevel(int l) const + { + MFEM_ASSERT(l < GetNumAuxiliaryLevels(), + "Out of bounds multigrid level auxiliary operator requested!"); + return *aux_ops[l]; + } void Mult(const VecType &x, VecType &y) const override { GetFinestOperator().Mult(x, y); } void MultTranspose(const VecType &x, VecType &y) const override diff --git 
a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index 45ec178dd..551c6eebd 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -199,15 +199,15 @@ auto BuildOperator(mfem::ParFiniteElementSpace &fespace, T1 *df, T2 *f, T3 *dfb, { a->AddDomainIntegrator(new mfem::CurlCurlIntegrator(*df)); } - if (df && !f->empty()) + if (f && !f->empty()) { a->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(*f)); } - if (df && !dfb->empty()) + if (dfb && !dfb->empty()) { a->AddBoundaryIntegrator(new mfem::CurlCurlIntegrator(*dfb)); } - if (df && !fb->empty()) + if (fb && !fb->empty()) { a->AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(*fb)); } From 72667d9fe29ccdb16c395321e5935ed1b9da9088 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Thu, 25 May 2023 17:31:15 -0700 Subject: [PATCH 19/41] Debugging: Complex-valued examples --- palace/linalg/vector.cpp | 7 ++++--- palace/models/spaceoperator.cpp | 12 ++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp index eb56921b4..85a8967db 100644 --- a/palace/linalg/vector.cpp +++ b/palace/linalg/vector.cpp @@ -386,7 +386,8 @@ double Normalize(MPI_Comm comm, Vector &x, const Operator &B, Vector &Bx) { B.Mult(x, Bx); double dot = Dot(comm, x, Bx); - MFEM_ASSERT(dot > 0.0, "Non-positive vector norm in normalization!"); + MFEM_ASSERT(dot > 0.0, + "Non-positive vector norm in normalization (dot = " << dot << ")!"); double norm = std::sqrt(dot); x *= 1.0 / norm; return norm; @@ -399,8 +400,8 @@ double Normalize(MPI_Comm comm, ComplexVector &x, const Operator &B, ComplexVect B.Mult(x.Real(), Bx.Real()); B.Mult(x.Imag(), Bx.Imag()); std::complex dot = Dot(comm, x, Bx); - MFEM_ASSERT(dot.real() > 0.0 && dot.imag() == 0.0, - "Non-positive vector norm in normalization!"); + MFEM_ASSERT(dot.real() > 0.0 && std::abs(dot.imag()) < 1.0e-9 * dot.real(), + "Non-positive vector norm in normalization (dot = 
" << dot << ")!"); double norm = std::sqrt(dot.real()); x *= 1.0 / norm; return norm; diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index 551c6eebd..eec21a4c9 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -883,7 +883,7 @@ bool SpaceOperator::GetExcitationVector(Vector &RHS) bool SpaceOperator::GetExcitationVector(double omega, ComplexVector &RHS) { // Frequency domain excitation vector: RHS = iω RHS1 + RHS2(ω). - RHS.SetSize(2 * GetNDSpace().GetTrueVSize()); + RHS.SetSize(GetNDSpace().GetTrueVSize()); RHS = 0.0; bool nnz1 = AddExcitationVector1Internal(RHS.Real()); RHS *= 1i * omega; @@ -897,7 +897,7 @@ bool SpaceOperator::GetExcitationVector1(ComplexVector &RHS1) { // Assemble the frequency domain excitation term with linear frequency dependence // (coefficient iω, see GetExcitationVector above, is accounted for later). - RHS1.SetSize(2 * GetNDSpace().GetTrueVSize()); + RHS1.SetSize(GetNDSpace().GetTrueVSize()); RHS1 = 0.0; bool nnz1 = AddExcitationVector1Internal(RHS1.Real()); RHS1.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); @@ -906,7 +906,7 @@ bool SpaceOperator::GetExcitationVector1(ComplexVector &RHS1) bool SpaceOperator::GetExcitationVector2(double omega, ComplexVector &RHS2) { - RHS2.SetSize(2 * GetNDSpace().GetTrueVSize()); + RHS2.SetSize(GetNDSpace().GetTrueVSize()); RHS2 = 0.0; bool nnz2 = AddExcitationVector2Internal(omega, RHS2); RHS2.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); @@ -940,7 +940,7 @@ bool SpaceOperator::AddExcitationVector2Internal(double omega, ComplexVector &RH { // Assemble the contribution of wave ports to the frequency domain excitation term at the // specified frequency. 
- MFEM_VERIFY(RHS2.Size() == 2 * GetNDSpace().GetTrueVSize(), + MFEM_VERIFY(RHS2.Size() == GetNDSpace().GetTrueVSize(), "Invalid T-vector size for AddExcitationVector2Internal!"); SumVectorCoefficient fbr(GetNDSpace().GetParMesh()->SpaceDimension()), fbi(GetNDSpace().GetParMesh()->SpaceDimension()); @@ -963,14 +963,14 @@ bool SpaceOperator::AddExcitationVector2Internal(double omega, ComplexVector &RH void SpaceOperator::GetConstantInitialVector(ComplexVector &v) { - v.SetSize(2 * GetNDSpace().GetTrueVSize()); + v.SetSize(GetNDSpace().GetTrueVSize()); v = 1.0; v.Real().SetSubVector(nd_dbc_tdof_lists.back(), 0.0); } void SpaceOperator::GetRandomInitialVector(ComplexVector &v) { - v.SetSize(2 * GetNDSpace().GetTrueVSize()); + v.SetSize(GetNDSpace().GetTrueVSize()); linalg::SetRandom(GetNDSpace().GetComm(), v); v.SetSubVector(nd_dbc_tdof_lists.back(), 0.0); } From 6dbf45f93ad13916a20d6dca75d5cd4861669a8c Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Thu, 25 May 2023 18:30:38 -0700 Subject: [PATCH 20/41] Fix initial residual norm and convergence criterion for nonzero initial guess --- palace/linalg/iterative.cpp | 168 ++++++++++++++++++++++-------------- 1 file changed, 103 insertions(+), 65 deletions(-) diff --git a/palace/linalg/iterative.cpp b/palace/linalg/iterative.cpp index 8ee944997..c207024c5 100644 --- a/palace/linalg/iterative.cpp +++ b/palace/linalg/iterative.cpp @@ -303,8 +303,19 @@ void CgSolver::Mult(const VecType &b, VecType &x) const } beta = linalg::Dot(comm, z, r); CheckDot(beta, "PCG preconditioner is not positive definite: (Br, r) = "); - res = initial_res = std::sqrt(std::abs(beta)); - eps = std::max(rel_tol * res, abs_tol); + res = std::sqrt(std::abs(beta)); + if (this->initial_guess && B) + { + B->Mult(b, p); + auto beta_rhs = linalg::Dot(comm, p, b); + CheckDot(beta_rhs, "PCG preconditioner is not positive definite: (Bb, b) = "); + initial_res = std::sqrt(std::abs(beta_rhs)); + } + else + { + initial_res = res; + } + eps = 
std::max(rel_tol * initial_res, abs_tol); converged = (res < eps); // Begin iterations. @@ -378,6 +389,40 @@ void CgSolver::Mult(const VecType &b, VecType &x) const namespace { +template +inline void InitialResidual(PrecSide side, const OperType *A, const Solver *B, + const VecType &b, VecType &x, VecType &r, VecType &z, + bool initial_guess) +{ + if (B && side == GmresSolver::PrecSide::LEFT) + { + if (initial_guess) + { + A->Mult(x, z); + linalg::AXPBY(1.0, b, -1.0, z); + B->Mult(z, r); + } + else + { + B->Mult(b, r); + x = 0.0; + } + } + else // !B || side == PrecSide::RIGHT + { + if (initial_guess) + { + A->Mult(x, r); + linalg::AXPBY(1.0, b, -1.0, r); + } + else + { + r = b; + x = 0.0; + } + } +} + template inline void ApplyBA(PrecSide side, const OperType *A, const Solver *B, const VecType &x, VecType &y, VecType &z) @@ -398,6 +443,28 @@ inline void ApplyBA(PrecSide side, const OperType *A, const Solver *B, } } +template +inline void OrthogonalizeIteration(OrthogType type, MPI_Comm comm, + const std::vector &V, VecType &w, + ScalarType *Hj, int j) +{ + // Orthogonalize w against the leading j + 1 columns of V. + typedef typename std::conditional::value, + ComplexOperator, Operator>::type OperType; + switch (type) + { + case GmresSolver::OrthogType::MGS: + linalg::OrthogonalizeColumnMGS(comm, V, w, Hj, j + 1); + break; + case GmresSolver::OrthogType::CGS: + linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1); + break; + case GmresSolver::OrthogType::CGS2: + linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1, true); + break; + } +} + } // namespace template @@ -459,39 +526,31 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const for (; it < max_it; restart++) { // Initialize. 
- if (B && pc_side == PrecSide::LEFT) - { - if (this->initial_guess || restart > 0) - { - A->Mult(x, V[0]); - linalg::AXPBY(1.0, b, -1.0, V[0]); - B->Mult(V[0], r); - } - else - { - B->Mult(b, r); - x = 0.0; - } - } - else // !B || pc_side == PrecSide::RIGHT + InitialResidual(pc_side, A, B, b, x, r, V[0], (this->initial_guess || restart > 0)); + true_beta = linalg::Norml2(comm, r); + CheckDot(true_beta, "GMRES residual norm is not valid: beta = "); + if (it == 0) { - if (this->initial_guess || restart > 0) + if (this->initial_guess) { - A->Mult(x, r); - linalg::AXPBY(1.0, b, -1.0, r); + RealType beta_rhs; + if (B && pc_side == PrecSide::LEFT) + { + B->Mult(b, V[0]); + beta_rhs = linalg::Norml2(comm, V[0]); + } + else // !B || pc_side == PrecSide::RIGHT + { + beta_rhs = linalg::Norml2(comm, b); + } + CheckDot(beta_rhs, "GMRES residual norm is not valid: beta_rhs = "); + initial_res = beta_rhs; } else { - r = b; - x = 0.0; + initial_res = true_beta; } - } - true_beta = linalg::Norml2(comm, r); - CheckDot(true_beta, "GMRES residual norm is not valid: beta = "); - if (it == 0) - { - initial_res = true_beta; - eps = std::max(rel_tol * true_beta, abs_tol); + eps = std::max(rel_tol * initial_res, abs_tol); } else if (beta > 0.0 && std::abs(beta - true_beta) > 0.1 * true_beta && print_opts.warnings) @@ -530,18 +589,7 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const ApplyBA(pc_side, A, B, V[j], w, r); ScalarType *Hj = H.data() + j * (max_dim + 1); - switch (orthog_type) - { - case OrthogType::MGS: - linalg::OrthogonalizeColumnMGS(comm, V, w, Hj, j + 1); - break; - case OrthogType::CGS: - linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1); - break; - case OrthogType::CGS2: - linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1, true); - break; - } + OrthogonalizeIteration(orthog_type, comm, V, w, Hj, j); Hj[j + 1] = linalg::Norml2(comm, w); w *= 1.0 / Hj[j + 1]; @@ -663,22 +711,23 @@ void FgmresSolver::Mult(const VecType &b, VecType &x) const for (; it < 
max_it; restart++) { // Initialize. - if (this->initial_guess || restart > 0) - { - A->Mult(x, Z[0]); - linalg::AXPBY(1.0, b, -1.0, Z[0]); - } - else - { - Z[0] = b; - x = 0.0; - } + InitialResidual(PrecSide::RIGHT, A, B, b, x, Z[0], V[0], + (this->initial_guess || restart > 0)); true_beta = linalg::Norml2(comm, Z[0]); CheckDot(true_beta, "FGMRES residual norm is not valid: beta = "); if (it == 0) { - initial_res = true_beta; - eps = std::max(rel_tol * true_beta, abs_tol); + if (this->initial_guess) + { + auto beta_rhs = linalg::Norml2(comm, b); + CheckDot(beta_rhs, "GMRES residual norm is not valid: beta_rhs = "); + initial_res = beta_rhs; + } + else + { + initial_res = true_beta; + } + eps = std::max(rel_tol * initial_res, abs_tol); } else if (beta > 0.0 && std::abs(beta - true_beta) > 0.1 * true_beta && print_opts.warnings) @@ -717,18 +766,7 @@ void FgmresSolver::Mult(const VecType &b, VecType &x) const ApplyBA(PrecSide::RIGHT, A, B, V[j], w, Z[j]); ScalarType *Hj = H.data() + j * (max_dim + 1); - switch (orthog_type) - { - case OrthogType::MGS: - linalg::OrthogonalizeColumnMGS(comm, V, w, Hj, j + 1); - break; - case OrthogType::CGS: - linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1); - break; - case OrthogType::CGS2: - linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1, true); - break; - } + OrthogonalizeIteration(orthog_type, comm, V, w, Hj, j); Hj[j + 1] = linalg::Norml2(comm, w); w *= 1.0 / Hj[j + 1]; From 8318e5251fb8559953ff3bee754968b6c5536a06 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Tue, 30 May 2023 21:09:29 -0700 Subject: [PATCH 21/41] Fix for GeometricMultigridSolver with 1 level --- palace/linalg/gmg.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index a75419ea8..a825d974a 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -79,10 +79,11 @@ void GeometricMultigridSolver::SetOperator(const OperType &op) for (int l = 0; l < n_levels; l++) 
{ A[l] = &mg_op->GetOperatorAtLevel(l); - MFEM_VERIFY(A[l]->Width() == A[l]->Height() && - A[l]->Height() == - ((l < n_levels - 1) ? P[l]->Width() : P[l - 1]->Height()), - "Invalid operator sizes for GeometricMultigridSolver!"); + MFEM_VERIFY( + A[l]->Width() == A[l]->Height() && + (n_levels == 1 || + (A[l]->Height() == ((l < n_levels - 1) ? P[l]->Width() : P[l - 1]->Height()))), + "Invalid operator sizes for GeometricMultigridSolver!"); const auto *PtAP_l = dynamic_cast(A[l]); MFEM_VERIFY( @@ -162,7 +163,7 @@ template void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const { // Pre-smooth, with zero initial guess (Y = 0 set inside). This is the coarse solve at - // level 0. Important to note that the smoothers must respect the iterative_mode flag + // level 0. Important to note that the smoothers must respect the initial guess flag // correctly (given X, Y, compute Y <- Y + B (X - A Y)) . B[l]->SetInitialGuess(initial_guess); B[l]->Mult(X[l], Y[l]); From d8aefdfbc41b5acf2e92e101c12bae587ba912f4 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Fri, 26 May 2023 10:59:18 -0700 Subject: [PATCH 22/41] Minor prerequisite testing out complex-valued multigrid smoothers --- palace/linalg/chebyshev.cpp | 126 +++++++++++++++++++++++++------- palace/linalg/rap.cpp | 27 +++---- palace/linalg/solver.cpp | 3 +- palace/linalg/vector.cpp | 52 ++++++------- palace/linalg/vector.hpp | 11 ++- palace/models/spaceoperator.cpp | 108 +++++++++++++++++++-------- 6 files changed, 222 insertions(+), 105 deletions(-) diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp index 34dff2d40..62cf0891b 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ -13,26 +13,28 @@ namespace palace namespace { -void GetDiagonal(const ParOperator &A, Vector &diag) +void GetInverseDiagonal(const ParOperator &A, Vector &dinv) { - diag.SetSize(A.Height()); - A.AssembleDiagonal(diag); + dinv.SetSize(A.Height()); + A.AssembleDiagonal(dinv); + 
dinv.Reciprocal(); } -void GetDiagonal(const ComplexParOperator &A, ComplexVector &diag) +void GetInverseDiagonal(const ComplexParOperator &A, ComplexVector &dinv) { MFEM_VERIFY(A.HasReal() || A.HasImag(), "Invalid zero ComplexParOperator for ChebyshevSmoother!"); - diag.SetSize(A.Height()); - diag = 0.0; + dinv.SetSize(A.Height()); + dinv = 0.0; if (A.HasReal()) { - A.Real()->AssembleDiagonal(diag.Real()); + A.Real()->AssembleDiagonal(dinv.Real()); } if (A.HasImag()) { - A.Imag()->AssembleDiagonal(diag.Imag()); + A.Imag()->AssembleDiagonal(dinv.Imag()); } + dinv.Reciprocal(); } } // namespace @@ -52,8 +54,7 @@ void ChebyshevSmoother::SetOperator(const OperType &op) const auto *PtAP = dynamic_cast(&op); MFEM_VERIFY(PtAP, "ChebyshevSmoother requires a ParOperator or ComplexParOperator operator!"); - GetDiagonal(*PtAP, dinv); - dinv.Reciprocal(); + GetInverseDiagonal(*PtAP, dinv); A = &op; r.SetSize(A->Height()); @@ -64,12 +65,54 @@ void ChebyshevSmoother::SetOperator(const OperType &op) // polynomial versus Gauss-Seidel, JCP (2003). 
BaseDiagonalOperator Dinv(dinv); BaseProductOperator DinvA(Dinv, *A); - lambda_max = 1.1 * linalg::SpectralNorm(PtAP->GetComm(), DinvA, false); + lambda_max = 1.01 * linalg::SpectralNorm(PtAP->GetComm(), DinvA, false); } namespace { +template +inline void ApplyOp(const Operator &A, const Vector &x, Vector &y, const double a) +{ + if (a == 0.0) + { + A.Mult(x, y); + } + else + { + A.AddMult(x, y, a); + } +} + +template +inline void ApplyOp(const ComplexOperator &A, const ComplexVector &x, ComplexVector &y, + const double a) +{ + if constexpr (!Transpose) + { + if (a == 0.0) + { + A.Mult(x, y); + } + else + { + A.AddMult(x, y, a); + } + } + else + { + if (a == 0.0) + { + A.MultHermitianTranspose(x, y); + } + else + { + A.AddMultHermitianTranspose(x, y, a); + } + } +} + +template inline void ApplyOrder0(double sr, const Vector &dinv, const Vector &r, Vector &d) { const int N = d.Size(); @@ -79,6 +122,7 @@ inline void ApplyOrder0(double sr, const Vector &dinv, const Vector &r, Vector & mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { D[i] = sr * DI[i] * R[i]; }); } +template inline void ApplyOrder0(const double sr, const ComplexVector &dinv, const ComplexVector &r, ComplexVector &d) { @@ -89,15 +133,29 @@ inline void ApplyOrder0(const double sr, const ComplexVector &dinv, const Comple const auto *RI = r.Imag().Read(); auto *DR = d.Real().ReadWrite(); auto *DI = d.Imag().ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const double t = DII[i] * RR[i] + DIR[i] * RI[i]; - DR[i] = sr * (DIR[i] * RR[i] - DII[i] * RI[i]); - DI[i] = sr * t; - }); + if constexpr (!Transpose) + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = DII[i] * RR[i] + DIR[i] * RI[i]; + DR[i] = sr * (DIR[i] * RR[i] - DII[i] * RI[i]); + DI[i] = sr * t; + }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = -DII[i] * RR[i] + DIR[i] * RI[i]; + DR[i] = sr * (DIR[i] * RR[i] + DII[i] * RI[i]); + DI[i] = sr * t; + }); + } } +template 
inline void ApplyOrderK(const double sd, const double sr, const Vector &dinv, const Vector &r, Vector &d) { @@ -108,6 +166,7 @@ inline void ApplyOrderK(const double sd, const double sr, const Vector &dinv, mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { D[i] = sd * D[i] + sr * DI[i] * R[i]; }); } +template inline void ApplyOrderK(const double sd, const double sr, const ComplexVector &dinv, const ComplexVector &r, ComplexVector &d) { @@ -118,13 +177,26 @@ inline void ApplyOrderK(const double sd, const double sr, const ComplexVector &d const auto *RI = r.Imag().Read(); auto *DR = d.Real().ReadWrite(); auto *DI = d.Imag().ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const double t = DII[i] * RR[i] + DIR[i] * RI[i]; - DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] - DII[i] * RI[i]); - DI[i] = sd * DI[i] + sr * t; - }); + if constexpr (!Transpose) + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = DII[i] * RR[i] + DIR[i] * RI[i]; + DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] - DII[i] * RI[i]); + DI[i] = sd * DI[i] + sr * t; + }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = -DII[i] * RR[i] + DIR[i] * RI[i]; + DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] + DII[i] * RI[i]); + DI[i] = sd * DI[i] + sr * t; + }); + } } } // namespace @@ -137,7 +209,7 @@ void ChebyshevSmoother::Mult(const VecType &x, VecType &y) const { if (this->initial_guess || it > 0) { - A->Mult(y, r); + ApplyOp(*A, y, r, 0.0); linalg::AXPBY(1.0, x, -1.0, r); } else @@ -152,7 +224,7 @@ void ChebyshevSmoother::Mult(const VecType &x, VecType &y) const for (int k = 1; k < order; k++) { y += d; - A->AddMult(d, r, -1.0); + ApplyOp(*A, d, r, -1.0); const double sd = (2.0 * k - 1.0) / (2.0 * k + 3.0); const double sr = (8.0 * k + 4.0) / ((2.0 * k + 3.0) * lambda_max); ApplyOrderK(sd, sr, dinv, r, d); diff --git a/palace/linalg/rap.cpp b/palace/linalg/rap.cpp index c64f5c797..88cc7f96c 100644 --- a/palace/linalg/rap.cpp +++ 
b/palace/linalg/rap.cpp @@ -251,9 +251,6 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble() // Eliminate boundary conditions on the assembled (square) matrix. if (dbc_tdof_list) { - MFEM_VERIFY( - &trial_fespace == &test_fespace, - "Only square ParOperator should have same trial and test eliminated tdofs!"); RAP->EliminateBC(*dbc_tdof_list, diag_policy); } return *RAP; @@ -293,7 +290,7 @@ void ParOperator::EliminateRHS(const Vector &x, Vector &b) const { test_fespace.GetRestrictionMatrix()->AddMult(ly, b, -1.0); } - if (diag_policy == DiagonalPolicy::DIAG_ONE && height == width) + if (diag_policy == DiagonalPolicy::DIAG_ONE) { const int N = dbc_tdof_list->Size(); const auto *idx = dbc_tdof_list->Read(); @@ -306,7 +303,7 @@ void ParOperator::EliminateRHS(const Vector &x, Vector &b) const B[id] = X[id]; }); } - else if (diag_policy == DiagonalPolicy::DIAG_ZERO || height != width) + else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { b.SetSubVector(*dbc_tdof_list, 0.0); } @@ -340,7 +337,7 @@ void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const { test_fespace.GetRestrictionMatrix()->Mult(ly, ty); } - if (diag_policy == DiagonalPolicy::DIAG_ONE && height == width) + if (diag_policy == DiagonalPolicy::DIAG_ONE) { const int N = dbc_tdof_list->Size(); const auto *idx = dbc_tdof_list->Read(); @@ -353,7 +350,7 @@ void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const TY[id] = X[id]; }); } - else if (diag_policy == DiagonalPolicy::DIAG_ZERO || height != width) + else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { ty.SetSubVector(*dbc_tdof_list, 0.0); } @@ -400,7 +397,7 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c if (dbc_tdof_list) { trial_fespace.GetProlongationMatrix()->MultTranspose(lx, ty); - if (diag_policy == DiagonalPolicy::DIAG_ONE && height == width) + if (diag_policy == DiagonalPolicy::DIAG_ONE) { const int N = dbc_tdof_list->Size(); const auto *idx = dbc_tdof_list->Read(); @@ 
-413,7 +410,7 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c TY[id] = X[id]; }); } - else if (diag_policy == DiagonalPolicy::DIAG_ZERO || height != width) + else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { ty.SetSubVector(*dbc_tdof_list, 0.0); } @@ -554,7 +551,7 @@ void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y, test_fespace.GetRestrictionMatrix()->Mult(ly.Real(), ty.Real()); test_fespace.GetRestrictionMatrix()->Mult(ly.Imag(), ty.Imag()); } - if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE && height == width) + if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { const int N = dbc_tdof_list->Size(); const auto *idx = dbc_tdof_list->Read(); @@ -570,7 +567,7 @@ void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y, TYI[id] = XI[id]; }); } - else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO || height != width) + else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { ty.SetSubVector(*dbc_tdof_list, 0.0); } @@ -647,7 +644,7 @@ void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector { trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real()); trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag()); - if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE && height == width) + if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { const int N = dbc_tdof_list->Size(); const auto *idx = dbc_tdof_list->Read(); @@ -663,7 +660,7 @@ void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector TYI[id] = XI[id]; }); } - else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO || height != width) + else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { ty.SetSubVector(*dbc_tdof_list, 0.0); } @@ -732,7 +729,7 @@ void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, Compl { trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real()); 
trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag()); - if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE && height == width) + if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { const int N = dbc_tdof_list->Size(); const auto *idx = dbc_tdof_list->Read(); @@ -748,7 +745,7 @@ void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, Compl TYI[id] = XI[id]; }); } - else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO || height != width) + else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { ty.SetSubVector(*dbc_tdof_list, 0.0); } diff --git a/palace/linalg/solver.cpp b/palace/linalg/solver.cpp index f402e43f8..c78a00c85 100644 --- a/palace/linalg/solver.cpp +++ b/palace/linalg/solver.cpp @@ -16,7 +16,8 @@ template <> void WrapperSolver::SetOperator(const ComplexOperator &op) { MFEM_VERIFY(op.IsReal() && op.HasReal(), - "WrapperSolver::SetOperator assumes an operator which is purely real!"); + "WrapperSolver::SetOperator requires an operator which is purely real for " + "mfem::Solver!"); pc->SetOperator(*op.Real()); } diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp index 85a8967db..12cbbfaad 100644 --- a/palace/linalg/vector.cpp +++ b/palace/linalg/vector.cpp @@ -95,11 +95,6 @@ void ComplexVector::Get(std::complex *py, int n) const y.HostReadWrite(); } -void ComplexVector::Conj() -{ - Imag() *= -1.0; -} - ComplexVector &ComplexVector::operator=(std::complex s) { Real() = s.real(); @@ -132,31 +127,36 @@ ComplexVector &ComplexVector::operator*=(std::complex s) return *this; } -void ComplexVector::Reciprocal(bool abs) +void ComplexVector::Conj() +{ + Imag() *= -1.0; +} + +void ComplexVector::Abs() { const int N = Size(); auto *XR = Real().ReadWrite(); auto *XI = Imag().ReadWrite(); - if (abs) - { - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - const double t = 1.0 / std::sqrt(XR[i] * XR[i] + XI[i] * XI[i]); - XR[i] = t; - XI[i] = 0.0; - }); - } - else - { - mfem::forall(N, - [=] 
MFEM_HOST_DEVICE(int i) - { - const std::complex t = 1.0 / std::complex(XR[i], XI[i]); - XR[i] = t.real(); - XI[i] = t.imag(); - }); - } + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + XR[i] = std::sqrt(XR[i] * XR[i] + XI[i] * XI[i]); + XI[i] = 0.0; + }); +} + +void ComplexVector::Reciprocal() +{ + const int N = Size(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const std::complex t = 1.0 / std::complex(XR[i], XI[i]); + XR[i] = t.real(); + XI[i] = t.imag(); + }); } void ComplexVector::SetSubVector(const mfem::Array &rows, std::complex s) diff --git a/palace/linalg/vector.hpp b/palace/linalg/vector.hpp index a8d91df21..395ff82ad 100644 --- a/palace/linalg/vector.hpp +++ b/palace/linalg/vector.hpp @@ -64,9 +64,6 @@ class ComplexVector // Copy the vector into an array of complex values. void Get(std::complex *py, int n) const; - // Replace entries with complex conjugate. - void Conj(); - // Set all entries equal to s. ComplexVector &operator=(std::complex s); ComplexVector &operator=(double s) @@ -78,8 +75,14 @@ class ComplexVector // Scale all entries by s. ComplexVector &operator*=(std::complex s); + // Replace entries with their complex conjugate. + void Conj(); + + // Replace entries with their absolute value. + void Abs(); + // Set all entries to their reciprocal. - void Reciprocal(bool abs = false); + void Reciprocal(); // Set the entries listed the given array to value. All entries in the list should be // non-negative. 
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index eec21a4c9..b3be5e915 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -257,6 +257,7 @@ SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy) { return {}; } + auto K = std::make_unique(BuildOperator(GetNDSpace(), &df, &f, (SumCoefficient *)nullptr, &fb, assembly_level, skip_zeros), @@ -277,6 +278,7 @@ SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy) { return {}; } + auto C = std::make_unique( BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &f, (SumCoefficient *)nullptr, &fb, assembly_level, skip_zeros), @@ -296,6 +298,7 @@ std::unique_ptr SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy { return {}; } + auto M = std::make_unique( BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &f, (SumCoefficient *)nullptr, &fb, assembly_level, skip_zeros), @@ -316,6 +319,7 @@ SpaceOperator::GetComplexStiffnessMatrix(Operator::DiagonalPolicy diag_policy) { return {}; } + auto K = std::make_unique( BuildOperator(GetNDSpace(), &df, &f, (SumCoefficient *)nullptr, &fb, assembly_level, skip_zeros), @@ -336,6 +340,7 @@ SpaceOperator::GetComplexDampingMatrix(Operator::DiagonalPolicy diag_policy) { return {}; } + auto C = std::make_unique( BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &f, (SumCoefficient *)nullptr, &fb, assembly_level, skip_zeros), @@ -353,6 +358,11 @@ SpaceOperator::GetComplexMassMatrix(Operator::DiagonalPolicy diag_policy) AddRealMassCoefficients(1.0, fr); AddRealMassBdrCoefficients(1.0, fbr); AddImagMassCoefficients(1.0, fi); + if (fr.empty() && fbr.empty() && fi.empty()) + { + return {}; + } + std::unique_ptr mr, mi; if (!fr.empty() || !fbr.empty()) { @@ -365,10 +375,6 @@ SpaceOperator::GetComplexMassMatrix(Operator::DiagonalPolicy diag_policy) (SumCoefficient *)nullptr, (SumCoefficient *)nullptr, assembly_level, skip_zeros); } - if (!mr && !mi) - { - return {}; - } auto M = 
std::make_unique(std::move(mr), std::move(mi), GetNDSpace()); M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); return M; @@ -383,6 +389,11 @@ SpaceOperator::GetComplexExtraSystemMatrix(double omega, SumMatrixCoefficient fbr(sdim), fbi(sdim); SumCoefficient dfbr, dfbi; AddExtraSystemBdrCoefficients(omega, dfbr, dfbi, fbr, fbi); + if (dfbr.empty() && fbr.empty() && dfbi.empty() && fbi.empty()) + { + return {}; + } + std::unique_ptr ar, ai; if (!dfbr.empty() || !fbr.empty()) { @@ -394,10 +405,6 @@ SpaceOperator::GetComplexExtraSystemMatrix(double omega, ai = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, (SumCoefficient *)nullptr, &dfbi, &fbi, assembly_level, skip_zeros); } - if (!ar && !ai) - { - return {}; - } auto A = std::make_unique(std::move(ar), std::move(ai), GetNDSpace()); A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); return A; @@ -663,37 +670,73 @@ std::unique_ptr SpaceOperator::GetPreconditionerMatrix(double a0, doub fespace_l.GlobalTrueVSize()); } const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); - SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); - SumCoefficient dfb; - AddStiffnessCoefficients(a0, df, f); - AddStiffnessBdrCoefficients(a0, fb); - AddDampingCoefficients(a1, f); - AddDampingBdrCoefficients(a1, fb); - // XX TODO: Test out difference of |Mr + i Mi| vs. Mr + Mi - // AddRealMassCoefficients(pc_shifted ? std::abs(a2) : a2, f); - // AddImagMassCoefficients(a2, f); - AddAbsMassCoefficients(pc_shifted ? std::abs(a2) : a2, f); - AddRealMassBdrCoefficients(pc_shifted ? std::abs(a2) : a2, fb); - AddExtraSystemBdrCoefficients(a3, dfb, dfb, fb, fb); - auto b = (s == 0) ? 
BuildOperator(fespace_l, &df, &f, &dfb, &fb, assembly_level, - skip_zeros, pc_lor) - : BuildAuxOperator(fespace_l, &f, &fb, assembly_level, skip_zeros, - pc_lor); - std::unique_ptr b_loc; + SumMatrixCoefficient dfr(sdim), fr(sdim), fi(sdim), fbr(sdim), fbi(sdim); + SumCoefficient dfbr, dfbi; + // if (s > 0) + // { + + // // XX TODO: Test complex PC matrix assembly for s > 0 + // // (or s == 0 if coarse solve supports it) + // // XX TODO: Handle complex coeff a0/a1/a2 (like SumOperator) + + // AddStiffnessCoefficients(a0, dfr, fr); + // AddStiffnessBdrCoefficients(a0, fbr); + // AddDampingCoefficients(a1, fi); + // AddDampingBdrCoefficients(a1, fbi); + // AddRealMassCoefficients(pc_shifted ? std::abs(a2) : a2, fr); + // AddRealMassBdrCoefficients(pc_shifted ? std::abs(a2) : a2, fbr); + // AddImagMassCoefficients(a2, fi); + // AddExtraSystemBdrCoefficients(a3, dfbr, dfbi, fbr, fbi); + // } + // else + { + AddStiffnessCoefficients(a0, dfr, fr); + AddStiffnessBdrCoefficients(a0, fbr); + AddDampingCoefficients(a1, fr); + AddDampingBdrCoefficients(a1, fbr); + AddAbsMassCoefficients(pc_shifted ? std::abs(a2) : a2, fr); + AddRealMassBdrCoefficients(pc_shifted ? std::abs(a2) : a2, fbr); + AddExtraSystemBdrCoefficients(a3, dfbr, dfbr, fbr, fbr); + } + + std::unique_ptr br, bi; + std::unique_ptr br_loc, bi_loc; + if (!dfr.empty() || !fr.empty() || !dfbr.empty() || !fbr.empty()) + { + br = (s == 0) ? BuildOperator(fespace_l, &dfr, &fr, &dfbr, &fbr, assembly_level, + skip_zeros, pc_lor) + : BuildAuxOperator(fespace_l, &fr, &fbr, assembly_level, skip_zeros, + pc_lor); + } + if (!fi.empty() || !dfbi.empty() || !fbi.empty()) + { + bi = (s == 0) ? 
BuildOperator(fespace_l, (SumCoefficient *)nullptr, &fi, &dfbi, + &fbi, assembly_level, skip_zeros, pc_lor) + : BuildAuxOperator(fespace_l, &fi, &fbi, assembly_level, skip_zeros, + pc_lor); + } if (pc_lor) { // After we construct the LOR discretization we deep copy the LOR matrix and the // original bilinear form and LOR discretization are no longer needed. mfem::Array dummy_dbc_tdof_list; - mfem::LORDiscretization lor(*b, dummy_dbc_tdof_list); - auto b_lor = std::make_unique(lor.GetAssembledMatrix()); + mfem::LORDiscretization lor(*br, dummy_dbc_tdof_list); + auto br_lor = std::make_unique(lor.GetAssembledMatrix()); if (print_prec_hdr) { - HYPRE_BigInt nnz = b_lor->NumNonZeroElems(); + HYPRE_BigInt nnz = br_lor->NumNonZeroElems(); Mpi::GlobalSum(1, &nnz, fespace_l.GetComm()); Mpi::Print(", {:d} NNZ (LOR)\n", nnz); } - b_loc = std::move(b_lor); + br_loc = std::move(br_lor); + br.reset(); + if (bi) + { + mfem::LORDiscretization lori(*bi, dummy_dbc_tdof_list); + auto bi_lor = std::make_unique(lori.GetAssembledMatrix()); + bi_loc = std::move(bi_lor); + bi.reset(); + } } else { @@ -701,7 +744,7 @@ std::unique_ptr SpaceOperator::GetPreconditionerMatrix(double a0, doub { if (assembly_level == mfem::AssemblyLevel::LEGACY) { - HYPRE_BigInt nnz = b->SpMat().NumNonZeroElems(); + HYPRE_BigInt nnz = br->SpMat().NumNonZeroElems(); Mpi::GlobalSum(1, &nnz, fespace_l.GetComm()); Mpi::Print(", {:d} NNZ\n", nnz); } @@ -710,9 +753,10 @@ std::unique_ptr SpaceOperator::GetPreconditionerMatrix(double a0, doub Mpi::Print("\n"); } } - b_loc = std::move(b); + br_loc = std::move(br); + bi_loc = std::move(bi); } - auto B_l = BuildLevelOperator(*B, std::move(b_loc), nullptr, fespace_l); + auto B_l = BuildLevelOperator(*B, std::move(br_loc), std::move(bi_loc), fespace_l); B_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); if (s == 0) { From 845101183cf26caa95bb881a3a0c149fc68b7936 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Fri, 26 May 2023 16:57:03 
-0700 Subject: [PATCH 23/41] Re-enable adaptive frequency sweep using RomOperator after refactor, using Eigen for PROM dense matrix operations --- palace/drivers/drivensolver.cpp | 86 ++-- palace/linalg/curlcurl.hpp | 2 + palace/linalg/vector.cpp | 16 +- palace/linalg/vector.hpp | 10 +- palace/models/romoperator.cpp | 675 ++++++++++++++------------------ palace/models/romoperator.hpp | 100 +++-- palace/utils/prettyprint.hpp | 29 +- 7 files changed, 408 insertions(+), 510 deletions(-) diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp index ca2a72d00..26d3c52a5 100644 --- a/palace/drivers/drivensolver.cpp +++ b/palace/drivers/drivensolver.cpp @@ -248,63 +248,63 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i // removes it from P \ P_S. timer.construct_time += timer.Lap(); Timer local_timer; + const double f0 = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, 1.0); + Mpi::Print("\nBeginning PROM construction offline phase:\n" " {:d} points for frequency sweep over [{:.3e}, {:.3e}] GHz\n", - nstep - step0, - iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega0), - iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, - omega0 + (nstep - step0 - 1) * delta_omega)); - spaceop.GetWavePortOp().SetSuppressOutput(true); // Suppress wave port stuff for offline - // RomOperator prom(iodata, spaceop, nmax); - // prom.Initialize(nstep - step0, omega0, delta_omega); //XX TODO PROM + nstep - step0, omega0 * f0, (omega0 + (nstep - step0 - 1) * delta_omega) * f0); + RomOperator prom(iodata, spaceop); + prom.Initialize(omega0, delta_omega, nstep - step0, nmax); + spaceop.GetWavePortOp().SetSuppressOutput(true); // Suppress wave port output for offline local_timer.construct_time += local_timer.Lap(); - // prom.SolveHDM(omega0, E, true); // Print matrix stats at first HDM solve - // prom.SolveHDM(omega0 + (nstep - step0 - 1) * delta_omega, E, false); - // local_timer.solve_time += local_timer.Lap(); 
//XX TODO PROM + prom.SolveHDM(omega0, E); // Print matrix stats at first HDM solve + local_timer.solve_time += local_timer.Lap(); + prom.AddHDMSample(omega0, E); + local_timer.construct_time += local_timer.Lap(); + prom.SolveHDM(omega0 + (nstep - step0 - 1) * delta_omega, E); + local_timer.solve_time += local_timer.Lap(); + prom.AddHDMSample(omega0 + (nstep - step0 - 1) * delta_omega, E); + local_timer.construct_time += local_timer.Lap(); // Greedy procedure for basis construction (offline phase). Basis is initialized with // solutions at frequency sweep endpoints. - // int iter = static_cast(prom.GetSampleFrequencies().size()), iter0 = iter; - // double max_error = 1.0; + int iter = static_cast(prom.GetSampleFrequencies().size()), iter0 = iter; + double max_error; + ; while (true) { // Compute maximum error in parameter domain with current PROM. - // double omega_star; - // max_error = prom.ComputeMaxError(ncand, omega_star); //XX TODO PROM - // local_timer.construct_time += local_timer.Lap(); - // if (max_error < offline_tol || iter == nmax) - // { - // break; - // } + double omega_star; + max_error = prom.ComputeMaxError(ncand, omega_star); + local_timer.construct_time += local_timer.Lap(); + if (max_error < offline_tol || iter == nmax) + { + break; + } // Sample HDM and add solution to basis. 
- // Mpi::Print( - // "\nGreedy iteration {:d} (n = {:d}): ω* = {:.3e} GHz ({:.3e}), error = {:.3e}\n", - // iter - iter0 + 1, prom.GetReducedDimension(), - // iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega_star), omega_star, - // max_error); - // prom.SolveHDM(omega_star, E); //XX TODO PROM - // local_timer.solve_time += local_timer.Lap(); - // iter++; - } - { - // std::vector samples(prom.GetSampleFrequencies()); - // for (auto &sample : samples) - // { - // sample = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, sample); - // } - // Mpi::Print("\nAdaptive sampling{} {:d} frequency samples:\n" - // " n = {:d}, error = {:.3e}, tol = {:.3e}\n", - // (iter == nmax) ? " reached maximum" : " converged with", iter, - // prom.GetReducedDimension(), max_error, offline_tol); //XX TODO PROM - // utils::PrettyPrint(samples, " Sampled frequencies (GHz):"); + Mpi::Print( + "\nGreedy iteration {:d} (n = {:d}): ω* = {:.3e} GHz ({:.3e}), error = {:.3e}\n", + iter - iter0 + 1, prom.GetReducedDimension(), omega_star * f0, omega_star, + max_error); + prom.SolveHDM(omega_star, E); + local_timer.solve_time += local_timer.Lap(); + prom.AddHDMSample(omega_star, E); + local_timer.construct_time += local_timer.Lap(); + iter++; } - // SaveMetadata(prom.GetLinearSolver()); //XX TODO PROM + Mpi::Print("\nAdaptive sampling{} {:d} frequency samples:\n" + " n = {:d}, error = {:.3e}, tol = {:.3e}\n", + (iter == nmax) ? 
" reached maximum" : " converged with", iter, + prom.GetReducedDimension(), max_error, offline_tol); + utils::PrettyPrint(prom.GetSampleFrequencies(), f0, " Sampled frequencies (GHz):"); + SaveMetadata(prom.GetLinearSolver()); + const auto local_construction_time = timer.Lap(); timer.construct_time += local_construction_time; Mpi::Print(" Total offline phase elapsed time: {:.2e} s\n" - " Parameter space sampling: {:.2e} s, HDM solves: {:.2e} s\n", + " Sampling and PROM construction: {:.2e} s, HDM solves: {:.2e} s\n", Timer::Duration(local_construction_time).count(), Timer::Duration(local_timer.construct_time).count(), Timer::Duration(local_timer.solve_time).count()); // Timings on rank 0 @@ -322,11 +322,11 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i nstep, freq, Timer::Duration(timer.Now() - t0).count()); // Assemble the linear system and solve. - // prom.AssemblePROM(omega); //XX TODO PROM + prom.AssemblePROM(omega); timer.construct_time += timer.Lap(); Mpi::Print("\n"); - // prom.SolvePROM(E); //XX TODO PROM + prom.SolvePROM(E); timer.solve_time += timer.Lap(); // Compute B = -1/(iω) ∇ x E on the true dofs, and set the internal GridFunctions in diff --git a/palace/linalg/curlcurl.hpp b/palace/linalg/curlcurl.hpp index e07513932..f36280d5c 100644 --- a/palace/linalg/curlcurl.hpp +++ b/palace/linalg/curlcurl.hpp @@ -44,6 +44,8 @@ class CurlCurlMassSolver const std::vector> &h1_dbc_tdof_lists, double tol, int max_it, int print); + const Operator &GetOperator() { return *A; } + void Mult(const Vector &x, Vector &y) const { ksp->Mult(x, y); } void Mult(const ComplexVector &x, ComplexVector &y) diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp index 12cbbfaad..f2ab410c6 100644 --- a/palace/linalg/vector.cpp +++ b/palace/linalg/vector.cpp @@ -382,29 +382,25 @@ void SetRandomSign(MPI_Comm comm, ComplexVector &x, int seed) } template <> -double Normalize(MPI_Comm comm, Vector &x, const Operator &B, Vector &Bx) +double 
Norml2(MPI_Comm comm, const Vector &x, const Operator &B, Vector &Bx) { B.Mult(x, Bx); - double dot = Dot(comm, x, Bx); + double dot = Dot(comm, Bx, x); MFEM_ASSERT(dot > 0.0, "Non-positive vector norm in normalization (dot = " << dot << ")!"); - double norm = std::sqrt(dot); - x *= 1.0 / norm; - return norm; + return std::sqrt(dot); } template <> -double Normalize(MPI_Comm comm, ComplexVector &x, const Operator &B, ComplexVector &Bx) +double Norml2(MPI_Comm comm, const ComplexVector &x, const Operator &B, ComplexVector &Bx) { // For SPD B, xᴴ B x is real. B.Mult(x.Real(), Bx.Real()); B.Mult(x.Imag(), Bx.Imag()); - std::complex dot = Dot(comm, x, Bx); + std::complex dot = Dot(comm, Bx, x); MFEM_ASSERT(dot.real() > 0.0 && std::abs(dot.imag()) < 1.0e-9 * dot.real(), "Non-positive vector norm in normalization (dot = " << dot << ")!"); - double norm = std::sqrt(dot.real()); - x *= 1.0 / norm; - return norm; + return std::sqrt(dot.real()); } template <> diff --git a/palace/linalg/vector.hpp b/palace/linalg/vector.hpp index 395ff82ad..4b9136da3 100644 --- a/palace/linalg/vector.hpp +++ b/palace/linalg/vector.hpp @@ -145,6 +145,8 @@ inline double Norml2(MPI_Comm comm, const VecType &x) { return std::sqrt(std::abs(Dot(comm, x, x))); } +template +double Norml2(MPI_Comm comm, const VecType &x, const Operator &B, VecType &Bx); // Normalize the vector, possibly with respect to an SPD matrix B. template @@ -156,7 +158,13 @@ inline double Normalize(MPI_Comm comm, VecType &x) return norm; } template -double Normalize(MPI_Comm comm, VecType &x, const Operator &B, VecType &Bx); +inline double Normalize(MPI_Comm comm, VecType &x, const Operator &B, VecType &Bx) +{ + double norm = Norml2(comm, x, B, Bx); + MFEM_ASSERT(norm > 0.0, "Zero vector norm in normalization!"); + x *= 1.0 / norm; + return norm; +} // Addition y += alpha * x. 
template diff --git a/palace/models/romoperator.cpp b/palace/models/romoperator.cpp index 6d99bb419..d33e7df2f 100644 --- a/palace/models/romoperator.cpp +++ b/palace/models/romoperator.cpp @@ -3,11 +3,10 @@ #include "romoperator.hpp" -#if 0 // XX TODO DISABLE ROM FOR NOW - #include #include -#include "linalg/operator.hpp" +#include +#include "linalg/orthog.hpp" #include "models/spaceoperator.hpp" #include "utils/communication.hpp" #include "utils/iodata.hpp" @@ -17,44 +16,97 @@ namespace palace using namespace std::complex_literals; -RomOperator::RomOperator(const IoData &iodata, SpaceOperator &sp, int nmax) - : spaceop(sp), - engine((unsigned)std::chrono::system_clock::now().time_since_epoch().count()) +namespace +{ + +inline void ProjectMatInternal(MPI_Comm comm, const std::vector &V, + const ComplexOperator &A, Eigen::MatrixXcd &Ar, + ComplexVector &r, int n0) +{ + // Update Ar = Vᴴ A V for the new basis dimension n0 -> n. V is real and thus the result + // is complex symmetric if A is symmetric (which we assume is the case). Ar is replicated + // across all processes as a sequential n x n matrix. + const auto n = Ar.rows(); + MFEM_VERIFY(n0 < n, "Unexpected dimensions in PROM matrix projection!"); + for (int j = n0; j < n; j++) + { + // Fill block of Vᴴ A V = [ | Vᴴ A vj ] . We can optimize the matrix-vector product + // since the columns of V are real. + MFEM_VERIFY(A.HasReal() || A.HasImag(), + "Invalid zero ComplexOperator for PROM matrix projection!"); + if (A.HasReal()) + { + A.Real()->Mult(V[j], r.Real()); + } + if (A.HasImag()) + { + A.Imag()->Mult(V[j], r.Imag()); + } + for (int i = 0; i < n; i++) + { + Ar(i, j).real(A.HasReal() ? V[i] * r.Real() : 0.0); // Local inner product + Ar(i, j).imag(A.HasImag() ? V[i] * r.Imag() : 0.0); + } + } + Mpi::GlobalSum((n - n0) * n, Ar.data() + n0 * n, comm); + + // Fill lower block of Vᴴ A V = [ ____________ | ] + // [ vjᴴ A V[1:n0] | ] . 
+ for (int j = 0; j < n0; j++) + { + for (int i = n0; i < n; i++) + { + Ar(i, j) = Ar(j, i); + } + } +} + +inline void ProjectVecInternal(MPI_Comm comm, const std::vector &V, + const ComplexVector &b, Eigen::VectorXcd &br, int n0) +{ + // Update br = Vᴴ b for the new basis dimension n0 -> n. br is replicated across all + // processes as a sequential n-dimensional vector. + const auto n = br.size(); + MFEM_VERIFY(n0 < n, "Unexpected dimensions in PROM vector projection!"); + for (int i = n0; i < n; i++) + { + br(i).real(V[i] * b.Real()); // Local inner product + br(i).imag(V[i] * b.Imag()); + } + Mpi::GlobalSum(n - n0, br.data() + n0, comm); +} + +} // namespace + +RomOperator::RomOperator(const IoData &iodata, SpaceOperator &spaceop) : spaceop(spaceop) { // Construct the system matrices defining the linear operator. PEC boundaries are handled // simply by setting diagonal entries of the system matrix for the corresponding dofs. // Because the Dirichlet BC is always homogenous, no special elimination is required on // the RHS. The damping matrix may be nullptr. - K = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::STIFFNESS, - mfem::Operator::DIAG_ONE); - M = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::MASS, - mfem::Operator::DIAG_ZERO); - C = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::DAMPING, - mfem::Operator::DIAG_ZERO); - - // Set up the linear solver and set operators but don't set the operators yet (this will - // be done during an HDM solve at a given parameter point). The preconditioner for the - // complex linear system is constructed from a real approximation to the complex system - // matrix. 
- pc0 = std::make_unique(iodata, spaceop.GetDbcMarker(), - spaceop.GetNDSpaces(), &spaceop.GetH1Spaces()); - ksp0 = std::make_unique(K->GetComm(), iodata, "ksp_"); - ksp0->SetPreconditioner(*pc0); + K = spaceop.GetComplexStiffnessMatrix(Operator::DIAG_ONE); + C = spaceop.GetComplexDampingMatrix(Operator::DIAG_ZERO); + M = spaceop.GetComplexMassMatrix(Operator::DIAG_ZERO); + MFEM_VERIFY(K && M, "Invalid empty HDM matrices when constructing PROM!"); // Set up RHS vector (linear in frequency part) for the incident field at port boundaries, // and the vector for the solution, which satisfies the Dirichlet (PEC) BC. - RHS1 = std::make_unique(*K); - if (!spaceop.GetFreqDomainExcitationVector1(*RHS1)) + if (!spaceop.GetExcitationVector1(RHS1)) { - RHS1.reset(); + RHS1.SetSize(0); } - init2 = true; - hasA2 = hasRHS2 = false; + has_A2 = has_RHS2 = true; - // Initialize other data structure and storage. - E0 = std::make_unique(*K); - R0 = std::make_unique(*K); - T0 = std::make_unique(*K); + // Initialize temporary vector storage. + r.SetSize(K->Height()); + w.SetSize(K->Height()); + + // Set up the linear solver, but don't set its operators yet (this will + // be done during an HDM solve at a given parameter point). The preconditioner for the + // complex linear system is constructed from a real approximation to the complex system + // matrix. + ksp = std::make_unique(iodata, spaceop.GetNDSpaces(), + &spaceop.GetH1Spaces()); // Initialize solver for inner product solves. The system matrix for the inner product is // real and SPD. 
This uses the dual norm from https://ieeexplore.ieee.org/document/5313818 @@ -63,307 +115,274 @@ RomOperator::RomOperator(const IoData &iodata, SpaceOperator &sp, int nmax) { constexpr int curlcurl_verbose = 0; kspKM = std::make_unique( - spaceop.GetMaterialOp(), spaceop.GetDbcMarker(), spaceop.GetNDSpaces(), - spaceop.GetH1Spaces(), iodata.solver.linear.tol, iodata.solver.linear.max_it, - curlcurl_verbose); - - auto KM = std::make_unique(K->GetNumRows(), K->GetNumCols()); - KM->AddOperator(*K->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL)); - KM->AddOperator(*M->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL)); - opKM = std::make_unique(K->GetComm(), std::move(KM)); - opKM->SetRealSymmetric(); + spaceop.GetMaterialOp(), spaceop.GetNDSpaces(), spaceop.GetH1Spaces(), + spaceop.GetNDDbcTDofLists(), spaceop.GetH1DbcTDofLists(), iodata.solver.linear.tol, + iodata.solver.linear.max_it, curlcurl_verbose); } - // Construct initial (empty) basis and ROM operators. Ar = Vᴴ A V when assembled is - // complex symmetric for real V. The provided nmax is the number of sample points(2 basis - // vectors per point). - MFEM_VERIFY(K && M, "Invalid empty HDM matrices constructing PROM operators!"); - MFEM_VERIFY(nmax > 0, "Reduced order basis storage must have > 0 columns!"); - dim = 0; - omega_min = delta_omega = 0.0; - V = std::make_unique(K->GetComm(), K->Height(), PETSC_DECIDE, - PETSC_DECIDE, 2 * nmax, nullptr); - - Kr = std::make_unique(dim, dim, nullptr); - Kr->CopySymmetry(*K); - Mr = std::make_unique(dim, dim, nullptr); - Mr->CopySymmetry(*M); - if (C) - { - Cr = std::make_unique(dim, dim, nullptr); - Cr->CopySymmetry(*C); - } - else - { - Cr = nullptr; - } - Ar = std::make_unique(dim, dim, nullptr); - Ar->SetSymmetric(K->GetSymmetric() && M->GetSymmetric() && (!C || C->GetSymmetric())); - - RHS1r = (RHS1) ? 
std::make_unique(*Ar) : nullptr; - RHSr = std::make_unique(*Ar); - Er = std::make_unique(*Ar); - - // Set up the linear solver (dense sequential on all processors). An indefinite LDLᵀ - // factorization is used when Ar has its symmetry flag set. The default sequential dense - // matrix uses LAPACK for the factorization. - int print = 0; - ksp = std::make_unique(Ar->GetComm(), print, "rom_"); - ksp->SetType(KspSolver::Type::CHOLESKY); // Symmetric indefinite factorization + // The initial PROM basis is empty. Orthogonalization uses MGS by default, else CGS2. + dim_V = 0; + orthog_mgs = + (iodata.solver.linear.orthog_type == config::LinearSolverData::OrthogType::DEFAULT || + iodata.solver.linear.orthog_type == config::LinearSolverData::OrthogType::MGS); + + // Seed the random number generator for parameter space sampling. + engine.seed(std::chrono::system_clock::now().time_since_epoch().count()); } -void RomOperator::Initialize(int steps, double start, double delta) +void RomOperator::Initialize(double start, double delta, int num_steps, int max_dim) { // Initialize P = {ω_L, ω_L+δ, ..., ω_R}. Always insert in ascending order. 
- MFEM_VERIFY(Ps.empty(), "RomOperator::Initialize should only be called once!"); - MFEM_VERIFY(steps > 2, "RomOperator adaptive frequency sweep should have more than two " - "frequency steps!"); - Ps.reserve(steps); - PmPs.resize(steps); + MFEM_VERIFY(PS.empty() && P_m_PS.empty(), + "RomOperator::Initialize should only be called once!"); + MFEM_VERIFY( + num_steps > 2, + "RomOperator adaptive frequency sweep should have more than two frequency steps!"); if (delta < 0.0) { - start = start + (steps - 1) * delta; + start = start + (num_steps - 1) * delta; delta = -delta; } - for (int step = 0; step < steps; step++) + auto it = P_m_PS.begin(); + for (int step = 0; step < num_steps; step++) { - PmPs[step] = start + step * delta; + it = P_m_PS.emplace_hint(it, start + step * delta); } - omega_min = start; - delta_omega = delta; - A2.resize(steps); - RHS2.resize(steps); + + // The PROM operator Ar = Vᴴ A V, when assembled, is complex symmetric for real V. The + // provided max_dim is the number of sample points (2 basis vectors per point). + MFEM_VERIFY(max_dim > 0, "Reduced order basis storage must have > 0 columns!"); + V.resize(2 * max_dim, Vector()); } -void RomOperator::SolveHDM(double omega, petsc::PetscParVector &E, bool print) +void RomOperator::SolveHDM(double omega, ComplexVector &e) { - // Compute HDM solution at the given frequency and add solution to the reduced-order - // basis, updating the PROM operators. Update P_S and P\P_S sets. - auto it = std::lower_bound(PmPs.begin(), PmPs.end(), omega); - MFEM_VERIFY(it != PmPs.end(), - "Sample frequency " << omega << " not found in parameter set!"); - PmPs.erase(it); - Ps.push_back(omega); - - // Set up HDM system and solve. The system matrix A = K + iω C - ω² M + A2(ω) is built - // by summing the underlying operator contributions (to save memory). + // Compute HDM solution at the given frequency. The system matrix, A = K + iω C - ω² M + + // A2(ω), is built by summing the underlying operator contributions. 
+ A2 = spaceop.GetComplexExtraSystemMatrix(omega, Operator::DIAG_ZERO); + has_A2 = (A2 != nullptr); + auto A = spaceop.GetSystemMatrix(std::complex(1.0, 0.0), 1i * omega, + std::complex(-omega * omega, 0.0), K.get(), + C.get(), M.get(), A2.get()); + auto P = + spaceop.GetPreconditionerMatrix(1.0, omega, -omega * omega, omega); + ksp->SetOperators(*A, *P); + + // The HDM excitation vector is computed as RHS = iω RHS1 + RHS2(ω). + Mpi::Print("\n"); + if (has_RHS2) { - const auto step = std::lround((omega - omega_min) / delta_omega); - MFEM_VERIFY(step >= 0 && static_cast(step) < A2.size(), - "Invalid out-of-range frequency for PROM solution!"); - std::vector> P, AuxP; - A2[step] = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::EXTRA, omega, - mfem::Operator::DIAG_ZERO, print); - - //XX TODO FIX WITH SUM OPERATOR - - auto A = utils::GetSystemMatrixShell(omega, *K, *M, C.get(), A2[step].get()); - spaceop.GetPreconditionerMatrix(omega, P, AuxP, print); - pc0->SetOperator(P, &AuxP); - ksp0->SetOperator(*A); - - Mpi::Print("\n"); - spaceop.GetFreqDomainExcitationVector( - omega, *R0); // XX TODO ASSEMBLE PIECE WISE LIKE OPERATOR... - E.SetZero(); - ksp0->Mult(*R0, E); + has_RHS2 = spaceop.GetExcitationVector2(omega, r); } + else + { + r = 0.0; + } + if (RHS1.Size()) + { + r.Add(1i * omega, RHS1); + } + ksp->Mult(r, e); +} - double norm = E.Normlinf(), ntol = 1.0e-12; - mfem::Vector Er_(E.GetSize()), Ei_(E.GetSize()); - E.GetToVectors(Er_, Ei_); - bool has_real = (std::sqrt(mfem::InnerProduct(E.GetComm(), Er_, Er_)) > ntol * norm); - bool has_imag = (std::sqrt(mfem::InnerProduct(E.GetComm(), Ei_, Ei_)) > ntol * norm); +void RomOperator::AddHDMSample(double omega, ComplexVector &e) +{ + // Use the given HDM solution at the given frequency to update the reduced-order basis + // and the PROM operators. 
+ auto it = P_m_PS.lower_bound(omega); + MFEM_VERIFY(it != P_m_PS.end(), + "Sample frequency " << omega << " not found in parameter set!"); + P_m_PS.erase(it); + auto ret = PS.insert(omega); + MFEM_VERIFY(ret.second, "Sample frequency " + << omega << " already exists in the sampled parameter set!"); // Update V. The basis is always real (each complex solution adds two basis vectors if it // has a nonzero real and imaginary parts). - PetscInt nmax = V->GetGlobalNumCols(), dim0 = dim; - dim = (has_real) + (has_imag) + static_cast(dim0); - MFEM_VERIFY(dim <= nmax, "Unable to increase basis storage size, increase maximum number " - "of vectors!"); - bool mgs = false, cgs2 = true; - if (has_real && has_imag) + const double normr = linalg::Norml2(spaceop.GetComm(), e.Real()); + const double normi = linalg::Norml2(spaceop.GetComm(), e.Imag()); + const bool has_real = (normr > 1.0e-12 * std::sqrt(normr * normr + normi * normi)); + const bool has_imag = (normi > 1.0e-12 * std::sqrt(normr * normr + normi * normi)); + MFEM_VERIFY(dim_V + has_real + has_imag <= static_cast(V.size()), + "Unable to increase basis storage size, increase maximum number of vectors!"); + const int dim_V0 = dim_V; + std::vector H(dim_V + 1); + if (has_real) { + V[dim_V] = e.Real(); + if (orthog_mgs) { - petsc::PetscParVector v = V->GetColumn(dim - 2); - v.SetFromVector(Er_); - V->RestoreColumn(dim - 2, v); - if (opKM) - { - V->OrthonormalizeColumn(dim - 2, mgs, cgs2, *opKM, *T0); - } - else - { - V->OrthonormalizeColumn(dim - 2, mgs, cgs2); - } + linalg::OrthogonalizeColumnMGS(spaceop.GetComm(), V, V[dim_V], H.data(), dim_V); } + else { - petsc::PetscParVector v = V->GetColumn(dim - 1); - v.SetFromVector(Ei_); - V->RestoreColumn(dim - 1, v); - if (opKM) - { - V->OrthonormalizeColumn(dim - 1, mgs, cgs2, *opKM, *T0); - } - else - { - V->OrthonormalizeColumn(dim - 1, mgs, cgs2); - } + linalg::OrthogonalizeColumnCGS(spaceop.GetComm(), V, V[dim_V], H.data(), dim_V, true); } + V[dim_V] *= 1.0 / 
linalg::Norml2(spaceop.GetComm(), V[dim_V]); + dim_V++; } - else + if (has_imag) { + V[dim_V] = e.Imag(); + if (orthog_mgs) { - petsc::PetscParVector v = V->GetColumn(dim - 1); - v.Copy(E); - V->RestoreColumn(dim - 1, v); - if (opKM) - { - V->OrthonormalizeColumn(dim - 1, mgs, cgs2, *opKM, *T0); - } - else - { - V->OrthonormalizeColumn(dim - 1, mgs, cgs2); - } + linalg::OrthogonalizeColumnMGS(spaceop.GetComm(), V, V[dim_V], H.data(), dim_V); } + else + { + linalg::OrthogonalizeColumnCGS(spaceop.GetComm(), V, V[dim_V], H.data(), dim_V, true); + } + V[dim_V] *= 1.0 / linalg::Norml2(spaceop.GetComm(), V[dim_V]); + dim_V++; } // Update reduced-order operators. Resize preserves the upper dim0 x dim0 block of each // matrix and first dim0 entries of each vector and the projection uses the values // computed for the unchanged basis vectors. - bool init = (dim0 > 0); - Kr->Resize(dim, dim, init); - Mr->Resize(dim, dim, init); - BVMatProjectInternal(*V, *K, *Kr, *R0, dim0, dim); - BVMatProjectInternal(*V, *M, *Mr, *R0, dim0, dim); + Kr.conservativeResize(dim_V, dim_V); + ProjectMatInternal(spaceop.GetComm(), V, *K, Kr, r, dim_V0); if (C) { - Cr->Resize(dim, dim, init); - BVMatProjectInternal(*V, *C, *Cr, *R0, dim0, dim); + Cr.conservativeResize(dim_V, dim_V); + ProjectMatInternal(spaceop.GetComm(), V, *C, Cr, r, dim_V0); } - if (RHS1) + Mr.conservativeResize(dim_V, dim_V); + ProjectMatInternal(spaceop.GetComm(), V, *M, Mr, r, dim_V0); + Ar.resize(dim_V, dim_V); + if (RHS1.Size()) { - RHS1r->Resize(dim, init); - BVDotVecInternal(*V, *RHS1, *RHS1r, dim0, dim); + RHS1r.conservativeResize(dim_V); + ProjectVecInternal(spaceop.GetComm(), V, RHS1, RHS1r, dim_V0); } - Ar->Resize(dim, dim); - RHSr->Resize(dim); - Er->Resize(dim); - if (init) - { - ksp->Reset(); // Operator size change - } - ksp->SetOperator(*Ar); + RHSr.resize(dim_V); } void RomOperator::AssemblePROM(double omega) { - // Assemble the PROM linear system at the given frequency. 
Do some additional set up at - // the first solve call. The PROM system is defined by the matrix Aᵣ(ω) = Kᵣ + iω Cᵣ - // - ω² Mᵣ + Vᴴ A2ᵣ V(ω) and source vector RHSᵣ(ω) = iω RHS1ᵣ + Vᴴ RHS2ᵣ(ω) V. - const auto step = std::lround((omega - omega_min) / delta_omega); - MFEM_VERIFY(step >= 0 && static_cast(step) < A2.size(), - "Invalid out-of-range frequency for PROM solution!"); - - // Construct A2(ω) and RHS2(ω) if required (only nonzero on boundaries, will be empty - // if not needed). - if (init2) + // Assemble the PROM linear system at the given frequency. The PROM system is defined by + // the matrix Aᵣ(ω) = Kᵣ + iω Cᵣ - ω² Mᵣ + Vᴴ A2(ω) V and source vector RHSᵣ(ω) = + // iω RHS1ᵣ + Vᴴ RHS2(ω). A2(ω) and RHS2(ω) are constructed only if required and are + // only nonzero on boundaries; they will be empty if not needed. + if (has_A2) { - auto tA2 = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::EXTRA, omega, - mfem::Operator::DIAG_ZERO, false); - if (tA2) - { - hasA2 = true; - A2[step] = std::move(tA2); - } - auto tRHS2 = std::make_unique(*K); - if (spaceop.GetFreqDomainExcitationVector2(omega, *tRHS2)) - { - hasRHS2 = true; - RHS2[step] = std::move(tRHS2); - } - init2 = false; + A2 = spaceop.GetComplexExtraSystemMatrix(omega, Operator::DIAG_ZERO); + ProjectMatInternal(spaceop.GetComm(), V, *A2, Ar, r, 0); } - - // Set up PROM linear system. 
- Ar->Scale(0.0); - if (hasA2) + else { - if (!A2[step]) - { - // Debug - // Mpi::Print("Inserting cache value for omega = {:e}\n", omega); - A2[step] = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::EXTRA, omega, - mfem::Operator::DIAG_ZERO, false); - } - else - { - // Debug - // Mpi::Print("Found cache value for omega = {:e} (step = {:d})\n", omega, step); - } - BVMatProjectInternal(*V, *A2[step], *Ar, *R0, 0, dim); + Ar.setZero(); } - Ar->AXPY(1.0, *Kr, petsc::PetscParMatrix::NNZStructure::SAME); - Ar->AXPY(-omega * omega, *Mr, petsc::PetscParMatrix::NNZStructure::SAME); + Ar.noalias() += Kr; if (C) { - Ar->AXPY(1i * omega, *Cr, petsc::PetscParMatrix::NNZStructure::SAME); + Ar.noalias() += (1i * omega) * Cr; } + Ar.noalias() += (-omega * omega) * Mr; - RHSr->SetZero(); - if (hasRHS2) + if (has_RHS2) { - if (!RHS2[step]) - { - RHS2[step] = std::make_unique(*K); - spaceop.GetFreqDomainExcitationVector2(omega, *RHS2[step]); - } - BVDotVecInternal(*V, *RHS2[step], *RHSr, 0, dim); + spaceop.GetExcitationVector2(omega, RHS2); + ProjectVecInternal(spaceop.GetComm(), V, RHS2, RHSr, 0); + } + else + { + RHSr.setZero(); } - if (RHS1) + if (RHS1.Size()) { - RHSr->AXPY(1i * omega, *RHS1r); + RHSr.noalias() += (1i * omega) * RHS1r; } } -void RomOperator::SolvePROM(petsc::PetscParVector &E) +void RomOperator::SolvePROM(ComplexVector &e) { - // Compute PROM solution at the given frequency and expand into high- dimensional space. - // The PROM is solved on every process so the matrix- vector product for vector expansion - // is sequential. - ksp->Mult(*RHSr, *Er); + // Compute PROM solution at the given frequency and expand into high-dimensional space. + // The PROM is solved on every process so the matrix-vector product for vector expansion + // does not require communication. 
+ RHSr = Ar.partialPivLu().solve(RHSr); + // RHSr = Ar.ldlt().solve(RHSr); + // RHSr = Ar.selfadjointView().ldlt().solve(RHSr); + + e = 0.0; + for (int j = 0; j < dim_V; j++) { - PetscScalar *pV = V->GetArray(), *pE = E.GetArray(); - petsc::PetscDenseMatrix locV(V->Height(), dim, pV); - petsc::PetscParVector locE(V->Height(), pE); - locV.Mult(*Er, locE); - V->RestoreArray(pV); - E.RestoreArray(pE); + e.Real().Add(RHSr(j).real(), V[j]); + e.Imag().Add(RHSr(j).imag(), V[j]); } } -double RomOperator::ComputeMaxError(int Nc, double &omega_star) +double RomOperator::ComputeError(double omega) { - // Greedy iteration: Find argmax_{ω ∈ P_C} η(E; ω). We sample Nc candidates from P \ P_S. - MPI_Comm comm = K->GetComm(); - Nc = std::min(Nc, static_cast(PmPs.size())); - std::vector Pc; - if (Mpi::Root(comm)) + // Compute the error metric associated with the approximate PROM solution at the given + // frequency. The HDM residual -r = [K + iω C - ω² M + A2(ω)] x - [iω RHS1 + RHS2(ω)] is + // computed using the most recently computed A2(ω) and RHS2(ω). + AssemblePROM(omega); + SolvePROM(w); + + // Residual error. + r = 0.0; + if (RHS1.Size()) + { + r.Add(-1i * omega, RHS1); + } + if (has_RHS2) + { + r.Add(-1.0, RHS2); + } + double den = !kspKM ? 
linalg::Norml2(spaceop.GetComm(), r) : 0.0; + + K->AddMult(w, r, 1.0); + if (C) + { + C->AddMult(w, r, 1i * omega); + } + M->AddMult(w, r, -omega * omega); + if (has_A2) + { + A2->AddMult(w, r, 1.0); + } + + double num; + if (!kspKM) + { + num = linalg::Norml2(spaceop.GetComm(), r); + } + else + { + z.SetSize(r.Size()); + kspKM->Mult(r, z); + auto dot = linalg::Dot(spaceop.GetComm(), z, r); + MFEM_ASSERT(dot.real() > 0.0 && std::abs(dot.imag()) < 1.0e-9 * dot.real(), + "Non-positive vector norm in normalization (dot = " << dot << ")!"); + num = std::sqrt(dot.real()); + den = linalg::Norml2(spaceop.GetComm(), w, kspKM->GetOperator(), z); + } + MFEM_VERIFY(den > 0.0, "Unexpected zero denominator in HDM residual!"); + return num / den; +} + +double RomOperator::ComputeMaxError(int num_cand, double &omega_star) +{ + // Greedy iteration: Find argmax_{ω ∈ P_C} η(e; ω). We sample num_cand candidates from + // P \ P_S. + num_cand = std::min(num_cand, static_cast(P_m_PS.size())); + std::vector PC; + if (Mpi::Root(spaceop.GetComm())) { // Sample with uniform probability. - Pc.reserve(Nc); - std::sample(PmPs.begin(), PmPs.end(), std::back_inserter(Pc), Nc, engine); + PC.reserve(num_cand); + std::sample(P_m_PS.begin(), P_m_PS.end(), std::back_inserter(PC), num_cand, engine); #if 0 // Sample with weighted probability by distance from the set of already sampled // points. 
- std::vector weights(PmPs.size()); + std::vector weights(P_m_PS.size()); weights = static_cast(weights.Size()); - Pc.reserve(Nc); - for (auto sample : Ps) + PC.reserve(num_cand); + for (auto sample : PS) { - int i = std::distance(PmPs.begin(), - std::lower_bound(PmPs.begin(), PmPs.end(), sample)); + int i = std::distance(P_m_PS.begin(), + std::lower_bound(P_m_PS.begin(), P_m_PS.end(), sample)); int il = i-1; while (il >= 0) { @@ -377,30 +396,30 @@ double RomOperator::ComputeMaxError(int Nc, double &omega_star) iu++; } } - for (int i = 0; i < Nc; i++) + for (int i = 0; i < num_cand; i++) { std::discrete_distribution dist(weights.begin(), weights.end()); int res = dist(engine); - Pc.push_back(PmPs[res]); + PC.push_back(P_m_PS[res]); weights[res] = 0.0; // No replacement } #endif } else { - Pc.resize(Nc); + PC.resize(num_cand); } - Mpi::Broadcast(Nc, Pc.data(), 0, comm); + Mpi::Broadcast(num_cand, PC.data(), 0, spaceop.GetComm()); // Debug // Mpi::Print("Candidate sampling:\n"); - // Mpi::Print(" P_S: {}", Ps); - // Mpi::Print(" P\\P_S: {}\n", PmPs); - // Mpi::Print(" P_C: {}\n", Pc); + // Mpi::Print(" P_S: {}", PS); + // Mpi::Print(" P\\P_S: {}\n", P_m_PS); + // Mpi::Print(" P_C: {}\n", PC); // For each candidate, compute the PROM solution and associated error metric. double err_max = 0.0; - for (auto omega : Pc) + for (auto omega : PC) { double err = ComputeError(omega); @@ -416,126 +435,4 @@ double RomOperator::ComputeMaxError(int Nc, double &omega_star) return err_max; } -double RomOperator::ComputeError(double omega) -{ - // Compute the error metric associated with the approximate PROM solution at the given - // frequency. The HDM residual R = [K + iω C - ω² M + A2(ω)] x - [iω RHS1 + RHS2(ω)] is - // computed using the most recently computed A2(ω) and RHS2(ω). - AssemblePROM(omega); - SolvePROM(*E0); - - // Residual error. 
- const auto step = std::lround((omega - omega_min) / delta_omega); - MFEM_VERIFY(step >= 0 && static_cast(step) < A2.size(), - "Invalid out-of-range frequency for PROM solution!"); - double num, den = 1.0; - R0->SetZero(); - if (RHS1) - { - R0->AXPY(-1i * omega, *RHS1); - } - if (hasRHS2) - { - MFEM_VERIFY(RHS2[step], "Unexpected uncached frequency for RHS2 vector in PROM!"); - R0->AXPY(-1.0, *RHS2[step]); - } - if (!kspKM) - { - den = R0->Norml2(); - } - - K->MultAdd(*E0, *R0); - M->Mult(*E0, *T0); - R0->AXPY(-omega * omega, *T0); - if (C) - { - C->Mult(*E0, *T0); - R0->AXPY(1i * omega, *T0); - } - if (hasA2) - { - MFEM_VERIFY(A2[step], "Unexpected uncached frequency for A2 matrix in PROM!"); - A2[step]->MultAdd(*E0, *R0); - } - if (!kspKM) - { - num = R0->Norml2(); - } - else - { - kspKM->Mult(*R0, *T0); - num = std::sqrt(std::real(R0->Dot(*T0))); - opKM->Mult(*E0, *T0); - den = std::sqrt(std::real(E0->Dot(*T0))); - } - MFEM_VERIFY(den > 0.0, "Unexpected zero denominator in HDM residual!"); - return num / den; -} - -void RomOperator::BVMatProjectInternal(petsc::PetscDenseMatrix &V, petsc::PetscParMatrix &A, - petsc::PetscDenseMatrix &Ar, - petsc::PetscParVector &r, int n0, int n) -{ - // Update Ar = Vᴴ A V for the new basis dimension n0 => n. We assume V is real and thus - // the result is complex symmetric if A is symmetric. Ar is replicated across all - // processes (sequential n x n matrix). - MFEM_VERIFY(n0 < n, "Unexpected dimensions in BVMatProjectInternal!"); - MFEM_VERIFY(A.GetSymmetric() && Ar.GetSymmetric(), - "BVMatProjectInternal is specialized for symmetric matrices!"); - mfem::Vector vr(V.Height()); - for (int j = n0; j < n; j++) - { - // Fill block of Vᴴ A V = [ | Vᴴ A vj ] . We optimize matrix-vector product since we - // know columns of V are real. 
- { - petsc::PetscParVector v = V.GetColumn(j); - v.GetToVector(vr); - A.Mult(vr, r); - // A.Mult(v, r); - V.RestoreColumn(j, v); - } - { - PetscScalar *pV = V.GetArray(), *pr = r.GetArray(), *pAr = Ar.GetArray(); - petsc::PetscDenseMatrix locV(V.Height(), n, pV); - petsc::PetscParVector locr(V.Height(), pr), arn(n, pAr + j * n); - locV.MultTranspose(locr, arn); // Vᴴ = Vᵀ - V.RestoreArray(pV); - r.RestoreArray(pr); - Ar.RestoreArray(pAr); - } - } - // Fill lower block of Vᴴ A V = [ ____________ | ] - // [ vjᴴ A V[1:n0] | ] . - { - PetscScalar *pAr = Ar.GetArray(); - Mpi::GlobalSum((n - n0) * n, pAr + n0 * n, V.GetComm()); - for (int j = 0; j < n0; j++) - { - for (int i = n0; i < n; i++) - { - pAr[i + j * n] = pAr[j + i * n]; - } - } - Ar.RestoreArray(pAr); - } -} - -void RomOperator::BVDotVecInternal(petsc::PetscDenseMatrix &V, petsc::PetscParVector &b, - petsc::PetscParVector &br, int n0, int n) -{ - // Update br = Vᴴ b for the new basis dimension n0 => n. br is replicated across all - // processes (sequential n-dimensional vector). 
- MFEM_VERIFY(n0 < n, "Unexpected dimensions in BVDotVecInternal!"); - PetscScalar *pV = V.GetArray(), *pb = b.GetArray(), *pbr = br.GetArray(); - petsc::PetscDenseMatrix locV(V.Height(), n - n0, pV + n0 * V.Height()); - petsc::PetscParVector locb(V.Height(), pb), brn(n - n0, pbr + n0); - locV.MultTranspose(locb, brn); // Vᴴ = Vᵀ - V.RestoreArray(pV); - b.RestoreArray(pb); - Mpi::GlobalSum(n - n0, pbr + n0, V.GetComm()); - br.RestoreArray(pbr); -} - } // namespace palace - -#endif diff --git a/palace/models/romoperator.hpp b/palace/models/romoperator.hpp index c772e5861..4a230ebc6 100644 --- a/palace/models/romoperator.hpp +++ b/palace/models/romoperator.hpp @@ -4,15 +4,20 @@ #ifndef PALACE_MODELS_ROM_OPERATOR_HPP #define PALACE_MODELS_ROM_OPERATOR_HPP -#if 0 // XX TODO DISABLE ROM FOR NOW - #include #include +#include #include -#include +#include #include "linalg/curlcurl.hpp" #include "linalg/ksp.hpp" -#include "linalg/petsc.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" + +// XX TODO NOTES +// - Precompute A2, RHS2 for all frequencies? This seems very dumb and especially risky +// for fine resolution sweeps, so for now remove A2, RHS2 storage (and omega = omega_0 + +// delta_omega * step) namespace palace { @@ -21,7 +26,8 @@ class IoData; class SpaceOperator; // -// A class handling PROM construction and use for adaptive fast frequency sweeps. +// A class handling projection-based reduced order model (PROM) construction and use for +// adaptive fast frequency sweeps. // class RomOperator { @@ -30,82 +36,68 @@ class RomOperator SpaceOperator &spaceop; // HDM system matrices and excitation RHS. - std::unique_ptr K, M, C; - std::unique_ptr RHS1; - - // HDM storage for terms with non-polynomial frequency dependence. - std::vector> A2; - std::vector> RHS2; - bool init2, hasA2, hasRHS2; - - // HDM linear system solver and preconditioner. 
- std::unique_ptr ksp0; - std::unique_ptr pc0; + std::unique_ptr K, M, C, A2; + ComplexVector RHS1, RHS2; + bool has_A2, has_RHS2; // Working storage for HDM vectors. - std::unique_ptr E0, R0, T0; + ComplexVector r, w, z; - // PROM matrices, vectors, and linear solver. - std::unique_ptr Kr, Mr, Cr, Ar; - std::unique_ptr RHS1r, RHSr, Er; - std::unique_ptr ksp; + // HDM linear system solver and preconditioner. + std::unique_ptr ksp; // Linear solver for inner product solves for error metric. std::unique_ptr kspKM; - std::unique_ptr opKM; - // PROM reduced-order basis and parameter domain samplings. - int dim; - std::unique_ptr V; - std::vector Ps, PmPs; - double omega_min, delta_omega; - std::default_random_engine engine; + // PROM matrices and vectors. + Eigen::MatrixXcd Kr, Mr, Cr, Ar; + Eigen::VectorXcd RHS1r, RHSr; - // Compute the error metric for the PROM solution (computed internally) at the specified - // frequency. - double ComputeError(double omega); + // PROM reduced-order basis (real-valued) and active dimension. + std::vector V; + int dim_V; + bool orthog_mgs; - // Helper functions for reduced-order matrix or vector construction/update. - void BVMatProjectInternal(petsc::PetscDenseMatrix &V, petsc::PetscParMatrix &A, - petsc::PetscDenseMatrix &Ar, petsc::PetscParVector &r, int n0, - int n); - void BVDotVecInternal(petsc::PetscDenseMatrix &V, petsc::PetscParVector &b, - petsc::PetscParVector &br, int n0, int n); + // Data structures for parameter domain sampling. + std::set PS, P_m_PS; + std::default_random_engine engine; public: - RomOperator(const IoData &iodata, SpaceOperator &sp, int nmax); + RomOperator(const IoData &iodata, SpaceOperator &sp); - // Return set of sampled parameter points for basis construction. - const std::vector &GetSampleFrequencies() const { return Ps; } + // Return the HDM linear solver. + const ComplexKspSolver &GetLinearSolver() const { return *ksp; } // Return PROM dimension. 
- int GetReducedDimension() const { return dim; } + int GetReducedDimension() const { return dim_V; } + + // Return set of sampled parameter points for basis construction. + const std::set &GetSampleFrequencies() const { return PS; } - // Return number of HDM linear solves and linear solver iterations performed during - // offline training. - int GetTotalKspMult() const { return ksp0->GetTotalNumMult(); } - int GetTotalKspIter() const { return ksp0->GetTotalNumIter(); } + // Initialize the parameter domain P = {ω_L, ω_L + δ, ..., ω_R}. Also sets the maximum + // number of sample points for the PROM construction. + void Initialize(double start, double delta, int num_steps, int max_dim); - // Initialize the solution basis with HDM samples at the minimum and maximum frequencies. - void Initialize(int steps, double start, double delta); + // Assemble and solve the HDM at the specified frequency. + void SolveHDM(double omega, ComplexVector &e); - // Assemble and solve the HDM at the specified frequency, adding the solution vector to - // the reduced-order basis. - void SolveHDM(double omega, petsc::PetscParVector &E, bool print = false); + // Add the solution vector to the reduced-order basis and update the PROM. + void AddHDMSample(double omega, ComplexVector &e); // Assemble and solve the PROM at the specified frequency, expanding the solution back // into the high-dimensional solution space. void AssemblePROM(double omega); - void SolvePROM(petsc::PetscParVector &E); + void SolvePROM(ComplexVector &e); + + // Compute the error metric for the PROM at the specified frequency. + double ComputeError(double omega); // Compute the maximum error over a randomly sampled set of candidate points. Returns the // maximum error and its correcponding frequency, as well as the number of candidate // points used (if fewer than those availble in the unsampled parameter domain). 
- double ComputeMaxError(int Nc, double &omega_star); + double ComputeMaxError(int num_cand, double &omega_star); }; } // namespace palace -#endif - #endif // PALACE_MODELS_ROM_OPERATOR_HPP diff --git a/palace/utils/prettyprint.hpp b/palace/utils/prettyprint.hpp index 21335cabb..2b049a21e 100644 --- a/palace/utils/prettyprint.hpp +++ b/palace/utils/prettyprint.hpp @@ -5,6 +5,7 @@ #define PALACE_UTILS_PRETTY_PRINT_HPP #include +#include #include #include #include "utils/communication.hpp" @@ -22,15 +23,15 @@ namespace internal constexpr std::size_t max_width = 60; template -inline std::size_t GetSize(const mfem::Array &v) +inline std::size_t GetSize(const T &v) { - return v.Size(); + return v.size(); } template -inline std::size_t GetSize(const std::vector &v) +inline std::size_t GetSize(const mfem::Array &v) { - return v.size(); + return v.Size(); } inline std::size_t PrePrint(MPI_Comm comm, std::size_t w, std::size_t wv, std::size_t lead) @@ -57,10 +58,13 @@ inline std::size_t PrePrint(MPI_Comm comm, std::size_t w, std::size_t wv, std::s // Fixed column width wrapped printing with range notation for the contents of a marker // array. -template -inline void PrettyPrintMarker(const T &data, const std::string &prefix = "", +template