diff --git a/.gitignore b/.gitignore index fcbd0acbb9..8b385cdeb9 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ docs/src/tutorials/*/ docs/src/tutorials/*.md docs/.CondaPkg docs/src/tutorials/Optimize!_files +docs/src/tutorials/*.html diff --git a/Changelog.md b/Changelog.md index cd48273e64..54f1eace16 100644 --- a/Changelog.md +++ b/Changelog.md @@ -5,7 +5,20 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.4.x] - dd/mm/2023 +## [0.4.32] - 23/08/2023 + +### Added + +* The adaptive regularization with cubics (ARC) solver. + +## [0.4.31] - 14/08/2023 + +### Added + +* A `:Subsolver` keyword in the `debug=` keyword argument, that activates the new `DebugWhenActive`` + to de/activate subsolver debug from the main solvers `DebugEvery`. + +## [0.4.30] - 03/08/2023 ### Changed diff --git a/Project.toml b/Project.toml index 1b2931b700..84fa30c532 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Manopt" uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5" authors = ["Ronny Bergmann "] -version = "0.4.31" +version = "0.4.32" [deps] ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" @@ -13,6 +13,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" ManifoldDiff = "af67fdf4-a580-4b9f-bbec-742ef357defd" ManifoldsBase = "3362f125-f0bb-47a3-aa74-596ffd7ef2fb" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" +PolynomialRoots = "3a141323-8675-5d76-9d11-e1df1406c778" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Requires = "ae029012-a4dd-5104-9daa-d747884805df" @@ -41,6 +42,7 @@ LRUCache = "1.4" ManifoldDiff = "0.2, 0.3.3" Manifolds = "0.8.69" ManifoldsBase = "0.14.4" +PolynomialRoots = "1" Requires = "0.5, 1" julia = "1.6" diff --git a/docs/make.jl b/docs/make.jl index 27344e21e8..dccda1527b 100755 --- a/docs/make.jl +++ b/docs/make.jl @@ -62,6 +62,7 @@ makedocs( mathengine=MathJax3(), prettyurls=get(ENV, "CI", nothing) == "true" ), modules=[Manopt], + authors="Ronny Bergmann and contributors.", sitename="Manopt.jl", strict=[ :doctest, @@ -94,6 +95,7 @@ makedocs( ], "Solvers" => [ "Introduction" => "solvers/index.md", + "Adaptive Regularization with Cubics" => "solvers/adaptive-regularization-with-cubics.md", "Alternating Gradient Descent" => "solvers/alternating_gradient_descent.md", "Augmented Lagrangian Method" => "solvers/augmented_Lagrangian_method.md", "Chambolle-Pock" => "solvers/ChambollePock.md", diff --git a/docs/src/about.md b/docs/src/about.md index aacede1aaa..e38f780134 100644 --- a/docs/src/about.md +++ b/docs/src/about.md @@ -7,7 +7,8 @@ The following people contributed * [Constantin Ahlmann-Eltze](https://const-ae.name) implemented the [gradient and differential check functions](helpers/checks.md) * [Renée Dornig](https://github.com/r-dornig) implemented the [particle swarm](@ref ParticleSwarmSolver), the [Riemannian Augmented Lagrangian Method](@ref AugmentedLagrangianSolver), the [Exact Penalty Method](@ref ExactPenaltySolver), as well as the [`NonmonotoneLinesearch`](@ref) * [Willem Diepeveen](https://www.maths.cam.ac.uk/person/wd292) implemented the [primal-dual Riemannian semismooth Newton](@ref PDRSSNSolver) solver. -* Even Stephansen Kjemsås contributed to the implementation of the [Frank Wolfe Method](@ref FrankWolfe) +* Even Stephansen Kjemsås contributed to the implementation of the [Frank Wolfe Method](@ref FrankWolfe) solver +* Mathias Ravn Munkvold contributed most of the implementation of the [Adaptive Regularization with Cubics](@ref ARSSection) solver * [Tom-Christian Riemer](https://www.tu-chemnitz.de/mathematik/wire/mitarbeiter.php) Riemer implemented the [trust regions](@ref trust_regions) and [quasi Newton](solvers/quasi_Newton.md) solvers. * [Manuel Weiss](https://scoop.iwr.uni-heidelberg.de/author/manuel-weiß/) implemented most of the [conjugate gradient update rules](@ref cg-coeffs) diff --git a/docs/src/references.bib b/docs/src/references.bib index b8cb9d265c..5f7655f7ca 100644 --- a/docs/src/references.bib +++ b/docs/src/references.bib @@ -25,6 +25,7 @@ @article{AbsilBakerGallivan:2006 TITLE = {Trust-Region Methods on Riemannian Manifolds}, JOURNAL = {Foundations of Computational Mathematics} } + @article{AdachiOkunoTakeda:2022, AUTHOR = {Adachi, S. and Okuno, T., and Takeda, A.}, JOURNAL = {ArXiv Preprint}, @@ -34,6 +35,15 @@ @article{AdachiOkunoTakeda:2022 YEAR = {2022}, } +@article{AgarwalBoumalBullinsCartis:2020, + AUTHOR = {Agarwal, N. and Boumal, N. and Bullins, B. and Cartis, C.}, + TITLE = {Adaptive regularization with cubics on manifolds}, + JOURNAL = {Mathematical Programming}, + PUBLISHER = {Springer Science and Business Media LLC}, + YEAR = {2020}, + DOI = {10.1007/s10107-020-01505-1} +} + @article{AlmeidaNetoOliveiraSouza:2020, AUTHOR = {Yldenilson Torres Almeida and João Xavier da Cruz Neto and Paulo Roberto Oliveira and João Carlos de Oliveira Souza}, DOI = {10.1007/s10589-020-00173-3}, diff --git a/docs/src/solvers/adaptive-regularization-with-cubics.md b/docs/src/solvers/adaptive-regularization-with-cubics.md new file mode 100644 index 0000000000..f47bf034be --- /dev/null +++ b/docs/src/solvers/adaptive-regularization-with-cubics.md @@ -0,0 +1,64 @@ +# [Adaptive regularization with Cubics](@id ARSSection) + + + +```@meta +CurrentModule = Manopt +``` + +```@docs +adaptive_regularization_with_cubics +adaptive_regularization_with_cubics! +``` + +## State + +```@docs +AdaptiveRegularizationState +``` + +## Sub solvers + +There are several ways to approach the subsolver. The default is the first one. + +## Lanczos Iteration + +```@docs +Manopt.LanczosState +``` + +## (Conjugate) Gradient Descent + +There are two generic functors, that implement the sub problem + +```@docs +AdaptiveRegularizationCubicCost +AdaptiveRegularizationCubicGrad +``` + +Since the sub problem is given on the tangent space, you have to provide + +``` +g = AdaptiveRegularizationCubicCost(M, mho, σ) +grad_g = AdaptiveRegularizationCubicGrad(M, mho, σ) +sub_problem = DefaultProblem(TangentSpaceAt(M,p), ManifoldGradienObjective(g, grad_g)) +``` + +where `mho` is the hessian objective of `f` to solve. +Then use this for the `sub_problem` keyword +and use your favourite gradient based solver for the `sub_state` keyword, for example a +[`ConjugateGradientDescentState`](@ref) + +## Additional Stopping Criteria + +```@docs +StopWhenAllLanczosVectorsUsed +StopWhenFirstOrderProgress +``` + +## Literature + +```@bibliography +Pages = ["solvers/adaptive-regularization-with-cubics.md"] +Canonical=false +``` \ No newline at end of file diff --git a/docs/src/solvers/trust_regions.md b/docs/src/solvers/trust_regions.md index 70ad1f67ed..2e0f4c9c27 100644 --- a/docs/src/solvers/trust_regions.md +++ b/docs/src/solvers/trust_regions.md @@ -156,6 +156,8 @@ as well as their (non-exported) common supertype Manopt.AbstractApproxHessian ``` +## Literature + ```@bibliography Pages = ["solvers/trust_regions.md"] Canonical=false diff --git a/ext/ManoptManifoldsExt/ARC_CG.jl b/ext/ManoptManifoldsExt/ARC_CG.jl new file mode 100644 index 0000000000..d3f42dd395 --- /dev/null +++ b/ext/ManoptManifoldsExt/ARC_CG.jl @@ -0,0 +1,46 @@ +function set_manopt_parameter!(M::TangentSpaceAtPoint, ::Val{:p}, v) + M.point .= v + return M +end +function (f::Manopt.AdaptiveRegularizationCubicCost)(M::TangentSpaceAtPoint, X) + ## (33) in Agarwal et al. + return get_cost(base_manifold(M), f.mho, M.point) + + inner(base_manifold(M), M.point, X, f.X) + + 1 / 2 * inner( + base_manifold(M), + M.point, + X, + get_hessian(base_manifold(M), f.mho, M.point, X), + ) + + f.σ / 3 * norm(base_manifold(M), M.point, X)^3 +end +function (grad_f::Manopt.AdaptiveRegularizationCubicGrad)(M::TangentSpaceAtPoint, X) + # (37) in Agarwal et + return grad_f.X + + get_hessian(base_manifold(M), grad_f.mho, M.point, X) + + grad_f.σ * norm(base_manifold(M), M.point, X) * X +end +function (grad_f::Manopt.AdaptiveRegularizationCubicGrad)(M::TangentSpaceAtPoint, Y, X) + get_hessian!(base_manifold(M), Y, grad_f.mho, M.point, X) + Y .= Y + grad_f.X + grad_f.σ * norm(base_manifold(M), M.point, X) * X + return Y +end +function (c::StopWhenFirstOrderProgress)( + dmp::AbstractManoptProblem{<:TangentSpaceAtPoint}, + ams::AbstractManoptSolverState, + i::Int, +) + if (i == 0) + c.reason = "" + return false + end + #Update Gradient + TpM = get_manifold(dmp) + nG = norm(base_manifold(TpM), TpM.point, get_gradient(dmp, ams.p)) + nX = norm(base_manifold(TpM), TpM.point, ams.p) + if (i > 0) && (nG <= c.θ * nX^2) + c.reason = "The algorithm has reduced the model grad norm by $(c.θ).\n" + return true + end + return false +end diff --git a/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl b/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl index 10d4a816d3..fcbcfe3a4d 100644 --- a/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl +++ b/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl @@ -9,7 +9,8 @@ import Manopt: alternating_gradient_descent, alternating_gradient_descent!, get_gradient, - get_gradient! + get_gradient!, + set_manopt_parameter! using LinearAlgebra: cholesky, det, diag, dot, Hermitian, qr, Symmetric, triu, I, Diagonal import ManifoldsBase: copy, mid_point, mid_point! @@ -29,5 +30,5 @@ include("nonmutating_manifolds_functions.jl") include("artificialDataFunctionsManifolds.jl") include("ChambollePockManifolds.jl") include("alternating_gradient.jl") - +include("ARC_CG.jl") end diff --git a/src/Manopt.jl b/src/Manopt.jl index 518f2c8927..828c235e40 100644 --- a/src/Manopt.jl +++ b/src/Manopt.jl @@ -15,7 +15,8 @@ using ColorTypes using Colors using DataStructures: CircularBuffer, capacity, length, push!, size using Dates: Millisecond, Nanosecond, Period, canonicalize, value -using LinearAlgebra: Diagonal, I, eigen, eigvals, tril, Symmetric, dot, cholesky +using LinearAlgebra: + Diagonal, I, eigen, eigvals, tril, Symmetric, dot, cholesky, eigmin, opnorm using ManifoldDiff: adjoint_Jacobi_field, adjoint_Jacobi_field!, @@ -70,6 +71,7 @@ using ManifoldsBase: allocate, allocate_result, allocate_result_type, + base_manifold, copy, copyto!, default_inverse_retraction_method, @@ -105,6 +107,7 @@ using ManifoldsBase: power_dimensions, project, project!, + rand!, representation_size, requires_caching, retract, @@ -138,6 +141,7 @@ include("functions/manifold_functions.jl") # solvers general framework include("solvers/solver.jl") # specific solvers +include("solvers/adaptive_regularization_with_cubics.jl") include("solvers/alternating_gradient_descent.jl") include("solvers/augmented_Lagrangian_method.jl") include("solvers/ChambollePock.jl") @@ -227,6 +231,7 @@ export AbstractGradientSolverState, AbstractHessianSolverState, AbstractManoptSolverState, AbstractPrimalDualSolverState, + AdaptiveRegularizationState, AlternatingGradientDescentState, AugmentedLagrangianMethodState, ChambollePockState, @@ -238,6 +243,7 @@ export AbstractGradientSolverState, ExactPenaltyMethodState, FrankWolfeState, GradientDescentState, + LanczosState, LevenbergMarquardtState, NelderMeadState, ParticleSwarmState, @@ -314,8 +320,7 @@ export QuasiNewtonCautiousDirectionUpdate, BFGS, InverseBFGS, DFP, InverseDFP, SR1, InverseSR1 export InverseBroyden, Broyden export AbstractQuasiNewtonDirectionUpdate, AbstractQuasiNewtonUpdateRule -export WolfePowellLinesearch, - operator_to_matrix, square_matrix_vector_product, WolfePowellBinaryLinesearch +export WolfePowellLinesearch, WolfePowellBinaryLinesearch export AbstractStateAction, StoreStateAction export has_storage, get_storage, update_storage! export objective_cache_factory @@ -335,7 +340,9 @@ export DirectionUpdateRule, ConjugateGradientBealeRestart # # Solvers -export alternating_gradient_descent, +export adaptive_regularization_with_cubics, + adaptive_regularization_with_cubics!, + alternating_gradient_descent, alternating_gradient_descent!, augmented_Lagrangian_method, augmented_Lagrangian_method!, @@ -381,6 +388,7 @@ export solve! export ApproxHessianFiniteDifference, ApproxHessianSymmetricRankOne, ApproxHessianBFGS export update_hessian!, update_hessian_basis! export ExactPenaltyCost, ExactPenaltyGrad, AugmentedLagrangianCost, AugmentedLagrangianGrad +export AdaptiveRegularizationCubicCost, AdaptiveRegularizationCubicGrad # # Stepsize export Stepsize @@ -395,12 +403,14 @@ export StopAfter, StopAfterIteration, StopWhenResidualIsReducedByFactorOrPower, StopWhenAll, + StopWhenAllLanczosVectorsUsed, StopWhenAny, StopWhenChangeLess, StopWhenCostLess, StopWhenCurvatureIsNegative, StopWhenGradientChangeLess, StopWhenGradientNormLess, + StopWhenFirstOrderProgress, StopWhenModelIncreased, StopWhenPopulationConcentrated, StopWhenSmallerOrEqual, @@ -480,7 +490,7 @@ export DebugDualBaseChange, DebugDualBaseIterate, DebugDualChange, DebugDualIter export DebugDualResidual, DebugPrimalDualResidual, DebugPrimalResidual export DebugProximalParameter, DebugWarnIfCostIncreases export DebugGradient, DebugGradientNorm, DebugStepsize -export DebugWhenActive +export DebugWhenActive, DebugWarnIfFieldNotFinite, DebugIfEntry export DebugWarnIfCostNotFinite, DebugWarnIfFieldNotFinite, DebugMessages # # Records - and access functions diff --git a/src/plans/adabtive_regularization_with_cubics_plan.jl b/src/plans/adabtive_regularization_with_cubics_plan.jl new file mode 100644 index 0000000000..7dda3f8d7e --- /dev/null +++ b/src/plans/adabtive_regularization_with_cubics_plan.jl @@ -0,0 +1,99 @@ +@doc raw""" + AdaptiveRegularizationCubicCost + +We define the model ``m(X)`` in the tangent space of the current iterate ``p=p_k`` as + +```math + m(X) = f(p) + + + \frac{1}{2} + \frac{σ}{3} \lVert X \rVert^3 +``` + +# Fields +* `mho` – an [`AbstractManifoldObjective`](@ref) that should provide at least [`get_cost`](@ref), [`get_gradient`](@ref) and [`get_hessian`](@ref). +* `σ` – the current regularization parameter +* `X` – a storage for the gradient at `p` of the original cost + +# Constructors + + AdaptiveRegularizationCubicCost(mho, σ, X) + AdaptiveRegularizationCubicCost(M, mho, σ; p=rand(M), X=get_gradient(M, mho, p)) + +Initialize the cubic cost to the objective `mho`, regularization parameter `σ`, and +(temporary) gradient `X`. + +!!! note + For this gradient function to work, we require the [`TangentSpaceAtPoint`](https://juliamanifolds.github.io/Manifolds.jl/latest/manifolds/vector_bundle.html#Manifolds.TangentSpaceAtPoint) + from `Manifolds.jl` +""" +mutable struct AdaptiveRegularizationCubicCost{T,R,O<:AbstractManifoldObjective} + mho::O + σ::R + X::T +end +function AdaptiveRegularizationCubicCost( + M, mho::O, σ::R=1.0; p::P=rand(M), X::T=get_gradient(M, mho, p) +) where {P,T,R,O} + return AdaptiveRegularizationCubicCost{T,R,O}(mho, σ, X) +end +function set_manopt_parameter!(f::AdaptiveRegularizationCubicCost, ::Val{:X}, X) + f.X = X + return f +end +function set_manopt_parameter!(f::AdaptiveRegularizationCubicCost, ::Val{:σ}, σ) + f.σ = σ + return f +end + +@doc raw""" + AdaptiveRegularizationCubicGrad + +We define the model ``m(X)`` in the tangent space of the current iterate ``p=p_k`` as + +```math + m(X) = f(p) + + + \frac{1}{2} + \frac{σ}{3} \lVert X \rVert^3 +``` + +This struct represents its gradient, given by + +```math + \operatorname{grad} m(X) = \operatorname{grad}f(p) + \operatorname{Hess} f(p)[X] + σ \lVert X \rVert X +``` + +# Fields + +* `mho` – an [`AbstractManifoldObjective`](@ref) that should provide at least [`get_cost`](@ref), [`get_gradient`](@ref) and [`get_hessian`](@ref). +* `σ` – the current regularization parameter +* `X` – a storage for the gradient at `p` of the original cost + +# Constructors + + AdaptiveRegularizationCubicGrad(mho, σ, X) + AdaptiveRegularizationCubicGrad(M, mho, σ; p=rand(M), X=get_gradient(M, mho, p)) + +Initialize the cubic cost to the original objective `mho`, regularization parameter `σ`, and +(temporary) gradient `X`. + +!!! note + * For this gradient function to work, we require the [`TangentSpaceAtPoint`](https://juliamanifolds.github.io/Manifolds.jl/latest/manifolds/vector_bundle.html#Manifolds.TangentSpaceAtPoint) + from `Manifolds.jl` + * The gradient functor provides both an allocating as well as an in-place variant. +""" +mutable struct AdaptiveRegularizationCubicGrad{T,R,O<:AbstractManifoldObjective} + mho::O + σ::R + X::T +end +function AdaptiveRegularizationCubicGrad( + M, mho::O, σ::R=1.0; p::P=rand(M), X::T=get_gradient(M, mho, p) +) where {P,T,R,O} + return AdaptiveRegularizationCubicGrad{T,R,O}(mho, σ, X) +end +function set_manopt_parameter!(f::AdaptiveRegularizationCubicGrad, ::Val{:X}, X) + f.X = X + return f +end +function set_manopt_parameter!(f::AdaptiveRegularizationCubicGrad, ::Val{:σ}, σ) + f.σ = σ + return f +end diff --git a/src/plans/conjugate_gradient_plan.jl b/src/plans/conjugate_gradient_plan.jl index d2529f2003..54380a5457 100644 --- a/src/plans/conjugate_gradient_plan.jl +++ b/src/plans/conjugate_gradient_plan.jl @@ -26,14 +26,24 @@ specify options for a conjugate gradient descent algorithm, that solves a [`DefaultManoptProblem`]. # Fields -* `p` – the current iterate, a point on a manifold -* `X` – the current gradient, also denoted as ``ξ`` or ``X_k`` for the gradient in the ``k``th step. -* `δ` – the current descent direction, i.e. also tangent vector -* `β` – the current update coefficient rule, see . -* `coefficient` – a [`DirectionUpdateRule`](@ref) function to determine the new `β` -* `stepsize` – a [`Stepsize`](@ref) function -* `stop` – a [`StoppingCriterion`](@ref) -* `retraction_method` – (`default_retraction_method(M, typeof(p))`) a type of retraction +* `p` – the current iterate, a point on a manifold +* `X` – the current gradient, also denoted as ``ξ`` or ``X_k`` for the gradient in the ``k``th step. +* `δ` – the current descent direction, i.e. also tangent vector +* `β` – the current update coefficient rule, see . +* `coefficient` – ([`ConjugateDescentCoefficient`](@ref)`()`) a [`DirectionUpdateRule`](@ref) function to determine the new `β` +* `stepsize` – ([`default_stepsize`](@ref)`(M, ConjugateGradientDescentState; retraction_method=retraction_method)`) a [`Stepsize`](@ref) function +* `stop` – ([`StopAfterIteration`](@ref)`(500) | `[`StopWhenGradientNormLess`](@ref)`(1e-8)`) a [`StoppingCriterion`](@ref) +* `retraction_method` – (`default_retraction_method(M, typeof(p))`) a type of retraction +* `vector_transport_method` – (`default_retraction_method(M, typeof(p))`) a type of retraction + +# Constructor + + ConjugateGradientState(M, p) + +where the last five fields above can be set by their names as keyword and the +`X` can be set to a tangent vector type using the keyword `initial_gradient` which defaults to `zero_vector(M,p)`, +and `δ` is initialized to a copy of this vector. + # See also @@ -85,17 +95,48 @@ mutable struct ConjugateGradientDescentState{ return cgs end end -function ConjugateGradientDescentState( +@deprecate ConjugateGradientDescentState( M::AbstractManifold, - p::P, + p, sC::StoppingCriterion, s::Stepsize, dU::DirectionUpdateRule, retr::AbstractRetractionMethod=default_retraction_method(M, typeof(p)), vtr::AbstractVectorTransportMethod=default_vector_transport_method(M, typeof(p)), + initial_gradient=zero_vector(M, p), +) ConjugateGradientDescentState( + M, + p; + stopping_criterion=sC, + stepsize=s, + coefficient=dU, + retraction_method=retr, + vector_transport_method=vtr, + initial_gradient=initial_gradient, +) +function ConjugateGradientDescentState( + M::AbstractManifold, + p::P; + coefficient::DirectionUpdateRule=ConjugateDescentCoefficient(), + retraction_method::AbstractRetractionMethod=default_retraction_method(M, typeof(p)), + stepsize::Stepsize=default_stepsize( + M, ConjugateGradientDescentState; retraction_method=retraction_method + ), + stopping_criterion::StoppingCriterion=StopAfterIteration(500) | + StopWhenGradientNormLess(1e-8), + vector_transport_method=default_vector_transport_method(M, typeof(p)), initial_gradient::T=zero_vector(M, p), ) where {P,T} - return ConjugateGradientDescentState{P,T}(M, p, sC, s, dU, retr, vtr, initial_gradient) + return ConjugateGradientDescentState{P,T}( + M, + p, + stopping_criterion, + stepsize, + coefficient, + retraction_method, + vector_transport_method, + initial_gradient, + ) end function get_message(cgs::ConjugateGradientDescentState) diff --git a/src/plans/count.jl b/src/plans/count.jl index 08198c2d4e..3e4adbfaeb 100644 --- a/src/plans/count.jl +++ b/src/plans/count.jl @@ -411,7 +411,7 @@ function status_summary(co::ManifoldCountObjective) longest_key_length = max(length.(["$c" for c in keys(co.counts)])...) s = "## Statistics on function calls\n" count_strings = [ - " * :$(rpad("$(c[1])",longest_key_length)) : $(c[2])" for c in co.counts + " * :$(rpad("$(c[1])",longest_key_length)) : $(c[2])" for c in co.counts ] s2 = status_summary(co.objective) (length(s2) > 0) && (s2 = "\n$(s2)") diff --git a/src/plans/debug.jl b/src/plans/debug.jl index ae80800bca..b1903ad3e6 100644 --- a/src/plans/debug.jl +++ b/src/plans/debug.jl @@ -332,7 +332,7 @@ specified how to print the entry. # Constructor - DebugEntry(f[, prefix="$f:", format = "$prefix %s", io=stdout]) + DebugEntry(f; prefix="$f:", format = "$prefix %s", io=stdout) """ mutable struct DebugEntry <: DebugAction @@ -351,6 +351,50 @@ function show(io::IO, di::DebugEntry) return print(io, "DebugEntry(:$(di.field); format=\"$(di.format)\")") end +@doc raw""" + DebugIfEntry <: DebugAction + +Issue a warning, info or error if a certain field does _not_ pass a check + +# Fields + +* `io` – an io stream +* `check` – a function that takes the value of the `field` as input and returns a boolean +* `field` – Symbol the entry can be accessed with within [`AbstractManoptSolverState`](@ref) +* `msg` - is the check fails, this message is displayed +* `type` – Symbol specifying the type of display, possible values `:print`, `: warn`, `:info`, `:error`, + where `:print` prints to `io`. + +# Constructor + + DebugEntry(field, check=(>(0)); type=:warn, message=":$f is nonnegative", io=stdout) + +""" +mutable struct DebugIfEntry{F} <: DebugAction + io::IO + check::F + field::Symbol + msg::String + type::Symbol + function DebugIfEntry( + f::Symbol, check::F=(>(0)); type=:warn, message=":$f nonpositive.", io::IO=stdout + ) where {F} + return new{F}(io, check, f, message, type) + end +end +function (d::DebugIfEntry)(::AbstractManoptProblem, st::AbstractManoptSolverState, i) + if (i >= 0) && (!d.check(getfield(st, d.field))) + d.type === :warn && (@warn "$(d.msg)") + d.type === :info && (@info "$(d.msg)") + d.type === :error && error(d.msg) + d.type === :print && print(d.io, d.msg) + end + return nothing +end +function show(io::IO, di::DebugIfEntry) + return print(io, "DebugIfEntry(:$(di.field), $(di.check); type=:$(di.type))") +end + @doc raw""" DebugEntryChange{T} <: DebugAction diff --git a/src/plans/hessian_plan.jl b/src/plans/hessian_plan.jl index 2fed2ebde5..480044b0be 100644 --- a/src/plans/hessian_plan.jl +++ b/src/plans/hessian_plan.jl @@ -97,7 +97,7 @@ end function get_hessian!( M::AbstractManifold, Y, mho::ManifoldHessianObjective{AllocatingEvaluation}, p, X ) - copyto!(M, Y, mho.hessian!!(M, p, X)) + copyto!(M, Y, p, mho.hessian!!(M, p, X)) return Y end function get_hessian!( diff --git a/src/plans/plan.jl b/src/plans/plan.jl index db879a07f7..c52bbbf362 100644 --- a/src/plans/plan.jl +++ b/src/plans/plan.jl @@ -31,19 +31,20 @@ include("record.jl") include("stopping_criterion.jl") include("stepsize.jl") - include("cost_plan.jl") include("gradient_plan.jl") -include("alternating_gradient_plan.jl") +include("hessian_plan.jl") +include("proximal_plan.jl") +include("subgradient_plan.jl") include("constrained_plan.jl") + +include("adabtive_regularization_with_cubics_plan.jl") +include("alternating_gradient_plan.jl") include("augmented_lagrangian_plan.jl") include("conjugate_gradient_plan.jl") include("exact_penalty_method_plan.jl") include("frank_wolfe_plan.jl") include("quasi_newton_plan.jl") -include("hessian_plan.jl") -include("proximal_plan.jl") -include("subgradient_plan.jl") include("nonlinear_least_squares_plan.jl") include("difference_of_convex_plan.jl") diff --git a/src/plans/problem.jl b/src/plans/problem.jl index c4faa1c7c0..ba6341ac3f 100644 --- a/src/plans/problem.jl +++ b/src/plans/problem.jl @@ -91,6 +91,11 @@ By default this passes on to the inner objective, see [`set_manopt_parameter!`]( """ set_manopt_parameter!(amp::AbstractManoptProblem, e::Symbol, args...) +function set_manopt_parameter!(amp::AbstractManoptProblem, ::Val{:Manifold}, args...) + set_manopt_parameter!(get_manifold(amp), args...) + return amp +end + function set_manopt_parameter!(amp::AbstractManoptProblem, ::Val{:Objective}, args...) set_manopt_parameter!(get_objective(amp), args...) return amp diff --git a/src/plans/solver_state.jl b/src/plans/solver_state.jl index b466fedab3..c326502405 100644 --- a/src/plans/solver_state.jl +++ b/src/plans/solver_state.jl @@ -195,7 +195,9 @@ By default also undecorates the state beforehand. get_iterate(s::AbstractManoptSolverState) = _get_iterate(s, dispatch_state_decorator(s)) function _get_iterate(s::AbstractManoptSolverState, ::Val{false}) return error( - "It seems the AbstractManoptSolverState $s do not provide access to an iterate" + "It seems the AbstractManoptSolverState $s do not provide access to an iterate. + If it has the iterate stored internally, please implement `get_iterate(s::$(typeof(s))).` + ", ) end _get_iterate(s::AbstractManoptSolverState, ::Val{true}) = get_iterate(s.state) diff --git a/src/plans/stopping_criterion.jl b/src/plans/stopping_criterion.jl index 90aa2d921d..81dc502848 100644 --- a/src/plans/stopping_criterion.jl +++ b/src/plans/stopping_criterion.jl @@ -141,8 +141,8 @@ mutable struct StopAfterIteration <: StoppingCriterion StopAfterIteration(mIter::Int) = new(mIter, "", 0) end function (c::StopAfterIteration)( - ::P, ::O, i::Int -) where {P<:AbstractManoptProblem,O<:AbstractManoptSolverState} + ::P, ::S, i::Int +) where {P<:AbstractManoptProblem,S<:AbstractManoptSolverState} if i == 0 # reset on init c.reason = "" c.at_iteration = 0 diff --git a/src/plans/subsolver_plan.jl b/src/plans/subsolver_plan.jl index c76fac3a43..3e9940d03a 100644 --- a/src/plans/subsolver_plan.jl +++ b/src/plans/subsolver_plan.jl @@ -45,6 +45,7 @@ function set_manopt_parameter!(dss::DebugSolverState, ::Val{:Debug}, args...) end return dss end + """ set_manopt_parameter!(ams::DebugSolverState, ::Val{:SubProblem}, args...) diff --git a/src/solvers/adaptive_regularization_with_cubics.jl b/src/solvers/adaptive_regularization_with_cubics.jl new file mode 100644 index 0000000000..29fe76b061 --- /dev/null +++ b/src/solvers/adaptive_regularization_with_cubics.jl @@ -0,0 +1,839 @@ +@doc raw""" + AdaptiveRegularizationState{P,T} <: AbstractHessianSolverState + +A state for the [`adaptive_regularization_with_cubics`](@ref) solver. + +# Fields +a default value is given in brackets if a parameter can be left out in initialization. + +* `η1`, `η2` – (`0.1`, `0.9`) bounds for evaluating the regularization parameter +* `γ1`, `γ2` – (`0.1`, `2.0`) shrinking and exansion factors for regularization parameter `σ` +* `p` – (`rand(M)` the current iterate +* `X` – (`zero_vector(M,p)`) the current gradient ``\operatorname{grad}f(p)`` +* `s` - (`zero_vector(M,p)`) the tangent vector step resulting from minimizing the model + problem in the tangent space ``\mathcal T_{p} \mathcal M`` +* `σ` – the current cubic regularization parameter +* `σmin` – (`1e-7`) lower bound for the cubic regularization parameter +* `ρ_regularization` – (1e3) regularization paramter for computing ρ. As we approach convergence the ρ may be difficult to compute with numerator and denominator approachign zero. Regularizing the the ratio lets ρ go to 1 near convergence. +* `evaluation` - (`AllocatingEvaluation()`) if you provide a +* `retraction_method` – (`default_retraction_method(M)`) the retraction to use +* `stopping_criterion` – ([`StopAfterIteration`](@ref)`(100)`) a [`StoppingCriterion`](@ref) +* `sub_problem` - sub problem solved in each iteration +* `sub_state` - sub state for solving the sub problem – either a solver state if + the problem is an [`AbstractManoptProblem`](@ref) or an [`AbstractEvaluationType`](@ref) if it is a function, + where it defaults to [`AllocatingEvaluation`](@ref). + +Furthermore the following interal fields are defined + +* `q` - (`copy(M,p)`) a point for the candidates to evaluate model and ρ +* `H` – (`copy(M, p, X)`) the current hessian, $\operatorname{Hess}F(p)[⋅]$ +* `S` – (`copy(M, p, X)`) the current solution from the subsolver +* `ρ` – the current regularized ratio of actual improvement and model improvement. +* `ρ_denominator` – (`one(ρ)`) a value to store the denominator from the computation of ρ + to allow for a warning or error when this value is non-positive. + +# Constructor + + AdaptiveRegularizationState(M, p=rand(M); X=zero_vector(M, p); kwargs...) + +Construct the solver state with all fields stated above as keyword arguments. +""" +mutable struct AdaptiveRegularizationState{ + P, + T, + Pr<:AbstractManoptProblem, + St<:AbstractManoptSolverState, + TStop<:StoppingCriterion, + R, + TRTM<:AbstractRetractionMethod, +} <: AbstractManoptSolverState + p::P + X::T + sub_problem::Pr + sub_state::St + q::P + H::T + S::T + σ::R + ρ::R + ρ_denonimator::R + ρ_regularization::R + stop::TStop + retraction_method::TRTM + σmin::R + η1::R + η2::R + γ1::R + γ2::R +end + +function AdaptiveRegularizationState( + M::AbstractManifold, + p::P=rand(M), + X::T=zero_vector(M, p); + sub_objective=nothing, + sub_problem::Pr=if isnothing(sub_objective) + nothing + else + DefaultManoptProblem(M, sub_objective) + end, + sub_state::St=sub_problem isa Function ? AllocatingEvaluation() : LanczosState(M, p), + σ::R=100.0 / sqrt(manifold_dimension(M)),# Had this to inital value of 0.01. However try same as in MATLAB: 100/sqrt(dim(M)) + ρ_regularization::R=1e3, + stopping_criterion::SC=StopAfterIteration(100), + retraction_method::RTM=default_retraction_method(M), + σmin::R=1e-10, #Set the below to appropriate default vals. + η1::R=0.1, + η2::R=0.9, + γ1::R=0.1, + γ2::R=2.0, +) where { + P, + T, + R, + Pr<:Union{<:AbstractManoptProblem,<:Function,Nothing}, + St<:Union{<:AbstractManoptSolverState,<:AbstractEvaluationType}, + SC<:StoppingCriterion, + RTM<:AbstractRetractionMethod, +} + isnothing(sub_problem) && error("No sub_problem provided,") + + return AdaptiveRegularizationState{P,T,Pr,St,SC,R,RTM}( + p, + X, + sub_problem, + sub_state, + copy(M, p), + copy(M, p, X), + copy(M, p, X), + σ, + one(σ), + one(σ), + ρ_regularization, + stopping_criterion, + retraction_method, + σmin, + η1, + η2, + γ1, + γ2, + ) +end + +get_iterate(s::AdaptiveRegularizationState) = s.p +function set_iterate!(s::AdaptiveRegularizationState, p) + s.p = p + return s +end +get_gradient(s::AdaptiveRegularizationState) = s.X +function set_gradient!(s::AdaptiveRegularizationState, X) + s.X = X + return s +end + +function show(io::IO, arcs::AdaptiveRegularizationState) + i = get_count(arcs, :Iterations) + Iter = (i > 0) ? "After $i iterations\n" : "" + Conv = indicates_convergence(arcs.stop) ? "Yes" : "No" + sub = repr(arcs.sub_state) + sub = replace(sub, "\n" => "\n | ") + s = """ + # Solver state for `Manopt.jl`s Adaptive Regularization with Cubics (ARC) + $Iter + ## Parameters + * η1 | η2 : $(arcs.η1) | $(arcs.η2) + * γ1 | γ2 : $(arcs.γ1) | $(arcs.γ2) + * σ (σmin) : $(arcs.σ) ($(arcs.σmin)) + * ρ (ρ_regularization) : $(arcs.ρ) ($(arcs.ρ_regularization)) + * retraction method : $(arcs.retraction_method) + * sub solver state : + | $(sub) + + ## Stopping Criterion + $(status_summary(arcs.stop)) + This indicates convergence: $Conv""" + return print(io, s) +end + +@doc raw""" + adaptive_regularization_with_cubics(M, f, grad_f, Hess_f, p=rand(M); kwargs...) + adaptive_regularization_with_cubics(M, f, grad_f, p=rand(M); kwargs...) + adaptive_regularization_with_cubics(M, mho, p=rand(M); kwargs...) + +Solve an optimization problem on the manifold `M` by iteratively minimizing + +```math +m_k(X) = f(p_k) + ⟨X, \operatorname{grad} f(p_k)⟩ + \frac{1}{2}⟨X, \operatorname{Hess} f(p_k)[X]⟩ + \frac{σ_k}{3}\lVert X \rVert^3 +``` + +on the tangent space at the current iterate ``p_k``, i.e. ``X ∈ T_{p_k}\mathcal M`` and +where ``σ_k > 0`` is a regularization parameter. + +Let ``X_k`` denote the minimizer of the model ``m_k``, then we use the model improvement + +```math +ρ_k = \frac{f(p_k) - f(\operatorname{retr}_{p_k}(X_k))}{m_k(0) - m_k(s) + \frac{σ_k}{3}\lVert X_k\rVert^3}. +``` + +We use two thresholds ``η_2 ≥ η_1 > 0`` and set +``p_{k+1} = \operatorname{retr}_{p_k}(X_k)`` if ``ρ ≥ η_1`` and reject the candidate otherwise, i.e. set ``p_{k+1} = p_k``. + +We further update the regularozation parameter using factors ``0 < γ_1 < 1 < γ_2`` + +```math +σ_{k+1} = +\begin{cases} + \max\{σ_{\min}, γ_1σ_k\} & \text{ if } ρ \geq η_2 &\text{ (the model was very successful)},\\ + σ_k & \text{ if } ρ \in [η_1, η_2)&\text{ (the model was succesful)},\\ + γ_2σ_k & \text{ if } ρ < η_1&\text{ (the model was unsuccesful)}. +\end{cases} +``` + +For more details see [Agarwal, Boumal, Bullins, Cartis, Math. Prog., 2020](@cite AgarwalBoumalBullinsCartis:2020). + +# Input + +* `M` – a manifold ``\mathcal M`` +* `f` – a cost function ``F: \mathcal M → ℝ`` to minimize +* `grad_f`- the gradient ``\operatorname{grad}F: \mathcal M → T \mathcal M`` of ``F`` +* `Hess_f` – (optional) the hessian ``H( \mathcal M, x, ξ)`` of ``F`` +* `p` – an initial value ``p ∈ \mathcal M`` + +For the case that no hessian is provided, the Hessian is computed using finite difference, see +[`ApproxHessianFiniteDifference`](@ref). + +the cost `f` and its gradient and hessian might also be provided as a [`ManifoldHessianObjective`](@ref) + +# Keyword arguments +the default values are given in brackets + +* `σ` - (`100.0 / sqrt(manifold_dimension(M)`) initial regularization parameter +* `σmin` - (`1e-10`) minimal regularization value ``σ_{\min}`` +* `η1` - (`0.1`) lower model success threshold +* `η2` - (`0.9`) upper model success threshold +* `γ1` - (`0.1`) regularization reduction factor (for the success case) +* `γ2` - (`2.0`) regularization increment factor (for the non-success case) +* `evaluation` – ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by allocation (default) form `grad_f(M, p)` + or [`InplaceEvaluation`](@ref) in place, i.e. is of the form `grad_f!(M, X, p)` and analogously for the hessian. +* `retraction_method` – (`default_retraction_method(M, typeof(p))`) a retraction to use +* `initial_tangent_vector` - (`zero_vector(M, p)`) initialize any tangent vector data, +* `maxIterLanczos` - (`200`) a shortcut to set the stopping criterion in the sub_solver, +* `ρ_regularization` - (`1e3`) a regularization to avoid dividing by zero for small values of cost and model +* `stopping_criterion` - ([`StopAfterIteration`](@ref)`(40) | `[`StopWhenGradientNormLess`](@ref)`(1e-9) | `[`StopWhenAllLanczosVectorsUsed`](@ref)`(maxIterLanczos)`) +* `sub_state` - [`LanczosState`](@ref)`(M, copy(M, p); maxIterLanczos=maxIterLanczos, σ=σ) + a state for the subproblem or an [`AbstractEvaluationType`](@ref) if the problem is a funtion. +* `sub_objective` - a shortcut to modify the objective of the subproblem used within in the +* `sub_problem` - [`DefaultManoptProblem`](@ref)`(M, sub_objective)` the problem (or a function) for the sub problem + +All other keyword arguments are passed to [`decorate_state!`](@ref) for state decorators or +[`decorate_objective!`](@ref) for objective, respectively. +If you provide the [`ManifoldGradientObjective`](@ref) directly, these decorations can still be specified + +By default the `debug=` keyword is set to [`DebugIfEntry`](@ref)`(:ρ_denonimator, >(0); message="Denominator nonpositive", type=:error)`` +to avoid that by rounding errors the denominator in the computation of `ρ` gets nonpositive. +""" +adaptive_regularization_with_cubics(M::AbstractManifold, args...; kwargs...) + +function adaptive_regularization_with_cubics( + M::AbstractManifold, f, grad_f, Hess_f::TH; kwargs... +) where {TH<:Function} + return adaptive_regularization_with_cubics(M, f, grad_f, Hess_f, rand(M); kwargs...) +end +function adaptive_regularization_with_cubics( + M::AbstractManifold, + f::TF, + grad_f::TDF, + Hess_f::THF, + p; + evaluation::AbstractEvaluationType=AllocatingEvaluation(), + kwargs..., +) where {TF,TDF,THF} + mho = ManifoldHessianObjective(f, grad_f, Hess_f; evaluation=evaluation) + return adaptive_regularization_with_cubics(M, mho, p; evaluation=evaluation, kwargs...) +end +function adaptive_regularization_with_cubics( + M::AbstractManifold, + f::TF, + grad_f::TDF, + Hess_f::THF, + p::Number; + evaluation::AbstractEvaluationType=AllocatingEvaluation(), + kwargs..., +) where {TF,TDF,THF} + q = [p] + f_(M, p) = f(M, p[]) + Hess_f_ = Hess_f + # For now we can not update the gradient within the ApproxHessian so the filled default + # Hessian fails here + if evaluation isa AllocatingEvaluation + grad_f_ = (M, p) -> [grad_f(M, p[])] + Hess_f_ = (M, p, X) -> [Hess_f(M, p[], X[])] + else + grad_f_ = (M, X, p) -> (X .= [grad_f(M, p[])]) + Hess_f_ = (M, Y, p, X) -> (Y .= [Hess_f(M, p[], X[])]) + end + rs = adaptive_regularization_with_cubics( + M, f_, grad_f_, Hess_f_, q; evaluation=evaluation, kwargs... + ) + return (typeof(q) == typeof(rs)) ? rs[] : rs +end +function adaptive_regularization_with_cubics(M::AbstractManifold, f, grad_f; kwargs...) + return adaptive_regularization_with_cubics(M, f, grad_f, rand(M); kwargs...) +end +function adaptive_regularization_with_cubics( + M::AbstractManifold, + f::TF, + grad_f::TdF, + p; + evaluation=AllocatingEvaluation(), + retraction_method::AbstractRetractionMethod=default_retraction_method(M, typeof(p)), + kwargs..., +) where {TF,TdF} + Hess_f = ApproxHessianFiniteDifference( + M, copy(M, p), grad_f; evaluation=evaluation, retraction_method=retraction_method + ) + return adaptive_regularization_with_cubics( + M, + f, + grad_f, + Hess_f, + p; + evaluation=evaluation, + retraction_method=retraction_method, + kwargs..., + ) +end +function adaptive_regularization_with_cubics( + M::AbstractManifold, mho::O, p=rand(M); kwargs... +) where {O<:Union{ManifoldHessianObjective,AbstractDecoratedManifoldObjective}} + q = copy(M, p) + return adaptive_regularization_with_cubics!(M, mho, q; kwargs...) +end + +@doc raw""" + adaptive_regularization_with_cubics!(M, f, grad_f, Hess_f, p; kwargs...) + adaptive_regularization_with_cubics!(M, f, grad_f, p; kwargs...) + adaptive_regularization_with_cubics!(M, mho, p; kwargs...) + +evaluate the Riemannian adaptive regularization with cubics solver in place of `p`. + +# Input +* `M` – a manifold ``\mathcal M`` +* `f` – a cost function ``F: \mathcal M → ℝ`` to minimize +* `grad_f`- the gradient ``\operatorname{grad}F: \mathcal M → T \mathcal M`` of ``F`` +* `Hess_f` – (optional) the hessian ``H( \mathcal M, x, ξ)`` of ``F`` +* `p` – an initial value ``p ∈ \mathcal M`` + +For the case that no hessian is provided, the Hessian is computed using finite difference, see +[`ApproxHessianFiniteDifference`](@ref). + +the cost `f` and its gradient and hessian might also be provided as a [`ManifoldHessianObjective`](@ref) + +for more details and all options, see [`adaptive_regularization_with_cubics`](@ref). +""" +adaptive_regularization_with_cubics!(M::AbstractManifold, args...; kwargs...) +function adaptive_regularization_with_cubics!( + M::AbstractManifold, + f, + grad_f, + p; + evaluation::AbstractEvaluationType=AllocatingEvaluation(), + retraction_method::AbstractRetractionMethod=default_retraction_method(M, typeof(p)), + kwargs..., +) + hess_f = ApproxHessianFiniteDifference( + M, copy(M, p), grad_f; evaluation=evaluation, retraction_method=retraction_method + ) + return adaptive_regularization_with_cubics!( + M, + f, + grad_f, + hess_f, + p; + evaluation=evaluation, + retraction_method=retraction_method, + kwargs..., + ) +end +function adaptive_regularization_with_cubics!( + M::AbstractManifold, + f, + grad_f, + Hess_f::TH, + p; + evaluation::AbstractEvaluationType=AllocatingEvaluation(), + kwargs..., +) where {TH<:Function} + mho = ManifoldHessianObjective(f, grad_f, Hess_f; evaluation=evaluation) + return adaptive_regularization_with_cubics!(M, mho, p; evaluation=evaluation, kwargs...) +end +function adaptive_regularization_with_cubics!( + M::AbstractManifold, + mho::O, + p=rand(M); + debug=DebugIfEntry( + :ρ_denonimator, >(0); message="Denominator nonpositive", type=:error + ), + evaluation::AbstractEvaluationType=AllocatingEvaluation(), + initial_tangent_vector::T=zero_vector(M, p), + maxIterLanczos=min(300, manifold_dimension(M)), + ρ_regularization::R=1e3, + retraction_method::AbstractRetractionMethod=default_retraction_method(M), + σmin::R=1e-10, + σ::R=100.0 / sqrt(manifold_dimension(M)), + η1::R=0.1, + η2::R=0.9, + γ1::R=0.1, + γ2::R=2.0, + θ::R=0.5, + sub_stopping_criterion::StoppingCriterion=StopAfterIteration(maxIterLanczos) | + StopWhenFirstOrderProgress(θ), + sub_state::Union{<:AbstractManoptSolverState,<:AbstractEvaluationType}=LanczosState( + M, + copy(M, p); + maxIterLanczos=maxIterLanczos, + σ=σ, + θ=θ, + stopping_criterion=sub_stopping_criterion, + ), + sub_objective=mho, + sub_problem=DefaultManoptProblem(M, sub_objective), + stopping_criterion::StoppingCriterion=if sub_state isa LanczosState + StopAfterIteration(40) | + StopWhenGradientNormLess(1e-9) | + StopWhenAllLanczosVectorsUsed(maxIterLanczos - 1) + else + StopAfterIteration(40) | StopWhenGradientNormLess(1e-9) + end, + kwargs..., +) where {T,R,O<:Union{ManifoldHessianObjective,AbstractDecoratedManifoldObjective}} + X = copy(M, p, initial_tangent_vector) + dmho = decorate_objective!(M, mho; kwargs...) + dmp = DefaultManoptProblem(M, dmho) + arcs = AdaptiveRegularizationState( + M, + p, + X; + sub_state=sub_state, + sub_problem=sub_problem, + σ=σ, + ρ_regularization=ρ_regularization, + stopping_criterion=stopping_criterion, + retraction_method=retraction_method, + σmin=σmin, + η1=η1, + η2=η2, + γ1=γ1, + γ2=γ2, + ) + darcs = decorate_state!(arcs; debug, kwargs...) + solve!(dmp, darcs) + return get_solver_return(get_objective(dmp), darcs) +end + +function initialize_solver!(dmp::AbstractManoptProblem, arcs::AdaptiveRegularizationState) + get_gradient!(dmp, arcs.X, arcs.p) + return arcs +end +function step_solver!(dmp::AbstractManoptProblem, arcs::AdaptiveRegularizationState, i) + M = get_manifold(dmp) + mho = get_objective(dmp) + # Update sub state + # Set point also in the sub problem (eventually the tangent space) + get_gradient!(M, arcs.X, mho, arcs.p) + # Update base point in manifold + set_manopt_parameter!(arcs.sub_problem, :Manifold, :p, copy(M, arcs.p)) + set_manopt_parameter!(arcs.sub_problem, :Objective, :Cost, :X, copy(M, arcs.p, arcs.X)) + set_manopt_parameter!(arcs.sub_problem, :Objective, :Cost, :σ, arcs.σ) + set_manopt_parameter!( + arcs.sub_problem, :Objective, :Gradient, :X, copy(M, arcs.p, arcs.X) + ) + set_manopt_parameter!(arcs.sub_problem, :Objective, :Gradient, :σ, arcs.σ) + set_iterate!(arcs.sub_state, M, copy(M, arcs.p, arcs.X)) + set_manopt_parameter!(arcs.sub_state, :σ, arcs.σ) + set_manopt_parameter!(arcs.sub_state, :p, copy(M, arcs.p)) + #Solve the sub_problem – via dispatch depending on type + solve_arc_subproblem!(M, arcs.S, arcs.sub_problem, arcs.sub_state, arcs.p) + # Compute ρ + retract!(M, arcs.q, arcs.p, arcs.S, arcs.retraction_method) + cost = get_cost(M, mho, arcs.p) + ρ_num = cost - get_cost(M, mho, arcs.q) + ρ_vec = arcs.X + 0.5 * get_hessian(M, mho, arcs.p, arcs.S) + ρ_den = -inner(M, arcs.p, arcs.S, ρ_vec) + ρ_reg = arcs.ρ_regularization * eps(Float64) * max(abs(cost), 1) + arcs.ρ_denonimator = ρ_den + ρ_reg # <= 0 -> the default debug kicks in + arcs.ρ = (ρ_num + ρ_reg) / (ρ_den + ρ_reg) + + #Update iterate + if arcs.ρ >= arcs.η1 + copyto!(M, arcs.p, arcs.q) + get_gradient!(dmp, arcs.X, arcs.p) #only compute gradient when we update the point + end + #Update regularization parameter - in the mid interval between η1 and η2 we leave it as is + if arcs.ρ >= arcs.η2 #very successful, reduce + arcs.σ = max(arcs.σmin, arcs.γ1 * arcs.σ) + elseif arcs.ρ < arcs.η1 # unsuccessful + arcs.σ = arcs.γ2 * arcs.σ + end + return arcs +end + +# Dispatch on different forms of sub_solvers +function solve_arc_subproblem!( + M, s, problem::P, state::S, p +) where {P<:AbstractManoptProblem,S<:AbstractManoptSolverState} + solve!(problem, state) + copyto!(M, s, p, get_solver_result(state)) + return s +end +function solve_arc_subproblem!( + M, s, problem::P, ::AllocatingEvaluation, p +) where {P<:Function} + copyto!(M, s, p, problem(M, p)) + return s +end +function solve_arc_subproblem!( + M, s, problem!::P, ::InplaceEvaluation, p +) where {P<:Function} + problem!(M, s, p) + return s +end + +# +# Lanczos sub solver +# + +@doc raw""" + LanczosState{P,T,SC,B,I,R,TM,V,Y} <: AbstractManoptSolverState + +Solve the adaptive regularized subproblem with a Lanczos iteration + +# Fields + +* `p` the current iterate +* `stop` – the stopping criterion +* `σ` – the current regularization parameter +* `X` the current gradient +* `Lanczos_vectors` – the obtained Lanczos vectors +* `tridig_matrix` the tridigonal coefficient matrix T +* `coefficients` the coefficients `y_1,...y_k`` that deteermine the solution +* `Hp` – a temporary vector containing the evaluation of the Hessian +* `Hp_residual` – a temporary vector containing the residual to the Hessian +* `S` – the current obtained / approximated solution +""" +mutable struct LanczosState{P,T,R,SC,SCN,B,TM,C} <: AbstractManoptSolverState + p::P + X::T + σ::R + stop::SC # Notation in ABBC + stop_newton::SCN + Lanczos_vectors::B # qi + tridig_matrix::TM # T + coefficients::C # y + Hp::T # Hess_f A temporary vector for evaluations of the hessian + Hp_residual::T # A residual vector + # Maybe not necessary? + S::T # store the tangent vector that solves the minimization problem +end +function LanczosState( + M::AbstractManifold, + p::P=rand(M); + X::T=zero_vector(M, p), + maxIterLanczos=200, + θ=0.5, + stopping_criterion::SC=StopAfterIteration(maxIterLanczos) | + StopWhenFirstOrderProgress(θ), + stopping_criterion_newtown::SCN=StopAfterIteration(200), + σ::R=10.0, +) where {P,T,SC<:StoppingCriterion,SCN<:StoppingCriterion,R} + tridig = spdiagm(maxIterLanczos, maxIterLanczos, [0.0]) + coeffs = zeros(maxIterLanczos) + Lanczos_vectors = typeof(X)[] + return LanczosState{P,T,R,SC,SCN,typeof(Lanczos_vectors),typeof(tridig),typeof(coeffs)}( + p, + X, + σ, + stopping_criterion, + stopping_criterion_newtown, + Lanczos_vectors, + tridig, + coeffs, + copy(M, p, X), + copy(M, p, X), + copy(M, p, X), + ) +end +function get_solver_result(ls::LanczosState) + return ls.S +end +function set_manopt_parameter!(ls::LanczosState, ::Val{:p}, p) + ls.p = p + return ls +end +function set_iterate!(ls::LanczosState, M, X) + ls.X = X + return ls +end +function set_manopt_parameter!(ls::LanczosState, ::Val{:σ}, σ) + ls.σ = σ + return ls +end + +function show(io::IO, ls::LanczosState) + i = get_count(ls, :Iterations) + Iter = (i > 0) ? "After $i iterations\n" : "" + Conv = indicates_convergence(ls.stop) ? "Yes" : "No" + s = """ + # Solver state for `Manopt.jl`s Lanczos Iteration + $Iter + ## Parameters + * σ : $(ls.σ) + * # of Lanczos vectors used : $(length(ls.Lanczos_vectors)) + + ## Stopping Criteria + (a) For the Lanczos Iteration + $(status_summary(ls.stop)) + (b) For the Newton sub solver + $(status_summary(ls.stop_newton)) + This indicates convergence: $Conv""" + return print(io, s) +end + +# +# The Lanczos Subsolver implementation +# +function initialize_solver!(dmp::AbstractManoptProblem, ls::LanczosState) + M = get_manifold(dmp) + # Maybe better to allocate once and just reset the number of vectors k? + maxIterLanczos = size(ls.tridig_matrix, 1) + ls.tridig_matrix = spdiagm(maxIterLanczos, maxIterLanczos, [0.0]) + ls.coefficients = zeros(maxIterLanczos) + for X in ls.Lanczos_vectors + zero_vector!(M, X, ls.p) + end + zero_vector!(M, ls.Hp, ls.p) + zero_vector!(M, ls.Hp_residual, ls.p) + return ls +end + +function step_solver!(dmp::AbstractManoptProblem, ls::LanczosState, i) + M = get_manifold(dmp) + mho = get_objective(dmp) + if i == 1 #we can easily compute the first Lanczos vector + nX = norm(M, ls.p, ls.X) + if length(ls.Lanczos_vectors) == 0 + push!(ls.Lanczos_vectors, ls.X ./ nX) + else + copyto!(M, ls.Lanczos_vectors[1], ls.p, ls.X ./ nX) + end + get_hessian!(M, ls.Hp, mho, ls.p, ls.Lanczos_vectors[1]) + α = inner(M, ls.p, ls.Lanczos_vectors[1], ls.Hp) + # This is also the first coefficient in the tridigianoal matrix + ls.tridig_matrix[1, 1] = α + ls.Hp_residual .= ls.Hp - α * ls.Lanczos_vectors[1] + #argmin of one dimensional model + ls.coefficients[1] = (α - sqrt(α^2 + 4 * ls.σ * nX)) / (2 * ls.σ) + else # i > 1 + β = norm(M, ls.p, ls.Hp_residual) + if β > 1e-12 # Obtained new orth Lanczos long enough cf. to num stability + if length(ls.Lanczos_vectors) < i + push!(ls.Lanczos_vectors, ls.Hp_residual ./ β) + else + copyto!(M, ls.Lanczos_vectors[i], ls.p, ls.Hp_residual ./ β) + end + else # Generate new random vec and MGS of new vec wrt. Q + rand!(M, ls.Hp_residual; vector_at=ls.p) + for k in 1:(i - 1) + ls.Hp_residual .= + ls.Hp_residual - + inner(M, ls.p, ls.Lanczos_vectors[k], ls.Hp_residual) * + ls.Lanczos_vectors[k] + end + if length(ls.Lanczos_vectors) < i + push!(ls.Lanczos_vectors, ls.Hp_residual ./ norm(M, ls.p, ls.Hp_residual)) + else + copyto!( + M, + ls.Lanczos_vectors[i], + ls.p, + ls.Hp_residual ./ norm(M, ls.p, ls.Hp_residual), + ) + end + end + # Update Hessian and residual + get_hessian!(M, ls.Hp, mho, ls.p, ls.Lanczos_vectors[i]) + ls.Hp_residual .= ls.Hp - β * ls.Lanczos_vectors[i - 1] + α = inner(M, ls.p, ls.Hp_residual, ls.Lanczos_vectors[i]) + ls.Hp_residual .= ls.Hp_residual - α * ls.Lanczos_vectors[i] + # Update tridiagonal matric + ls.tridig_matrix[i, i] = α + ls.tridig_matrix[i - 1, i] = β + ls.tridig_matrix[i, i - 1] = β + min_cubic_Newton!(dmp, ls, i) + end + copyto!(M, ls.S, ls.p, sum(ls.Lanczos_vectors[k] * ls.coefficients[k] for k in 1:i)) + return ls +end +# +# Solve Lanczos sub problem +# +function min_cubic_Newton!(mp::AbstractManoptProblem, ls::LanczosState, i) + M = get_manifold(mp) + tol = 1e-16 # TODO: Put into a stopping criterion + + gvec = zeros(i) + gvec[1] = norm(M, ls.p, ls.X) + λ = opnorm(Array(@view ls.tridig_matrix[1:i, 1:i])) + 2 + T_λ = @view(ls.tridig_matrix[1:i, 1:i]) + λ * I + + λ_min = eigmin(Array(@view ls.tridig_matrix[1:i, 1:i])) + lower_barrier = max(0, -λ_min) + k = 0 + y = zeros(i) + while !ls.stop_newton(mp, ls, k) + k += 1 + y = -(T_λ \ gvec) + ynorm = norm(y, 2) + ϕ = 1 / ynorm - ls.σ / λ #when ϕ is "zero", y is the solution. + (abs(ϕ) < tol * ynorm) && break + #compute the newton step + ψ = ynorm^2 + Δy = -(T_λ) \ y + ψ_prime = 2 * dot(y, Δy) + # Quadratic polynomial coefficients + p0 = 2 * ls.σ * ψ^(1.5) + p1 = -2 * ψ - λ * ψ_prime + p2 = ψ_prime + #Polynomial roots + r1 = (-p1 + sqrt(p1^2 - 4 * p2 * p0)) / (2 * p2) + r2 = (-p1 - sqrt(p1^2 - 4 * p2 * p0)) / (2 * p2) + + Δλ = max(r1, r2) - λ + + #if we jumped past the lower barrier for λ, jump to midpoint between current and lower λ. + (λ + Δλ <= lower_barrier) && (Δλ = -0.5 * (λ - lower_barrier)) + #if the steps we make are to small, terminate + (abs(Δλ) <= eps(λ)) && break + T_λ = T_λ + Δλ * I + λ = λ + Δλ + end + ls.coefficients[1:i] .= y + return ls.coefficients +end + +# +# Stopping Criteria +# +@doc raw""" + StopWhenFirstOrderProgress <: StoppingCriterion + +A stopping criterion related to the Riemannian adaptive regularization with cubics (ARC) +solver indicating that the model function at the current (outer) iterate, i.e. + +```math + m(X) = f(p) + + + \frac{1}{2} + \frac{σ}{3} \lVert X \rVert^3, +``` + +defined on the tangent space ``T_{p}\mathcal M`` +fulfills at the current iterate ``X_k`` that + +```math +m(X_k) \leq m(0) +\quad\text{ and }\quad +\lVert \operatorname{grad} m(X_k) \rVert ≤ θ \lVert X_k \rVert^2 +``` + +# Fields + +* `θ` – the factor ``θ`` in the second condition above +* `reason` – a String indicating the reason if the criterion indicated to stop +""" +mutable struct StopWhenFirstOrderProgress <: StoppingCriterion + θ::Float64 #θ + reason::String + StopWhenFirstOrderProgress(θ::Float64) = new(θ, "") +end +function (c::StopWhenFirstOrderProgress)( + dmp::AbstractManoptProblem, ls::LanczosState, i::Int +) + if (i == 0) + c.reason = "" + return false + end + #Update Gradient + M = get_manifold(dmp) + get_gradient!(dmp, ls.X, ls.p) + nX = norm(M, ls.p, ls.X) + y = @view(ls.coefficients[1:(i - 1)]) + Ty = @view(ls.tridig_matrix[1:i, 1:(i - 1)]) * y + ny = norm(y) + model_grad_norm = norm(nX .* [1, zeros(i - 1)...] + Ty + ls.σ * ny * [y..., 0]) + if (i > 0) && (model_grad_norm <= c.θ * ny^2) + c.reason = "The subproblem has reached a point with ||grad m(X)|| ≤ θ ||X||^2, θ = $(c.θ)." + return true + end + return false +end +function status_summary(c::StopWhenFirstOrderProgress) + has_stopped = length(c.reason) > 0 + s = has_stopped ? "reached" : "not reached" + return "First order progress with θ=$(c.θ):\t$s" +end +indicates_convergence(c::StopWhenFirstOrderProgress) = true +function show(io::IO, c::StopWhenFirstOrderProgress) + return print(io, "StopWhenFirstOrderProgress($(repr(c.θ)))\n $(status_summary(c))") +end + +#A new stopping criterion that deals with the scenario when a step needs more Lanczos vectors than preallocated. +#Previously this would just cause an error due to out of bounds error. So this stopping criterion deals both with the scenario +#of too few allocated vectors and stagnation in the solver. +@doc raw""" + StopWhenAllLanczosVectorsUsed <: StoppingCriterion + +When an inner iteration has used up all Lanczos vectors, then this stoping crtierion is +a fallback / security stopping criterion in order to not access a non-existing field +in the array allocated for vectors. + +Note that this stopping criterion (for now) is only implemented for the case that an +[`AdaptiveRegularizationState`](@ref) when using a [`LanczosState`](@ref) subsolver + +# Fields + +* `maxLanczosVectors` – maximal number of Lanczos vectors +* `reason` – a String indicating the reason if the criterion indicated to stop + +# Constructor + + StopWhenAllLanczosVectorsUsed(maxLancosVectors::Int) + +""" +mutable struct StopWhenAllLanczosVectorsUsed <: StoppingCriterion + maxLanczosVectors::Int + reason::String + StopWhenAllLanczosVectorsUsed(maxLanczosVectors::Int) = new(maxLanczosVectors, "") +end +function (c::StopWhenAllLanczosVectorsUsed)( + ::AbstractManoptProblem, + arcs::AdaptiveRegularizationState{P,T,Pr,<:LanczosState}, + i::Int, +) where {P,T,Pr} + (i == 0) && (c.reason = "") # reset on init + if (i > 0) && length(arcs.sub_state.Lanczos_vectors) == c.maxLanczosVectors + c.reason = "The algorithm used all ($(c.maxLanczosVectors)) preallocated Lanczos vectors and may have stagnated.\n Consider increasing this value.\n" + return true + end + return false +end +function status_summary(c::StopWhenAllLanczosVectorsUsed) + has_stopped = length(c.reason) > 0 + s = has_stopped ? "reached" : "not reached" + return "All Lanczos vectors ($(c.maxLanczosVectors)) used:\t$s" +end +indicates_convergence(c::StopWhenAllLanczosVectorsUsed) = false +function show(io::IO, c::StopWhenAllLanczosVectorsUsed) + return print( + io, + "StopWhenAllLanczosVectorsUsed($(repr(c.maxLanczosVectors)))\n $(status_summary(c))", + ) +end diff --git a/src/solvers/conjugate_gradient_descent.jl b/src/solvers/conjugate_gradient_descent.jl index 427b81d2eb..622bea96fb 100644 --- a/src/solvers/conjugate_gradient_descent.jl +++ b/src/solvers/conjugate_gradient_descent.jl @@ -165,20 +165,20 @@ function conjugate_gradient_descent!( StopAfterIteration(500), StopWhenGradientNormLess(10^(-8)) ), vector_transport_method=default_vector_transport_method(M, typeof(p)), + initial_gradient=zero_vector(M, p), kwargs..., ) where {O<:Union{ManifoldGradientObjective,AbstractDecoratedManifoldObjective}} dmgo = decorate_objective!(M, mgo; kwargs...) dmp = DefaultManoptProblem(M, dmgo) - X = zero_vector(M, p) cgs = ConjugateGradientDescentState( M, - p, - stopping_criterion, - stepsize, - coefficient, - retraction_method, - vector_transport_method, - X, + p; + stopping_criterion=stopping_criterion, + stepsize=stepsize, + coefficient=coefficient, + retraction_method=retraction_method, + vector_transport_method=vector_transport_method, + initial_gradient=initial_gradient, ) dcgs = decorate_state!(cgs; kwargs...) solve!(dmp, dcgs) diff --git a/src/solvers/gradient_descent.jl b/src/solvers/gradient_descent.jl index 8eccbc510c..58e3076182 100644 --- a/src/solvers/gradient_descent.jl +++ b/src/solvers/gradient_descent.jl @@ -60,6 +60,7 @@ mutable struct GradientDescentState{ return s end end + function GradientDescentState( M::AbstractManifold, p::P=rand(M); diff --git a/src/solvers/trust_regions.jl b/src/solvers/trust_regions.jl index c53be7dad9..e049040b56 100644 --- a/src/solvers/trust_regions.jl +++ b/src/solvers/trust_regions.jl @@ -354,14 +354,14 @@ end trust_regions!(M, f, grad_f, Hess_f, p; kwargs...) trust_regions!(M, f, grad_f, p; kwargs...) -evaluate the Riemannian trust-regions solver for optimization on manifolds in place of `p`. +evaluate the Riemannian trust-regions solver in place of `p`. # Input * `M` – a manifold ``\mathcal M`` * `f` – a cost function ``F: \mathcal M → ℝ`` to minimize * `grad_f`- the gradient ``\operatorname{grad}F: \mathcal M → T \mathcal M`` of ``F`` * `Hess_f` – (optional) the hessian ``H( \mathcal M, x, ξ)`` of ``F`` -* `x` – an initial value ``x ∈ \mathcal M`` +* `p` – an initial value ``p ∈ \mathcal M`` For the case that no hessian is provided, the Hessian is computed using finite difference, see [`ApproxHessianFiniteDifference`](@ref). @@ -452,11 +452,11 @@ function trust_regions!( ), ) dmho = decorate_objective!(M, mho; kwargs...) - mp = DefaultManoptProblem(M, dmho) + dmp = DefaultManoptProblem(M, dmho) trs = TrustRegionsState( M, p, - get_gradient(mp, p), + get_gradient(dmp, p), sub_state; trust_region_radius=trust_region_radius, max_trust_region_radius=max_trust_region_radius, @@ -470,8 +470,8 @@ function trust_regions!( (project!)=project!, ) dtrs = decorate_state!(trs; kwargs...) - solve!(mp, dtrs) - return get_solver_return(get_objective(mp), dtrs) + solve!(dmp, dtrs) + return get_solver_return(get_objective(dmp), dtrs) end function initialize_solver!(mp::AbstractManoptProblem, trs::TrustRegionsState) M = get_manifold(mp) diff --git a/test/plans/test_debug.jl b/test/plans/test_debug.jl index b2c9913644..f47404d5a5 100644 --- a/test/plans/test_debug.jl +++ b/test/plans/test_debug.jl @@ -350,6 +350,28 @@ Manopt.get_message(::TestMessageState) = "DebugTest" @test Manopt.status_summary(d) == ":Messages" @test_logs (:info, "DebugTest") d(mp, s, 0) end + @testset "DebugIfEntry" begin + io = IOBuffer() + M = ManifoldsBase.DefaultManifold(2) + p = [-4.0, 2.0] + st = GradientDescentState( + M, p; stopping_criterion=StopAfterIteration(20), stepsize=ConstantStepsize(M) + ) + f(M, y) = Inf + grad_f(M, y) = Inf .* ones(2) + mp = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f)) + + die1 = DebugIfEntry(:p, p -> p[1] > 0.0; type=:warn, message="test1") + @test startswith(repr(die1), "DebugIfEntry(:p, ") + @test_logs (:warn, "test1") die1(mp, st, 1) + die2 = DebugIfEntry(:p, p -> p[1] > 0.0; type=:info, message="test2") + @test_logs (:info, "test2") die2(mp, st, 1) + die3 = DebugIfEntry(:p, p -> p[1] > 0.0; type=:error, message="test3") + @test_throws ErrorException die3(mp, st, 1) + die4 = DebugIfEntry(:p, p -> p[1] > 0.0; type=:print, message="test4", io=io) + die4(mp, st, 1) + @test String(take!(io)) == "test4" + end @testset "DebugWhenActive" begin io = IOBuffer() M = ManifoldsBase.DefaultManifold(2) diff --git a/test/plans/test_objective.jl b/test/plans/test_objective.jl index 9f3ad175cb..f6bc73be1e 100644 --- a/test/plans/test_objective.jl +++ b/test/plans/test_objective.jl @@ -1,4 +1,4 @@ -using Manopt, Test +using ManifoldsBase, Manopt, Test include("../utils/dummy_types.jl") @@ -23,4 +23,9 @@ include("../utils/dummy_types.jl") r2 = Manopt.ReturnManifoldObjective(d) @test repr(r) == "ManifoldCostObjective{AllocatingEvaluation}" end + @testset "set_manopt_parameter!" begin + o = ManifoldCostObjective(x -> x) + mp = DefaultManoptProblem(ManifoldsBase.DefaultManifold(2), o) + set_manopt_parameter!(mp, :Objective, :Dummy, 1) + end end diff --git a/test/runtests.jl b/test/runtests.jl index 233e348ded..1409eac8b2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -42,6 +42,7 @@ include("utils/example_tasks.jl") include("helpers/test_linesearches.jl") end @testset "Solver Tests " begin + include("solvers/test_adaptive_regularization_with_cubics.jl") include("solvers/test_alternating_gradient.jl") include("solvers/test_augmented_lagrangian.jl") include("solvers/test_ChambollePock.jl") diff --git a/test/solvers/test_adaptive_regularization_with_cubics.jl b/test/solvers/test_adaptive_regularization_with_cubics.jl new file mode 100644 index 0000000000..3be02bf4cc --- /dev/null +++ b/test/solvers/test_adaptive_regularization_with_cubics.jl @@ -0,0 +1,207 @@ +using Manifolds, ManifoldsBase, Manopt, Test, Random +using LinearAlgebra: I, tr, Symmetric + +include("../utils/example_tasks.jl") + +@testset "Adaptive Reguilarization with Cubics" begin + Random.seed!(42) + n = 8 + k = 3 + A = Symmetric(randn(n, n)) + + M = Grassmann(n, k) + + f(M, p) = -0.5 * tr(p' * A * p) + grad_f(M, p) = -A * p + p * (p' * A * p) + Hess_f(M, p, X) = -A * X + p * p' * A * X + X * p' * A * p + + p0 = Matrix{Float64}(I, n, n)[:, 1:k] + M2 = TangentSpaceAtPoint(M, p0) + + mho = ManifoldHessianObjective(f, grad_f, Hess_f) + g = AdaptiveRegularizationCubicCost(M2, mho) + grad_g = AdaptiveRegularizationCubicGrad(M2, mho) + + @testset "State and repr" begin + # if we neither provide a problem nor an objective, we expect an error + @test_throws ErrorException AdaptiveRegularizationState(ℝ^2) + g = AdaptiveRegularizationCubicCost(M2, mho) + grad_g = AdaptiveRegularizationCubicGrad(M2, mho) + + arcs = AdaptiveRegularizationState( + M, + p0; + sub_problem=DefaultManoptProblem(M2, ManifoldGradientObjective(g, grad_g)), + sub_state=GradientDescentState(M2, zero_vector(M, p0)), + ) + @test startswith( + repr(arcs), + "# Solver state for `Manopt.jl`s Adaptive Regularization with Cubics (ARC)", + ) + p1 = rand(M) + X1 = rand(M; vector_at=p1) + set_iterate!(arcs, p1) + @test arcs.p == p1 + set_gradient!(arcs, X1) + @test arcs.X == X1 + arcs2 = AdaptiveRegularizationState( + M, + p0; + sub_objective=mho, + sub_state=LanczosState(M; maxIterLanczos=1), + stopping_criterion=StopWhenAllLanczosVectorsUsed(1), + ) + #add a fake Lanczos + push!(arcs2.sub_state.Lanczos_vectors, X1) + # we reached 1 Lanczos + @test stop_solver!(arcs2.sub_problem, arcs2.sub_state, 1) + @test stop_solver!(arcs2.sub_problem, arcs2, 1) + + arcs3 = AdaptiveRegularizationState( + M, p0; sub_objective=mho, sub_state=LanczosState(M; maxIterLanczos=2) + ) + #add a fake Lanczos + push!(arcs3.sub_state.Lanczos_vectors, copy(M, p1, X1)) + step_solver!(arcs3.sub_problem, arcs3.sub_state, 2) # to introduce a random new one + # test orthognality of the new 2 ones + @test isapprox( + inner( + M, + p1, + arcs3.sub_state.Lanczos_vectors[1], + arcs3.sub_state.Lanczos_vectors[2], + ), + 0.0, + atol=1e-14, + ) + # a second that copies + arcs4 = AdaptiveRegularizationState( + M, p0; sub_objective=mho, sub_state=LanczosState(M; maxIterLanczos=2) + ) + #add a fake Lanczos + push!(arcs4.sub_state.Lanczos_vectors, copy(M, p1, X1)) + push!(arcs4.sub_state.Lanczos_vectors, copy(M, p1, X1)) + step_solver!(arcs4.sub_problem, arcs4.sub_state, 2) # to introduce a random new one but cupy to 2 + # test orthognality of the new 2 ones + @test isapprox( + inner( + M, + p1, + arcs4.sub_state.Lanczos_vectors[1], + arcs4.sub_state.Lanczos_vectors[2], + ), + 0.0, + atol=1e-14, + ) + + st1 = StopWhenFirstOrderProgress(0.5) + @test startswith(repr(st1), "StopWhenFirstOrderProgress(0.5)\n") + @test Manopt.indicates_convergence(st1) + st2 = StopWhenAllLanczosVectorsUsed(2) + @test startswith(repr(st2), "StopWhenAllLanczosVectorsUsed(2)\n") + @test !Manopt.indicates_convergence(st2) + @test startswith( + repr(arcs2.sub_state), "# Solver state for `Manopt.jl`s Lanczos Iteration\n" + ) + + f1(M, p) = p + f1!(M, q, p) = copyto!(M, q, p) + r = copy(M, p1) + Manopt.solve_arc_subproblem!(M, r, f1, AllocatingEvaluation(), p0) + @test r == p0 + r = copy(M, p1) + Manopt.solve_arc_subproblem!(M, r, f1!, InplaceEvaluation(), p0) + @test r == p0 + end + + @testset "A few solver runs" begin + p1 = adaptive_regularization_with_cubics( + M, f, grad_f, Hess_f, p0; θ=0.5, σ=100.0, retraction_method=PolarRetraction() + ) + # Second run with random p0 + Random.seed!(42) + p2 = adaptive_regularization_with_cubics( + M, f, grad_f, Hess_f; θ=0.5, σ=100.0, retraction_method=PolarRetraction() + ) + @test isapprox(M, p1, p2) + # Third with approximate Hessian + p3 = adaptive_regularization_with_cubics( + M, f, grad_f, p0; θ=0.5, σ=100.0, retraction_method=PolarRetraction() + ) + @test isapprox(M, p1, p3) + # Fourth with approximate Hessian _and_ random point + Random.seed!(36) + p4 = adaptive_regularization_with_cubics( + M, f, grad_f; θ=0.5, σ=100.0, retraction_method=PolarRetraction() + ) + @test isapprox(M, p1, p4) + # with a large η1 to trigger the bad model case once + p5 = adaptive_regularization_with_cubics( + M, + f, + grad_f, + Hess_f; + θ=0.5, + σ=100.0, + η1=0.89, + retraction_method=PolarRetraction(), + ) + @test isapprox(M, p1, p5) + + # in place + q1 = copy(M, p0) + adaptive_regularization_with_cubics!( + M, f, grad_f, Hess_f, q1; θ=0.5, σ=100.0, retraction_method=PolarRetraction() + ) + @test isapprox(M, p1, q1) + # in place with approx Hess + q2 = copy(M, p0) + adaptive_regularization_with_cubics!( + M, f, grad_f, q2; θ=0.5, σ=100.0, retraction_method=PolarRetraction() + ) + @test isapprox(M, p1, q2) + + # test both inplace and allocating variants of grad_g + X0 = grad_f(M, p0) + X1 = grad_g(M2, X0) + X2 = zero_vector(M, p0) + grad_g(M2, X2, X0) + @test isapprox(M, p0, X1, X2) + + sub_problem = DefaultManoptProblem(M2, ManifoldGradientObjective(g, grad_g)) + sub_state = GradientDescentState( + M2, + zero_vector(M, p0); + stopping_criterion=StopAfterIteration(500) | + StopWhenGradientNormLess(1e-11) | + StopWhenFirstOrderProgress(0.1), + ) + q3 = copy(M, p0) + adaptive_regularization_with_cubics!( + M, + mho, + q3; + θ=0.5, + σ=100.0, + retraction_method=PolarRetraction(), + sub_problem=sub_problem, + sub_state=sub_state, + return_objective=true, + return_state=true, + ) + @test isapprox(M, p1, q3) + end + @testset "A short solver run on the circle" begin + Mc, fc, grad_fc, pc0, pc_star = Circle_mean_task() + hess_fc(Mc, p, X) = 1.0 + p0 = 0.2 + p1 = adaptive_regularization_with_cubics( + Mc, fc, grad_fc, hess_fc, p0; θ=0.5, σ=100.0 + ) + @test fc(Mc, p0) > fc(Mc, p1) + p2 = adaptive_regularization_with_cubics( + Mc, fc, grad_fc, hess_fc, p0; θ=0.5, σ=100.0, evaluation=InplaceEvaluation() + ) + @test fc(Mc, p0) > fc(Mc, p2) + end +end diff --git a/test/solvers/test_gradient_descent.jl b/test/solvers/test_gradient_descent.jl index 904fdf04a6..cf3049d560 100644 --- a/test/solvers/test_gradient_descent.jl +++ b/test/solvers/test_gradient_descent.jl @@ -1,4 +1,4 @@ -using Manopt, Manifolds, Test +using Manopt, Manifolds, Test, Random @testset "Gradient Descent" begin @testset "allocating Circle" begin @@ -124,6 +124,7 @@ using Manopt, Manifolds, Test # Since we called gradient_descent n2 is newly allocated @test !isapprox(M, pts[1], n2) @test isapprox(M, north, n2) + Random.seed!(43) n2a = gradient_descent(M, f, grad_f) # Since we called gradient_descent n2 is newly allocated @test isapprox(M, north, n2a) diff --git a/tutorials/_quarto.yml b/tutorials/_quarto.yml index 1dacaa6add..27dec4224f 100644 --- a/tutorials/_quarto.yml +++ b/tutorials/_quarto.yml @@ -5,7 +5,6 @@ project: - "*.qmd" - "!GeodesicRegression.qmd" - "!InplaceGradient.qmd" - - "!HowToDebug.qmd" crossref: fig-prefix: Figure