Adaptive Regularisation with Cubics (ARC) Solver (#277)

* Add the adaptive regularisation with cubics (ARC) solver with different sub solvers, both Lanczos and a gradient based one. * Update changelog. --------- Co-authored-by: mathiasrm1 <103418574+mathiasrm1@users.noreply.github.com>
JuliaManifolds · Aug 23, 2023 · 0ab53d6 · 0ab53d6 · kellertuer · Aug 23, 2023
1 parent da64a03
commit 0ab53d6
Show file tree

Hide file tree

Showing 31 changed files with 1,469 additions and 49 deletions.
diff --git a/.gitignore b/.gitignore
@@ -19,3 +19,4 @@ docs/src/tutorials/*/
 docs/src/tutorials/*.md
 docs/.CondaPkg
 docs/src/tutorials/Optimize!_files
+docs/src/tutorials/*.html
diff --git a/Changelog.md b/Changelog.md
@@ -5,7 +5,20 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [0.4.x] - dd/mm/2023
+## [0.4.32] - 23/08/2023
+
+### Added
+
+* The adaptive regularization with cubics (ARC) solver.
+
+## [0.4.31] - 14/08/2023
+
+### Added
+
+* A `:Subsolver` keyword in the `debug=` keyword argument that activates the new `DebugWhenActive`
+  to de/activate subsolver debug from the main solver's `DebugEvery`.
+
+## [0.4.30] - 03/08/2023
 
 ### Changed
 

diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "Manopt"
 uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5"
 authors = ["Ronny Bergmann <manopt@ronnybergmann.net>"]
-version = "0.4.31"
+version = "0.4.32"
 
 [deps]
 ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
@@ -13,6 +13,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 ManifoldDiff = "af67fdf4-a580-4b9f-bbec-742ef357defd"
 ManifoldsBase = "3362f125-f0bb-47a3-aa74-596ffd7ef2fb"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
+PolynomialRoots = "3a141323-8675-5d76-9d11-e1df1406c778"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
@@ -41,6 +42,7 @@ LRUCache = "1.4"
 ManifoldDiff = "0.2, 0.3.3"
 Manifolds = "0.8.69"
 ManifoldsBase = "0.14.4"
+PolynomialRoots = "1"
 Requires = "0.5, 1"
 julia = "1.6"
 

diff --git a/docs/make.jl b/docs/make.jl
@@ -62,6 +62,7 @@ makedocs(
         mathengine=MathJax3(), prettyurls=get(ENV, "CI", nothing) == "true"
     ),
     modules=[Manopt],
+    authors="Ronny Bergmann and contributors.",
     sitename="Manopt.jl",
     strict=[
         :doctest,
@@ -94,6 +95,7 @@ makedocs(
         ],
         "Solvers" => [
             "Introduction" => "solvers/index.md",
+            "Adaptive Regularization with Cubics" => "solvers/adaptive-regularization-with-cubics.md",
             "Alternating Gradient Descent" => "solvers/alternating_gradient_descent.md",
             "Augmented Lagrangian Method" => "solvers/augmented_Lagrangian_method.md",
             "Chambolle-Pock" => "solvers/ChambollePock.md",

diff --git a/docs/src/about.md b/docs/src/about.md
@@ -7,7 +7,8 @@ The following people contributed
 * [Constantin Ahlmann-Eltze](https://const-ae.name) implemented the [gradient and differential check functions](helpers/checks.md)
 * [Renée Dornig](https://github.com/r-dornig) implemented the [particle swarm](@ref ParticleSwarmSolver), the [Riemannian Augmented Lagrangian Method](@ref AugmentedLagrangianSolver), the [Exact Penalty Method](@ref ExactPenaltySolver), as well as the [`NonmonotoneLinesearch`](@ref)
 * [Willem Diepeveen](https://www.maths.cam.ac.uk/person/wd292) implemented the [primal-dual Riemannian semismooth Newton](@ref PDRSSNSolver) solver.
-* Even Stephansen Kjemsås contributed to the implementation of the [Frank Wolfe Method](@ref FrankWolfe)
+* Even Stephansen Kjemsås contributed to the implementation of the [Frank Wolfe Method](@ref FrankWolfe) solver
+* Mathias Ravn Munkvold contributed most of the implementation of the [Adaptive Regularization with Cubics](@ref ARSSection) solver
 * [Tom-Christian Riemer](https://www.tu-chemnitz.de/mathematik/wire/mitarbeiter.php) Riemer implemented the [trust regions](@ref trust_regions) and [quasi Newton](solvers/quasi_Newton.md) solvers.
 * [Manuel Weiss](https://scoop.iwr.uni-heidelberg.de/author/manuel-weiß/) implemented most of the [conjugate gradient update rules](@ref cg-coeffs)
 

diff --git a/docs/src/references.bib b/docs/src/references.bib
@@ -25,6 +25,7 @@ @article{AbsilBakerGallivan:2006
     TITLE     = {Trust-Region Methods on Riemannian Manifolds},
     JOURNAL   = {Foundations of Computational Mathematics}
 }
+
 @article{AdachiOkunoTakeda:2022,
    AUTHOR = {Adachi, S. and Okuno, T., and Takeda, A.},
    JOURNAL = {ArXiv Preprint},
@@ -34,6 +35,15 @@ @article{AdachiOkunoTakeda:2022
    YEAR = {2022},
 }
 
+@article{AgarwalBoumalBullinsCartis:2020,
+    AUTHOR    = {Agarwal, N. and Boumal, N. and Bullins, B. and Cartis, C.},
+    TITLE     = {Adaptive regularization with cubics on manifolds},
+    JOURNAL   = {Mathematical Programming},
+    PUBLISHER = {Springer Science and Business Media LLC},
+    YEAR      = {2020},
+    DOI       = {10.1007/s10107-020-01505-1}
+}
+
 @article{AlmeidaNetoOliveiraSouza:2020,
     AUTHOR    = {Yldenilson Torres Almeida and João Xavier da Cruz Neto and Paulo Roberto Oliveira and João Carlos de Oliveira Souza},
     DOI       = {10.1007/s10589-020-00173-3},

diff --git a/docs/src/solvers/adaptive-regularization-with-cubics.md b/docs/src/solvers/adaptive-regularization-with-cubics.md
@@ -0,0 +1,64 @@
+# [Adaptive regularization with Cubics](@id ARSSection)
+
+
+
+```@meta
+CurrentModule = Manopt
+```
+
+```@docs
+adaptive_regularization_with_cubics
+adaptive_regularization_with_cubics!
+```
+
+## State
+
+```@docs
+AdaptiveRegularizationState
+```
+
+## Sub solvers
+
+There are several ways to approach the subsolver. The default is the first one.
+
+## Lanczos Iteration
+
+```@docs
+Manopt.LanczosState
+```
+
+## (Conjugate) Gradient Descent
+
+There are two generic functors that implement the sub problem:
+
+```@docs
+AdaptiveRegularizationCubicCost
+AdaptiveRegularizationCubicGrad
+```
+
+Since the sub problem is given on the tangent space, you have to provide
+
+```
+g = AdaptiveRegularizationCubicCost(M, mho, σ)
+grad_g = AdaptiveRegularizationCubicGrad(M, mho, σ)
+sub_problem = DefaultProblem(TangentSpaceAt(M,p), ManifoldGradientObjective(g, grad_g))
+```
+
+where `mho` is the Hessian objective of `f` to solve.
+Then use this for the `sub_problem` keyword
+and use your favourite gradient based solver for the `sub_state` keyword, for example a
+[`ConjugateGradientDescentState`](@ref).
+
+## Additional Stopping Criteria
+
+```@docs
+StopWhenAllLanczosVectorsUsed
+StopWhenFirstOrderProgress
+```
+
+## Literature
+
+```@bibliography
+Pages = ["solvers/adaptive-regularization-with-cubics.md"]
+Canonical=false
+```
diff --git a/docs/src/solvers/trust_regions.md b/docs/src/solvers/trust_regions.md
@@ -156,6 +156,8 @@ as well as their (non-exported) common supertype
 Manopt.AbstractApproxHessian
 ```
 
+## Literature
+
 ```@bibliography
 Pages = ["solvers/trust_regions.md"]
 Canonical=false

diff --git a/ext/ManoptManifoldsExt/ARC_CG.jl b/ext/ManoptManifoldsExt/ARC_CG.jl
@@ -0,0 +1,46 @@
+# Overwrite the point stored in the tangent space `M` in place with the
+# entries of `v` (selected via the `:p` parameter key) and return `M`.
+function set_manopt_parameter!(M::TangentSpaceAtPoint, ::Val{:p}, v)
+    stored_point = M.point
+    # in-place broadcast assignment: the stored point object itself is updated
+    stored_point .= v
+    return M
+end
+# Evaluate the cubic-regularised model at the tangent vector `X`,
+# cf. (33) in Agarwal et al.:
+#   f(p) + ⟨X, f.X⟩_p + 1/2 ⟨X, Hess f(p)[X]⟩_p + σ/3 ‖X‖_p^3,
+# where `p` is the point stored in the tangent space `M`.
+function (f::Manopt.AdaptiveRegularizationCubicCost)(M::TangentSpaceAtPoint, X)
+    N = base_manifold(M)
+    p = M.point
+    # constant term: cost of the objective at p
+    cost_at_p = get_cost(N, f.mho, p)
+    # linear term: inner product of X with the tangent vector stored in f
+    linear_term = inner(N, p, X, f.X)
+    # quadratic term: Hessian applied to X, paired with X
+    quadratic_term = 1 / 2 * inner(N, p, X, get_hessian(N, f.mho, p, X))
+    # cubic regularisation term
+    cubic_term = f.σ / 3 * norm(N, p, X)^3
+    return cost_at_p + linear_term + quadratic_term + cubic_term
+end
+# Allocating gradient of the cubic-regularised model at the tangent vector `X`,
+# cf. (37) in Agarwal et al.:
+#   grad_f.X + Hess f(p)[X] + σ ‖X‖_p X,
+# where `p` is the point stored in the tangent space `M`.
+function (grad_f::Manopt.AdaptiveRegularizationCubicGrad)(M::TangentSpaceAtPoint, X)
+    N = base_manifold(M)
+    p = M.point
+    hessian_X = get_hessian(N, grad_f.mho, p, X)
+    norm_X = norm(N, p, X)
+    return grad_f.X + hessian_X + grad_f.σ * norm_X * X
+end
+# In-place gradient of the cubic-regularised model at the tangent vector `X`:
+# the result grad_f.X + Hess f(p)[X] + σ ‖X‖_p X is written into `Y`,
+# which is also returned.
+function (grad_f::Manopt.AdaptiveRegularizationCubicGrad)(M::TangentSpaceAtPoint, Y, X)
+    N = base_manifold(M)
+    p = M.point
+    # first store the Hessian applied to X in Y ...
+    get_hessian!(N, Y, grad_f.mho, p, X)
+    # ... then add the stored tangent vector and the regularisation term
+    norm_X = norm(N, p, X)
+    Y .= Y + grad_f.X + grad_f.σ * norm_X * X
+    return Y
+end
+# Stopping criterion for a sub problem posed on a tangent space:
+# indicate to stop (return `true`) once, at iteration `i > 0`, the norm of the
+# model gradient at the current iterate `ams.p` is at most `c.θ` times the
+# squared norm of that iterate. At `i == 0` the stored reason is reset.
+function (c::StopWhenFirstOrderProgress)(
+    dmp::AbstractManoptProblem{<:TangentSpaceAtPoint},
+    ams::AbstractManoptSolverState,
+    i::Int,
+)
+    # start of a solver run: clear any reason from a previous run
+    if i == 0
+        c.reason = ""
+        return false
+    end
+    TpM = get_manifold(dmp)
+    N = base_manifold(TpM)
+    p = TpM.point
+    # both norms are measured in the tangent space at the stored point
+    nG = norm(N, p, get_gradient(dmp, ams.p))
+    nX = norm(N, p, ams.p)
+    criterion_met = (i > 0) && (nG <= c.θ * nX^2)
+    criterion_met || return false
+    c.reason = "The algorithm has reduced the model grad norm by $(c.θ).\n"
+    return true
+end
diff --git a/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl b/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl
@@ -9,7 +9,8 @@ import Manopt:
     alternating_gradient_descent,
     alternating_gradient_descent!,
     get_gradient,
-    get_gradient!
+    get_gradient!,
+    set_manopt_parameter!
 using LinearAlgebra: cholesky, det, diag, dot, Hermitian, qr, Symmetric, triu, I, Diagonal
 import ManifoldsBase: copy, mid_point, mid_point!
 
@@ -29,5 +30,5 @@ include("nonmutating_manifolds_functions.jl")
 include("artificialDataFunctionsManifolds.jl")
 include("ChambollePockManifolds.jl")
 include("alternating_gradient.jl")
-
+include("ARC_CG.jl")
 end
diff --git a/src/Manopt.jl b/src/Manopt.jl
@@ -15,7 +15,8 @@ using ColorTypes
 using Colors
 using DataStructures: CircularBuffer, capacity, length, push!, size
 using Dates: Millisecond, Nanosecond, Period, canonicalize, value
-using LinearAlgebra: Diagonal, I, eigen, eigvals, tril, Symmetric, dot, cholesky
+using LinearAlgebra:
+    Diagonal, I, eigen, eigvals, tril, Symmetric, dot, cholesky, eigmin, opnorm
 using ManifoldDiff:
     adjoint_Jacobi_field,
     adjoint_Jacobi_field!,
@@ -70,6 +71,7 @@ using ManifoldsBase:
     allocate,
     allocate_result,
     allocate_result_type,
+    base_manifold,
     copy,
     copyto!,
     default_inverse_retraction_method,
@@ -105,6 +107,7 @@ using ManifoldsBase:
     power_dimensions,
     project,
     project!,
+    rand!,
     representation_size,
     requires_caching,
     retract,
@@ -138,6 +141,7 @@ include("functions/manifold_functions.jl")
 # solvers general framework
 include("solvers/solver.jl")
 # specific solvers
+include("solvers/adaptive_regularization_with_cubics.jl")
 include("solvers/alternating_gradient_descent.jl")
 include("solvers/augmented_Lagrangian_method.jl")
 include("solvers/ChambollePock.jl")
@@ -227,6 +231,7 @@ export AbstractGradientSolverState,
     AbstractHessianSolverState,
     AbstractManoptSolverState,
     AbstractPrimalDualSolverState,
+    AdaptiveRegularizationState,
     AlternatingGradientDescentState,
     AugmentedLagrangianMethodState,
     ChambollePockState,
@@ -238,6 +243,7 @@ export AbstractGradientSolverState,
     ExactPenaltyMethodState,
     FrankWolfeState,
     GradientDescentState,
+    LanczosState,
     LevenbergMarquardtState,
     NelderMeadState,
     ParticleSwarmState,
@@ -314,8 +320,7 @@ export QuasiNewtonCautiousDirectionUpdate,
     BFGS, InverseBFGS, DFP, InverseDFP, SR1, InverseSR1
 export InverseBroyden, Broyden
 export AbstractQuasiNewtonDirectionUpdate, AbstractQuasiNewtonUpdateRule
-export WolfePowellLinesearch,
-    operator_to_matrix, square_matrix_vector_product, WolfePowellBinaryLinesearch
+export WolfePowellLinesearch, WolfePowellBinaryLinesearch
 export AbstractStateAction, StoreStateAction
 export has_storage, get_storage, update_storage!
 export objective_cache_factory
@@ -335,7 +340,9 @@ export DirectionUpdateRule,
     ConjugateGradientBealeRestart
 #
 # Solvers
-export alternating_gradient_descent,
+export adaptive_regularization_with_cubics,
+    adaptive_regularization_with_cubics!,
+    alternating_gradient_descent,
     alternating_gradient_descent!,
     augmented_Lagrangian_method,
     augmented_Lagrangian_method!,
@@ -381,6 +388,7 @@ export solve!
 export ApproxHessianFiniteDifference, ApproxHessianSymmetricRankOne, ApproxHessianBFGS
 export update_hessian!, update_hessian_basis!
 export ExactPenaltyCost, ExactPenaltyGrad, AugmentedLagrangianCost, AugmentedLagrangianGrad
+export AdaptiveRegularizationCubicCost, AdaptiveRegularizationCubicGrad
 #
 # Stepsize
 export Stepsize
@@ -395,12 +403,14 @@ export StopAfter,
     StopAfterIteration,
     StopWhenResidualIsReducedByFactorOrPower,
     StopWhenAll,
+    StopWhenAllLanczosVectorsUsed,
     StopWhenAny,
     StopWhenChangeLess,
     StopWhenCostLess,
     StopWhenCurvatureIsNegative,
     StopWhenGradientChangeLess,
     StopWhenGradientNormLess,
+    StopWhenFirstOrderProgress,
     StopWhenModelIncreased,
     StopWhenPopulationConcentrated,
     StopWhenSmallerOrEqual,
@@ -480,7 +490,7 @@ export DebugDualBaseChange, DebugDualBaseIterate, DebugDualChange, DebugDualIter
 export DebugDualResidual, DebugPrimalDualResidual, DebugPrimalResidual
 export DebugProximalParameter, DebugWarnIfCostIncreases
 export DebugGradient, DebugGradientNorm, DebugStepsize
-export DebugWhenActive
+export DebugWhenActive, DebugWarnIfFieldNotFinite, DebugIfEntry
 export DebugWarnIfCostNotFinite, DebugWarnIfFieldNotFinite, DebugMessages
 #
 # Records - and access functions