diff --git a/Project.toml b/Project.toml index 7b45199f35..dd7837d271 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Manopt" uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5" authors = ["Ronny Bergmann "] -version = "0.4.33" +version = "0.4.34" [deps] ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" @@ -40,8 +40,8 @@ Colors = "0.11.2, 0.12" DataStructures = "0.17, 0.18" LRUCache = "1.4" ManifoldDiff = "0.2, 0.3.3" -Manifolds = "0.8.69" -ManifoldsBase = "0.14.4" +Manifolds = "0.8.75" +ManifoldsBase = "0.14.10" PolynomialRoots = "1" Requires = "0.5, 1" julia = "1.6" diff --git a/docs/Project.toml b/docs/Project.toml index 2de77afbe2..4c1e7b5828 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -23,5 +23,5 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" BenchmarkTools = "1.3" CondaPkg = "0.2" Documenter = "0.27" -Manifolds = "0.8.27" +Manifolds = "0.8.75" ManifoldsBase = "0.13, 0.14" diff --git a/docs/make.jl b/docs/make.jl index dccda1527b..60675a5ca5 100755 --- a/docs/make.jl +++ b/docs/make.jl @@ -61,7 +61,29 @@ makedocs( format=Documenter.HTML(; mathengine=MathJax3(), prettyurls=get(ENV, "CI", nothing) == "true" ), - modules=[Manopt], + modules=[ + Manopt, + if isdefined(Base, :get_extension) + Base.get_extension(Manopt, :ManoptLineSearchesExt) + else + Manopt.ManoptLineSearchesExt + end, + if isdefined(Base, :get_extension) + Base.get_extension(Manopt, :ManoptLRUCacheExt) + else + Manopt.ManoptLRUCacheExt + end, + if isdefined(Base, :get_extension) + Base.get_extension(Manopt, :ManoptManifoldsExt) + else + Manopt.ManoptManifoldsExt + end, + if isdefined(Base, :get_extension) + Base.get_extension(Manopt, :ManoptPlotsExt) + else + Manopt.ManoptPlotsExt + end, + ], authors="Ronny Bergmann and contributors.", sitename="Manopt.jl", strict=[ @@ -86,8 +108,9 @@ makedocs( "Get started: Optimize!" => "tutorials/Optimize!.md", "Speedup using Inplace computations" => "tutorials/InplaceGradient.md", "Use Automatic Differentiation" => "tutorials/AutomaticDifferentiation.md", + "Define Objectives in the Embedding" => "tutorials/EmbeddingObjectives.md", "Count and use a Cache" => "tutorials/CountAndCache.md", - "Perform Debug Output" => "tutorials/HowToDebug.md", + "Print Debug Output" => "tutorials/HowToDebug.md", "Record values" => "tutorials/HowToRecord.md", "Implement a Solver" => "tutorials/ImplementASolver.md", "Do Contrained Optimization" => "tutorials/ConstrainedOptimization.md", diff --git a/docs/src/extensions.md b/docs/src/extensions.md index 77bd37e57a..7906b64c5f 100644 --- a/docs/src/extensions.md +++ b/docs/src/extensions.md @@ -45,6 +45,11 @@ x_opt = quasi_Newton( ) ``` +### Manifolds.jl + ```@docs Manopt.LineSearchesStepsize +mid_point +Manopt.max_stepsize(::TangentBundle, ::Any) +Manopt.max_stepsize(::FixedRankMatrices, ::Any) ``` diff --git a/docs/src/plans/objective.md b/docs/src/plans/objective.md index 2253391259..1ba78470c9 100644 --- a/docs/src/plans/objective.md +++ b/docs/src/plans/objective.md @@ -20,23 +20,83 @@ InplaceEvaluation evaluation_type ``` -It sometimes might be nice to set certain parameters within -## Cost Objective +## Decorators for Objectives + +An objective can be decorated using the following trait and function to initialize + +```@docs +dispatch_objective_decorator +is_objective_decorator +decorate_objective! +``` + +### [Embedded Objectives](@id ManifoldEmbeddedObjective) + +```@docs +EmbeddedManifoldObjective +``` + +### [Cache Objective](@id CacheSection) + +Since single function calls, e.g. 
to the cost or the gradient, might be expensive, +a simple cache objective exists as a decorator, that caches one cost value or gradient. + +It can be activated/used with the `cache=` keyword argument available for every solver. + +```@docs +Manopt.reset_counters! +Manopt.objective_cache_factory +``` + +#### A simple cache + +A first generic cache is always available, but it only caches one gradient and one cost function evaluation (for the same point). + +```@docs +SimpleManifoldCachedObjective +``` + +#### A Generic Cache + +For the more advanced cache, you need to implement some type of cache yourself, that provides a `get!` +and implement [`init_caches`](@ref). +This is for example provided if you load [`LRUCache.jl`](https://github.com/JuliaCollections/LRUCache.jl). Then you obtain + +```@docs +ManifoldCachedObjective +init_caches +``` + +### [Count Objective](@id ManifoldCountObjective) + +```@docs +ManifoldCountObjective +``` + +### Internal Decorators + +```@docs +ReturnManifoldObjective +``` + +## Specific Objective typed and their access functions + +### Cost Objective ```@docs AbstractManifoldCostObjective ManifoldCostObjective ``` -### Access functions +#### Access functions ```@docs get_cost get_cost_function ``` -## Gradient Objectives +### Gradient Objectives ```@docs AbstractManifoldGradientObjective @@ -52,7 +112,7 @@ There is also a second variant, if just one function is responsible for computin ManifoldCostGradientObjective ``` -### Access functions +#### Access functions ```@docs get_gradient @@ -60,45 +120,45 @@ get_gradients get_gradient_function ``` -## Subgradient Objective +### Subgradient Objective ```@docs ManifoldSubgradientObjective ``` -### Access Functions +#### Access Functions ```@docs get_subgradient ``` -## Proximal Map Objective +### Proximal Map Objective ```@docs ManifoldProximalMapObjective ``` -### Access Functions +#### Access Functions ```@docs get_proximal_map ``` -## Hessian Objective +### Hessian Objective ```@docs ManifoldHessianObjective ``` -### Access functions +#### Access functions ```@docs get_hessian get_preconditioner ``` -## Primal-Dual based Objetives +### Primal-Dual based Objectives ```@docs AbstractPrimalDualManifoldObjective @@ -106,7 +166,7 @@ PrimalDualManifoldObjective PrimalDualManifoldSemismoothNewtonObjective ``` -### Access functions +#### Access functions ```@docs adjoint_linearized_operator @@ -118,7 +178,7 @@ get_primal_prox linearized_forward_operator ``` -## Constrained Objective +### Constrained Objective Besides the [`AbstractEvaluationType`](@ref) there is one further property to distinguish among constraint functions, especially the gradients of the constraints. @@ -135,7 +195,7 @@ The [`ConstraintType`](@ref) is a parameter of the corresponding Objective. ConstrainedManifoldObjective ``` -### Access functions +#### Access functions ```@docs get_constraints @@ -152,50 +212,3 @@ get_grad_inequality_constraint! get_grad_inequality_constraints get_grad_inequality_constraints! ``` - -## Decorators for AbstractManoptSolverState - -An objective can be decorated using the following trait and function to initialize - -```@docs -dispatch_objective_decorator -is_objective_decorator -decorate_objective! -``` - -### [Cache Objective](@id CacheSection) - -Since single function calls, e.g. to the cost or the gradient, might be expensive, -a simple cache objective exists as a decorator, that caches one cost value or gradient. - -It can be activated/used with the `cache=` keyword argument available for every solver. 
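For orientation, a minimal sketch of how the `cache=` keyword mentioned above might be used in a solver call; the manifold, cost, and gradient here are purely illustrative, and `LRUCache.jl` has to be loaded for the `:LRU` variant.

```julia
using Manopt, Manifolds, LRUCache

M = Sphere(2)
f(M, p) = p[1]^2                                    # toy cost
grad_f(M, p) = project(M, p, [2 * p[1], 0.0, 0.0])  # its Riemannian gradient via projection
# cache the 25 most recent cost and gradient values during the solver run
q = gradient_descent(M, f, grad_f, rand(M); cache=(:LRU, [:Cost, :Gradient], 25))
```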
- -```@docs -Manopt.reset_counters! -Manopt.objective_cache_factory -``` - -#### A simple cache - -A first generic cache is always available, but it only caches one gradient and one cost function evaluation (for the same point). - -```@docs -SimpleManifoldCachedObjective -``` - -#### A Generic Cache - -For the more advanced cache, you need to implement some type of cache yourself, that provides a `get!` -and implement [`init_caches`](@ref). -This is for example provided if you load [`LRUCache.jl`](https://github.com/JuliaCollections/LRUCache.jl). Then you obtain - -```@docs -ManifoldCachedObjective -init_caches -``` - -### [Count Objective](@id ManifoldCountObjective) - -```@docs -ManifoldCountObjective -``` \ No newline at end of file diff --git a/docs/src/plans/state.md b/docs/src/plans/state.md index ec2d503205..29cb603e29 100644 --- a/docs/src/plans/state.md +++ b/docs/src/plans/state.md @@ -25,7 +25,7 @@ This might be useful to continue investigation at the current iterate, or to set ```@docs get_iterate set_iterate! -get_gradient(::AbstractManifoldGradientObjective) +get_gradient(s::AbstractManoptSolverState) set_gradient! ``` diff --git a/docs/src/references.bib b/docs/src/references.bib index 5f7655f7ca..93e54c4971 100644 --- a/docs/src/references.bib +++ b/docs/src/references.bib @@ -501,6 +501,20 @@ @article{LiuStorey:1991 % --- N % % +@article{Nguyen:2023, + DOI = {10.1007/s10957-023-02242-z}, + EPRINT = {2009.10159}, + EPRINTTYPE = {arXiv}, + YEAR = {2023}, + MONTH = jun, + PUBLISHER = {Springer Science and Business Media {LLC}}, + VOLUME = {198}, + NUMBER = {1}, + PAGES = {135--164}, + AUTHOR = {Du Nguyen}, + TITLE = {Operator-Valued Formulas for Riemannian Gradient and Hessian and Families of Tractable Metrics in Riemannian Optimization}, + JOURNAL = {Journal of Optimization Theory and Applications} +} @book{NocedalWright:2006, ADDRESS = {New York}, AUTHOR = {Nocedal, Jorge and Wright, Steven J.}, diff --git a/src/Manopt.jl b/src/Manopt.jl index 828c235e40..7e59fc8f42 100644 --- a/src/Manopt.jl +++ b/src/Manopt.jl @@ -7,8 +7,10 @@ * 🎯 Issues: [github.com/JuliaManifolds/Manopt.jl/issues](https://github.com/JuliaManifolds/Manopt.jl/issues) """ module Manopt + import Base: &, copy, getindex, identity, setindex!, show, | import LinearAlgebra: reflect! +import ManifoldsBase: embed! using ColorSchemes using ColorTypes @@ -45,7 +47,11 @@ using ManifoldDiff: differential_shortest_geodesic_startpoint, differential_shortest_geodesic_startpoint!, jacobi_field, - jacobi_field! + jacobi_field!, + riemannian_gradient, + riemannian_gradient!, + riemannian_Hessian, + riemannian_Hessian! 
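The conversion helpers imported here can also be called directly, for example to turn a Euclidean gradient into a Riemannian one. A minimal sketch with an illustrative cost on the Grassmann manifold:

```julia
using Manifolds, ManifoldDiff, LinearAlgebra

M = Grassmann(5, 2)
A = Symmetric(rand(5, 5))
∇f(E, p) = A * p   # Euclidean gradient of p ↦ ½ tr(p'Ap) in the embedding
p = rand(M)
# convert the Euclidean gradient at the embedded point into the Riemannian gradient on M
X = riemannian_gradient(M, p, ∇f(get_embedding(M), embed(M, p)))
```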
using ManifoldsBase: AbstractBasis, AbstractDecoratorManifold, @@ -78,6 +84,7 @@ using ManifoldsBase: default_retraction_method, default_vector_transport_method, distance, + embed, embed_project, embed_project!, exp, @@ -87,6 +94,7 @@ using ManifoldsBase: get_component, get_coordinates, get_coordinates!, + get_embedding, get_iterator, get_vector, get_vector!, @@ -206,6 +214,7 @@ export AbstractDecoratedManifoldObjective, AbstractManifoldObjective, AbstractPrimalDualManifoldObjective, ConstrainedManifoldObjective, + EmbeddedManifoldObjective, ManifoldCountObjective, NonlinearLeastSquaresObjective, ManifoldAlternatingGradientObjective, diff --git a/src/plans/constrained_plan.jl b/src/plans/constrained_plan.jl index ca569a8285..5da627ee2a 100644 --- a/src/plans/constrained_plan.jl +++ b/src/plans/constrained_plan.jl @@ -515,9 +515,9 @@ eevaluate all gradients of the equality constraints ``\operatorname{grad} h(x)`` of the [`ConstrainedManifoldObjective`](@ref) `P` at `p`. !!! note - for the [`InplaceEvaluation`](@ref) and [`FunctionConstraint`](@ref) variant of the problem, - this function currently also calls [`get_equality_constraints`](@ref), - since this is the only way to determine the number of cconstraints. + For the [`InplaceEvaluation`](@ref) and [`FunctionConstraint`](@ref) variant of the problem, + this function currently also calls [`get_equality_constraints`](@ref), + since this is the only way to determine the number of cconstraints. """ get_grad_equality_constraints(M::AbstractManifold, co::ConstrainedManifoldObjective, p) function get_grad_equality_constraints( diff --git a/src/plans/embedded_objective.jl b/src/plans/embedded_objective.jl new file mode 100644 index 0000000000..e77cca972b --- /dev/null +++ b/src/plans/embedded_objective.jl @@ -0,0 +1,364 @@ +@doc raw""" + EmbeddedManifoldObjective{P, T, E, O2, O1<:AbstractManifoldObjective{E}} <: + AbstractDecoratedManifoldObjective{O2, O1} + +Declare an objective to be defined in the embedding. +This also declares the gradient to be defined in the embedding, +and especially being the Riesz representer with respect to the metric in the embedding. +The types can be used to still dispatch on also the undecorated objective type `O2`. + +# Fields +* `objective` – the objective that is defined in the embedding +* `p` - (`nothing`) a point in the embedding. +* `X` - (`nothing`) a tangent vector in the embedding + +When a point in the embedding `p` is provided, `embed!` is used in place of this point to reduce +memory allocations. 
Similarly `X` is used when embedding tangent vectors +""" +struct EmbeddedManifoldObjective{P,T,E,O2,O1<:AbstractManifoldObjective{E}} <: + AbstractDecoratedManifoldObjective{E,O2} + objective::O1 + p::P + X::T +end +function EmbeddedManifoldObjective( + o::O, p::P=missing, X::T=missing +) where {P,T,E<:AbstractEvaluationType,O<:AbstractManifoldObjective{E}} + return EmbeddedManifoldObjective{P,T,E,O,O}(o, p, X) +end +function EmbeddedManifoldObjective( + o::O1, p::P=missing, X::T=missing +) where { + P, + T, + E<:AbstractEvaluationType, + O2<:AbstractManifoldObjective, + O1<:AbstractDecoratedManifoldObjective{E,O2}, +} + return EmbeddedManifoldObjective{P,T,E,O2,O1}(o, p, X) +end +function EmbeddedManifoldObjective( + M::AbstractManifold, + o::O; + q=rand(M), + p::P=embed(M, q), + X::T=embed(M, q, rand(M; vector_at=q)), +) where {P,T,O<:AbstractManifoldObjective} + return EmbeddedManifoldObjective(o, p, X) +end + +# dispatch whether to do this in place or not +function local_embed!(M::AbstractManifold, ::EmbeddedManifoldObjective{Missing}, p) + return embed(M, p) +end +function local_embed!(M::AbstractManifold, emo::EmbeddedManifoldObjective{P}, p) where {P} + embed!(M, emo.p, p) + return emo.p +end + +@doc raw""" + get_cost(M::AbstractManifold,emo::EmbeddedManifoldObjective, p) + +Evaluate the cost function of an objective defined in the embedding, i.e. embed `p` +before calling the cost function stored in the [`EmbeddedManifoldObjective`](@ref Manopt.EmbeddedManifoldObjective). +""" +function get_cost(M::AbstractManifold, emo::EmbeddedManifoldObjective, p) + q = local_embed!(M, emo, p) + return get_cost(get_embedding(M), emo.objective, q) +end + +@doc raw""" + get_gradient(M::AbstractManifold, emo::EmbeddedManifoldObjective, p) + get_gradient!(M::AbstractManifold, X, emo::EmbeddedManifoldObjective, p) + +Evaluate the gradient function of an objective defined in the embedding, that is embed `p` +before calling the gradient function stored in the [`EmbeddedManifoldObjective`](@ref). + +The returned gradient is then converted to a Riemannian gradient calling +[`riemannian_gradient`](https://juliamanifolds.github.io/ManifoldDiff.jl/stable/library.html#ManifoldDiff.riemannian_gradient-Tuple{AbstractManifold,%20Any,%20Any}). 
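As a rough illustration, assuming a sphere with a toy cost and its Euclidean gradient, the conversion happens automatically when accessing the gradient through the decorated objective:

```julia
using Manopt, Manifolds

M = Sphere(2)
f(E, p) = p[1]^2                  # cost written for the embedding ℝ³
∇f(E, p) = [2 * p[1], 0.0, 0.0]   # its Euclidean gradient
obj = ManifoldGradientObjective(f, ∇f)
emo = EmbeddedManifoldObjective(obj)
p = rand(M)
X = get_gradient(M, emo, p)       # Riemannian gradient on the sphere
```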
+""" +function get_gradient( + M::AbstractManifold, emo::EmbeddedManifoldObjective{P,Missing}, p +) where {P} + q = local_embed!(M, emo, p) + return riemannian_gradient(M, p, get_gradient(get_embedding(M), emo.objective, q)) +end +function get_gradient( + M::AbstractManifold, emo::EmbeddedManifoldObjective{P,T}, p +) where {P,T} + q = local_embed!(M, emo, p) + get_gradient!(get_embedding(M), emo.X, emo.objective, q) + return riemannian_gradient(M, p, emo.X) +end +function get_gradient!( + M::AbstractManifold, X, emo::EmbeddedManifoldObjective{P,Missing}, p +) where {P} + q = local_embed!(M, emo, p) + riemannian_gradient!(M, X, p, get_gradient(get_embedding(M), emo.objective, q)) + return X +end +function get_gradient!( + M::AbstractManifold, X, emo::EmbeddedManifoldObjective{P,T}, p +) where {P,T} + q = local_embed!(M, emo, p) + get_gradient!(get_embedding(M), emo.X, emo.objective, q) + riemannian_gradient!(M, X, p, emo.X) + return X +end +# +# Hessian +# +@doc raw""" + get_hessian(M::AbstractManifold, emo::EmbeddedManifoldObjective, p, X) + get_hessian!(M::AbstractManifold, Y, emo::EmbeddedManifoldObjective, p, X) + +Evaluate the Hessian of an objective defined in the embedding, that is embed `p` and `X` +before calling the Hessiand function stored in the [`EmbeddedManifoldObjective`](@ref). + +The returned Hessian is then converted to a Riemannian Hessian calling + [`riemannian_Hessian`](https://juliamanifolds.github.io/ManifoldDiff.jl/stable/library/#ManifoldDiff.riemannian_Hessian-Tuple{AbstractManifold,%20Any,%20Any,%20Any,%20Any}). +""" +function get_hessian( + M::AbstractManifold, emo::EmbeddedManifoldObjective{P,Missing}, p, X +) where {P} + q = local_embed!(M, emo, p) + return riemannian_Hessian( + M, + p, + get_gradient(get_embedding(M), emo.objective, q), + get_hessian(get_embedding(M), emo.objective, q, embed(M, p, X)), + X, + ) +end +function get_hessian( + M::AbstractManifold, emo::EmbeddedManifoldObjective{P,T}, p, X +) where {P,T} + q = local_embed!(M, emo, p) + get_gradient!(get_embedding(M), emo.X, emo.objective, embed(M, p)) + return riemannian_Hessian( + M, p, emo.X, get_hessian(get_embedding(M), emo.objective, q, embed(M, p, X)), X + ) +end +function get_hessian!( + M::AbstractManifold, Y, emo::EmbeddedManifoldObjective{P,Missing}, p, X +) where {P} + q = local_embed!(M, emo, p) + riemannian_Hessian!( + M, + Y, + p, + get_gradient(get_embedding(M), emo.objective, q), + get_hessian(get_embedding(M), emo.objective, q, embed(M, p, X)), + X, + ) + return Y +end +function get_hessian!( + M::AbstractManifold, Y, emo::EmbeddedManifoldObjective{P,T}, p, X +) where {P,T} + q = local_embed!(M, emo, p) + get_gradient!(get_embedding(M), emo.X, emo.objective, embed(M, p)) + riemannian_Hessian!( + M, Y, p, emo.X, get_hessian(get_embedding(M), emo.objective, q, embed(M, p, X)), X + ) + return Y +end +# +# Constraints +# +""" + get_constraints(M::AbstractManifold, emo::EmbeddedManifoldObjective, p) + +Return the vector ``(g_1(p),...g_m(p),h_1(p),...,h_n(p))`` defined in the embedding, that is embed `p` +before calling the constraint function(s) stored in the [`EmbeddedManifoldObjective`](@ref). 
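A small sketch following the constrained test case of this changeset, where the (illustrative) cost is reused as equality and inequality constraint:

```julia
using Manopt, Manifolds, LinearAlgebra

M = Grassmann(5, 2)
A = diagm([1.0, 2.0, 3.0, 4.0, 5.0])
f(E, p) = 1 / 2 * tr(p' * A * p)
∇f(E, p) = A * p
co = ConstrainedManifoldObjective(f, ∇f, [f], [∇f], [f], [∇f])
eco = EmbeddedManifoldObjective(M, co)
p = [1.0 0.0; 0.0 1.0; 0.0 0.0; 0.0 0.0; 0.0 0.0]
get_constraints(M, eco, p)                  # [[g₁(p)], [h₁(p)]], evaluated in the embedding
get_grad_equality_constraint(M, eco, p, 1)  # Riemannian gradient of h₁ at p
```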
+""" +function get_constraints(M::AbstractManifold, emo::EmbeddedManifoldObjective, p) + q = local_embed!(M, emo, p) + return [ + get_inequality_constraints(M, emo.objective, q), + get_equality_constraints(M, emo.objective, q), + ] +end +@doc raw""" + get_equality_constraints(M::AbstractManifold, emo::EmbeddedManifoldObjective, p) + +Evaluate all equality constraints ``h(p)`` of ``\bigl(h_1(p), h_2(p),\ldots,h_p(p)\bigr)`` +defined in the embedding, that is embed `p` +before calling the constraint function(s) stored in the [`EmbeddedManifoldObjective`](@ref). +""" +function get_equality_constraints(M::AbstractManifold, emo::EmbeddedManifoldObjective, p) + q = local_embed!(M, emo, p) + return get_equality_constraints(M, emo.objective, q) +end +@doc raw""" + get_equality_constraint(M::AbstractManifold, emo::EmbeddedManifoldObjective, p, j) + +evaluate the `j`s equality constraint ``h_j(p)`` defined in the embedding, that is embed `p` +before calling the constraint function(s) stored in the [`EmbeddedManifoldObjective`](@ref). +""" +function get_equality_constraint(M::AbstractManifold, emo::EmbeddedManifoldObjective, p, j) + q = local_embed!(M, emo, p) + return get_equality_constraint(M, emo.objective, q, j) +end +@doc raw""" + get_inequality_constraints(M::AbstractManifold, ems::EmbeddedManifoldObjective, p) + +Evaluate all inequality constraints ``g(p)`` of ``\bigl(g_1(p), g_2(p),\ldots,g_m(p)\bigr)`` +defined in the embedding, that is embed `p` +before calling the constraint function(s) stored in the [`EmbeddedManifoldObjective`](@ref). +""" +function get_inequality_constraints(M::AbstractManifold, emo::EmbeddedManifoldObjective, p) + q = local_embed!(M, emo, p) + return get_inequality_constraints(M, emo.objective, q) +end + +@doc raw""" + get_inequality_constraint(M::AbstractManifold, ems::EmbeddedManifoldObjective, p, i) + +Evaluate the `i`s inequality constraint ``g_i(p)`` defined in the embedding, that is embed `p` +before calling the constraint function(s) stored in the [`EmbeddedManifoldObjective`](@ref). +""" +function get_inequality_constraint( + M::AbstractManifold, emo::EmbeddedManifoldObjective, p, i +) + q = local_embed!(M, emo, p) + return get_inequality_constraint(M, emo.objective, q, i) +end +@doc raw""" + X = get_grad_equality_constraint(M::AbstractManifold, emo::EmbeddedManifoldObjective, p, j) + get_grad_equality_constraint!(M::AbstractManifold, X, emo::EmbeddedManifoldObjective, p, j) + +evaluate the gradient of the `j`th equality constraint ``\operatorname{grad} h_j(p)`` defined in the embedding, that is embed `p` +before calling the gradient function stored in the [`EmbeddedManifoldObjective`](@ref). + +The returned gradient is then converted to a Riemannian gradient calling +[`riemannian_gradient`](https://juliamanifolds.github.io/ManifoldDiff.jl/stable/library.html#ManifoldDiff.riemannian_gradient-Tuple{AbstractManifold,%20Any,%20Any}). 
+""" +function get_grad_equality_constraint( + M::AbstractManifold, emo::EmbeddedManifoldObjective{P,Missing}, p, j +) where {P} + q = local_embed!(M, emo, p) + Z = get_grad_equality_constraint(get_embedding(M), emo.objective, q, j) + return riemannian_gradient(M, p, Z) +end +function get_grad_equality_constraint( + M::AbstractManifold, emo::EmbeddedManifoldObjective{P,T}, p, j +) where {P,T} + q = local_embed!(M, emo, p) + get_grad_equality_constraint!(get_embedding(M), emo.X, emo.objective, q, j) + return riemannian_gradient(M, p, emo.X) +end +function get_grad_equality_constraint!( + M::AbstractManifold, Y, emo::EmbeddedManifoldObjective{P,Missing}, p, j +) where {P} + q = local_embed!(M, emo, p) + Z = get_grad_equality_constraint(get_embedding(M), emo.objective, q, j) + riemannian_gradient!(M, Y, p, Z) + return Y +end +function get_grad_equality_constraint!( + M::AbstractManifold, Y, emo::EmbeddedManifoldObjective{P,T}, p, j +) where {P,T} + q = local_embed!(M, emo, p) + get_grad_equality_constraint!(get_embedding(M), emo.X, emo.objective, q, j) + riemannian_gradient!(M, Y, p, emo.X) + return Y +end +@doc raw""" + X = get_grad_equality_constraints(M::AbstractManifold, emo::EmbeddedManifoldObjective, p) + get_grad_equality_constraints!(M::AbstractManifold, X, emo::EmbeddedManifoldObjective, p) + +evaluate the gradients of the the equality constraints ``\operatorname{grad} h(p)`` defined in the embedding, that is embed `p` +before calling the gradient function stored in the [`EmbeddedManifoldObjective`](@ref). + +The returned gradients are then converted to a Riemannian gradient calling +[`riemannian_gradient`](https://juliamanifolds.github.io/ManifoldDiff.jl/stable/library.html#ManifoldDiff.riemannian_gradient-Tuple{AbstractManifold,%20Any,%20Any}). +""" +function get_grad_equality_constraints( + M::AbstractManifold, emo::EmbeddedManifoldObjective, p +) + q = local_embed!(M, emo, p) + Z = get_grad_equality_constraints(get_embedding(M), emo.objective, q) + return [riemannian_gradient(M, p, Zj) for Zj in Z] +end +function get_grad_equality_constraints!( + M::AbstractManifold, Y, emo::EmbeddedManifoldObjective, p +) + q = local_embed!(M, emo, p) + Z = get_grad_equality_constraints(get_embedding(M), emo.objective, q) + for (Yj, Zj) in zip(Y, Z) + riemannian_gradient!(M, Yj, p, Zj) + end + return Y +end +@doc raw""" + X = get_grad_inequality_constraint(M::AbstractManifold, emo::EmbeddedManifoldObjective, p, i) + get_grad_inequality_constraint!(M::AbstractManifold, X, emo::EmbeddedManifoldObjective, p, i) + +evaluate the gradient of the `i`th inequality constraint ``\operatorname{grad} g_i(p)`` defined in the embedding, that is embed `p` +before calling the gradient function stored in the [`EmbeddedManifoldObjective`](@ref). + +The returned gradient is then converted to a Riemannian gradient calling +[`riemannian_gradient`](https://juliamanifolds.github.io/ManifoldDiff.jl/stable/library.html#ManifoldDiff.riemannian_gradient-Tuple{AbstractManifold,%20Any,%20Any}). 
+""" +function get_grad_inequality_constraint( + M::AbstractManifold, emo::EmbeddedManifoldObjective{P,Missing}, p, j +) where {P} + q = local_embed!(M, emo, p) + Z = get_grad_inequality_constraint(get_embedding(M), emo.objective, q, j) + return riemannian_gradient(M, p, Z) +end +function get_grad_inequality_constraint( + M::AbstractManifold, emo::EmbeddedManifoldObjective{P,T}, p, j +) where {P,T} + q = local_embed!(M, emo, p) + get_grad_inequality_constraint!(get_embedding(M), emo.X, emo.objective, q, j) + return riemannian_gradient(M, p, emo.X) +end +function get_grad_inequality_constraint!( + M::AbstractManifold, Y, emo::EmbeddedManifoldObjective{P,Missing}, p, j +) where {P} + q = local_embed!(M, emo, p) + Z = get_grad_inequality_constraint(get_embedding(M), emo.objective, q, j) + riemannian_gradient!(M, Y, p, Z) + return Y +end +function get_grad_inequality_constraint!( + M::AbstractManifold, Y, emo::EmbeddedManifoldObjective{P,T}, p, j +) where {P,T} + q = local_embed!(M, emo, p) + get_grad_inequality_constraint!(get_embedding(M), emo.X, emo.objective, q, j) + riemannian_gradient!(M, Y, p, emo.X) + return Y +end +@doc raw""" + X = get_grad_inequality_constraints(M::AbstractManifold, emo::EmbeddedManifoldObjective, p) + get_grad_inequality_constraints!(M::AbstractManifold, X, emo::EmbeddedManifoldObjective, p) + +evaluate the gradients of the the inequality constraints ``\operatorname{grad} g(p)`` defined in the embedding, that is embed `p` +before calling the gradient function stored in the [`EmbeddedManifoldObjective`](@ref). + +The returned gradients are then converted to a Riemannian gradient calling +[`riemannian_gradient`](https://juliamanifolds.github.io/ManifoldDiff.jl/stable/library.html#ManifoldDiff.riemannian_gradient-Tuple{AbstractManifold,%20Any,%20Any}). +""" +function get_grad_inequality_constraints( + M::AbstractManifold, emo::EmbeddedManifoldObjective, p +) + q = local_embed!(M, emo, p) + Z = get_grad_inequality_constraints(get_embedding(M), emo.objective, q) + return [riemannian_gradient(M, p, Zj) for Zj in Z] +end +function get_grad_inequality_constraints!( + M::AbstractManifold, Y, emo::EmbeddedManifoldObjective, p +) + q = local_embed!(M, emo, p) + Z = get_grad_inequality_constraints(get_embedding(M), emo.objective, q) + for (Yj, Zj) in zip(Y, Z) + riemannian_gradient!(M, Yj, p, Zj) + end + return Y +end + +function show(io::IO, emo::EmbeddedManifoldObjective{P,T}) where {P,T} + return print(io, "EmbeddedManifoldObjective{$P,$T} of an $(emo.objective)") +end diff --git a/src/plans/objective.jl b/src/plans/objective.jl index 15ea27b3b5..6a90a798fa 100644 --- a/src/plans/objective.jl +++ b/src/plans/objective.jl @@ -47,9 +47,18 @@ do not allocate memory but work on their input, i.e. in place. """ struct InplaceEvaluation <: AbstractEvaluationType end -struct ReturnManifoldObjective{E,P,O<:AbstractManifoldObjective{E}} <: - AbstractDecoratedManifoldObjective{E,P} - objective::O +@doc raw""" + ReturnManifoldObjective{E,O2,O1<:AbstractManifoldObjective{E}} <: + AbstractDecoratedManifoldObjective{E,O2} + +A wrapper to indicate that `get_solver_result` should return the inner objetcive. + +The types are such that one can still dispatch on the undecorated type `O2` of the +original objective as well. 
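In practice this wrapper is usually activated through the `return_objective=true` keyword of a solver call; a rough sketch, where solver, cost, and gradient are only illustrative:

```julia
using Manopt, Manifolds

M = Sphere(2)
f(M, p) = p[3]
grad_f(M, p) = project(M, p, [0.0, 0.0, 1.0])
# besides the state, the (possibly decorated) objective is returned as well
ret = gradient_descent(M, f, grad_f, rand(M); return_objective=true, return_state=true)
q = get_solver_result(ret)
```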
+""" +struct ReturnManifoldObjective{E,O2,O1<:AbstractManifoldObjective{E}} <: + AbstractDecoratedManifoldObjective{E,O2} + objective::O1 end function ReturnManifoldObjective( o::O @@ -57,13 +66,13 @@ function ReturnManifoldObjective( return ReturnManifoldObjective{E,O,O}(o) end function ReturnManifoldObjective( - o::O + o::O1 ) where { E<:AbstractEvaluationType, - P<:AbstractManifoldObjective, - O<:AbstractDecoratedManifoldObjective{E,P}, + O2<:AbstractManifoldObjective, + O1<:AbstractDecoratedManifoldObjective{E,O2}, } - return ReturnManifoldObjective{E,P,O}(o) + return ReturnManifoldObjective{E,O2,O1}(o) end """ @@ -146,7 +155,7 @@ function show(io::IO, t::Tuple{<:AbstractManifoldObjective,P}) where {P} ) end -function status_summary(o::AbstractManifoldObjective{E}) where {E} +function status_summary(::AbstractManifoldObjective{E}) where {E} return "" end # Default undecorate for summary diff --git a/src/plans/plan.jl b/src/plans/plan.jl index c52bbbf362..9cee213cdb 100644 --- a/src/plans/plan.jl +++ b/src/plans/plan.jl @@ -36,6 +36,7 @@ include("gradient_plan.jl") include("hessian_plan.jl") include("proximal_plan.jl") include("subgradient_plan.jl") + include("constrained_plan.jl") include("adabtive_regularization_with_cubics_plan.jl") @@ -53,6 +54,7 @@ include("higher_order_primal_dual_plan.jl") include("stochastic_gradient_plan.jl") +include("embedded_objective.jl") include("subsolver_plan.jl") include("cache.jl") diff --git a/src/solvers/FrankWolfe.jl b/src/solvers/FrankWolfe.jl index 219d66fe4f..e7a211b726 100644 --- a/src/solvers/FrankWolfe.jl +++ b/src/solvers/FrankWolfe.jl @@ -251,6 +251,7 @@ function Frank_Wolfe_method!( p; initial_vector=zero_vector(M, p), evaluation=AllocatingEvaluation(), + objective_type=:Riemannian, retraction_method=default_retraction_method(M, typeof(p)), stepsize::TStep=default_stepsize(M, FrankWolfeState), stopping_criterion::TStop=StopAfterIteration(200) | @@ -258,9 +259,12 @@ function Frank_Wolfe_method!( StopWhenChangeLess(1.0e-8), sub_cost=FrankWolfeCost(p, initial_vector), sub_grad=FrankWolfeGradient(p, initial_vector), - sub_objective=ManifoldGradientObjective(sub_cost, sub_grad), - sub_problem=DefaultManoptProblem(M, sub_objective), sub_kwargs=[], + sub_objective=ManifoldGradientObjective(sub_cost, sub_grad), + sub_problem=DefaultManoptProblem( + M, + decorate_objective!(M, sub_objective; objective_type=objective_type, sub_kwargs...), + ), sub_stopping_criterion=StopAfterIteration(300) | StopWhenStepsizeLess(1e-8), sub_state::Union{AbstractManoptSolverState,AbstractEvaluationType}=if sub_problem isa Function @@ -275,6 +279,7 @@ function Frank_Wolfe_method!( M, GradientDescentState; retraction_method=retraction_method ), ); + objective_type=objective_type, sub_kwargs..., ) end, @@ -284,7 +289,7 @@ function Frank_Wolfe_method!( TStep<:Stepsize, O<:Union{ManifoldGradientObjective,AbstractDecoratedManifoldObjective}, } - dmgo = decorate_objective!(M, mgo; kwargs...) + dmgo = decorate_objective!(M, mgo; objective_type=objective_type, kwargs...) 
dmp = DefaultManoptProblem(M, dmgo) fws = FrankWolfeState( M, diff --git a/src/solvers/adaptive_regularization_with_cubics.jl b/src/solvers/adaptive_regularization_with_cubics.jl index 29fe76b061..91dcaaf06f 100644 --- a/src/solvers/adaptive_regularization_with_cubics.jl +++ b/src/solvers/adaptive_regularization_with_cubics.jl @@ -377,6 +377,7 @@ function adaptive_regularization_with_cubics!( evaluation::AbstractEvaluationType=AllocatingEvaluation(), initial_tangent_vector::T=zero_vector(M, p), maxIterLanczos=min(300, manifold_dimension(M)), + objective_type=:Riemannian, ρ_regularization::R=1e3, retraction_method::AbstractRetractionMethod=default_retraction_method(M), σmin::R=1e-10, @@ -386,6 +387,7 @@ function adaptive_regularization_with_cubics!( γ1::R=0.1, γ2::R=2.0, θ::R=0.5, + sub_kwargs=[], sub_stopping_criterion::StoppingCriterion=StopAfterIteration(maxIterLanczos) | StopWhenFirstOrderProgress(θ), sub_state::Union{<:AbstractManoptSolverState,<:AbstractEvaluationType}=LanczosState( @@ -396,7 +398,7 @@ function adaptive_regularization_with_cubics!( θ=θ, stopping_criterion=sub_stopping_criterion, ), - sub_objective=mho, + sub_objective=decorate_objective!(M, mho; objective_type=objective_type, sub_kwargs...), sub_problem=DefaultManoptProblem(M, sub_objective), stopping_criterion::StoppingCriterion=if sub_state isa LanczosState StopAfterIteration(40) | @@ -408,7 +410,7 @@ function adaptive_regularization_with_cubics!( kwargs..., ) where {T,R,O<:Union{ManifoldHessianObjective,AbstractDecoratedManifoldObjective}} X = copy(M, p, initial_tangent_vector) - dmho = decorate_objective!(M, mho; kwargs...) + dmho = decorate_objective!(M, mho; objective_type=objective_type, kwargs...) dmp = DefaultManoptProblem(M, dmho) arcs = AdaptiveRegularizationState( M, diff --git a/src/solvers/augmented_Lagrangian_method.jl b/src/solvers/augmented_Lagrangian_method.jl index 661839ccdd..eb276fc14a 100644 --- a/src/solvers/augmented_Lagrangian_method.jl +++ b/src/solvers/augmented_Lagrangian_method.jl @@ -327,6 +327,7 @@ function augmented_Lagrangian_method!( τ::Real=0.8, ρ::Real=1.0, θ_ρ::Real=0.3, + objective_type=:Riemannian, sub_cost=AugmentedLagrangianCost(cmo, ρ, μ, λ), sub_grad=AugmentedLagrangianGrad(cmo, ρ, μ, λ), sub_kwargs=[], @@ -347,7 +348,14 @@ function augmented_Lagrangian_method!( sub_kwargs..., ), sub_problem::AbstractManoptProblem=DefaultManoptProblem( - M, ManifoldGradientObjective(sub_cost, sub_grad; evaluation=evaluation) + M, + # pass down objective type to subsolvers + decorate_objective!( + M, + ManifoldGradientObjective(sub_cost, sub_grad; evaluation=evaluation); + objective_type=objective_type, + sub_kwargs..., + ), ), stopping_criterion::StoppingCriterion=StopAfterIteration(300) | ( StopWhenSmallerOrEqual(:ϵ, ϵ_min) & StopWhenChangeLess(1e-10) @@ -373,7 +381,7 @@ function augmented_Lagrangian_method!( θ_ϵ=θ_ϵ, stopping_criterion=stopping_criterion, ) - dcmo = decorate_objective!(M, cmo; kwargs...) + dcmo = decorate_objective!(M, cmo; objective_type=objective_type, kwargs...) mp = DefaultManoptProblem(M, dcmo) alms = decorate_state!(alms; kwargs...) 
solve!(mp, alms) diff --git a/src/solvers/difference-of-convex-proximal-point.jl b/src/solvers/difference-of-convex-proximal-point.jl index 875f74bb1b..a47ba8fcc3 100644 --- a/src/solvers/difference-of-convex-proximal-point.jl +++ b/src/solvers/difference-of-convex-proximal-point.jl @@ -310,6 +310,7 @@ function difference_of_convex_proximal_point!( λ=i -> 1 / 2, evaluation::AbstractEvaluationType=AllocatingEvaluation(), inverse_retraction_method=default_inverse_retraction_method(M), + objective_type=:Riemannian, retraction_method=default_retraction_method(M), stepsize=ConstantStepsize(M), stopping_criterion=if isnothing(get_gradient_function(mdcpo)) @@ -338,6 +339,7 @@ function difference_of_convex_proximal_point!( sub_cost, sub_grad, sub_hess; evaluation=evaluation ) end; + objective_type=objective_type, sub_kwargs..., ) end, @@ -383,7 +385,7 @@ function difference_of_convex_proximal_point!( """, ) end - dmdcpo = decorate_objective!(M, mdcpo; kwargs...) + dmdcpo = decorate_objective!(M, mdcpo; objective_type=objective_type, kwargs...) dmp = DefaultManoptProblem(M, dmdcpo) dcps = DifferenceOfConvexProximalState( M, diff --git a/src/solvers/difference_of_convex_algorithm.jl b/src/solvers/difference_of_convex_algorithm.jl index 83b4242c10..0309e80b6d 100644 --- a/src/solvers/difference_of_convex_algorithm.jl +++ b/src/solvers/difference_of_convex_algorithm.jl @@ -280,6 +280,7 @@ function difference_of_convex_algorithm!( grad_g=nothing, gradient=nothing, initial_vector=zero_vector(M, p), + objective_type=:Riemannian, stopping_criterion=if isnothing(gradient) StopAfterIteration(300) | StopWhenChangeLess(1e-9) else @@ -313,6 +314,7 @@ function difference_of_convex_algorithm!( sub_cost, sub_grad, sub_hess; evaluation=evaluation ) end; + objective_type=objective_type, sub_kwargs..., ) end, @@ -338,7 +340,7 @@ function difference_of_convex_algorithm!( end, kwargs..., #collect rest ) where {O<:Union{ManifoldDifferenceOfConvexObjective,AbstractDecoratedManifoldObjective}} - dmdco = decorate_objective!(M, mdco; kwargs...) + dmdco = decorate_objective!(M, mdco; objective_type=objective_type, kwargs...) dmp = DefaultManoptProblem(M, dmdco) # For now only subsolvers - TODO closed form solution init here if isnothing(sub_problem) diff --git a/src/solvers/exact_penalty_method.jl b/src/solvers/exact_penalty_method.jl index 3c8ecb381d..77a98bf998 100644 --- a/src/solvers/exact_penalty_method.jl +++ b/src/solvers/exact_penalty_method.jl @@ -296,16 +296,23 @@ function exact_penalty_method!( u::Real=1e-1, u_min::Real=1e-6, u_exponent=1 / 100, - θ_u=(u_min / u)^(u_exponent), ρ::Real=1.0, + objective_type=:Riemannian, θ_ρ::Real=0.3, + θ_u=(u_min / u)^(u_exponent), smoothing=LogarithmicSumOfExponentials(), sub_cost=ExactPenaltyCost(cmo, ρ, u; smoothing=smoothing), sub_grad=ExactPenaltyGrad(cmo, ρ, u; smoothing=smoothing), + sub_kwargs=[], sub_problem::AbstractManoptProblem=DefaultManoptProblem( - M, ManifoldGradientObjective(sub_cost, sub_grad; evaluation=evaluation) + M, + decorate_objective!( + M, + ManifoldGradientObjective(sub_cost, sub_grad; evaluation=evaluation); + objective_type=objective_type, + sub_kwargs..., + ), ), - sub_kwargs=[], sub_stopping_criterion=StopAfterIteration(300) | StopWhenGradientNormLess(ϵ) | StopWhenStepsizeLess(1e-8), @@ -342,7 +349,7 @@ function exact_penalty_method!( θ_u=θ_u, stopping_criterion=stopping_criterion, ) - deco_o = decorate_objective!(M, cmo; kwargs...) + deco_o = decorate_objective!(M, cmo; objective_type=objective_type, kwargs...) 
dmp = DefaultManoptProblem(M, deco_o) epms = decorate_state!(emps; kwargs...) solve!(dmp, epms) diff --git a/src/solvers/solver.jl b/src/solvers/solver.jl index 10ec67ce58..e41e2b62f4 100644 --- a/src/solvers/solver.jl +++ b/src/solvers/solver.jl @@ -64,13 +64,15 @@ decorate the [`AbstractManifoldObjective`](@ref)` o` with specific decorators. optional arguments provide necessary details on the decorators. A specific one is used to activate certain decorators. -* `cache` – (`missing`) specify a cache. Currenlty `:Simple` is supported and `:LRU` if you +* `cache` – (`missing`) specify a cache. Currenlty `:Simple` is supported and `:LRU` if you load `LRUCache.jl`. For this case a tuple specifying what to cache and how many can be provided, i.e. `(:LRU, [:Cost, :Gradient], 10)`, where the number specifies the size of each cache. and 10 is the default if one omits the last tuple entry -* `count` – (`missing`) specify calls to the objective to be called, see [`ManifoldCountObjective`](@ref) for the full list - -other keywords are ignored. +* `count` – (`missing`) specify calls to the objective to be called, see [`ManifoldCountObjective`](@ref) for the full list +* `objective_type` – (`:Riemannian`) specify that an objective is `:Riemannian` or `:Euclidean`. + The `:Euclidean` symbol is equivalent to specifying it as `:Embedded`, + since in the end, both refer to convertiing an objective from the embedding (whether its Euclidean or not) + to the Riemannian one. # See also @@ -83,14 +85,27 @@ function decorate_objective!( Missing,Symbol,Tuple{Symbol,<:AbstractArray},Tuple{Symbol,<:AbstractArray,P} }=missing, count::Union{Missing,AbstractVector{<:Symbol}}=missing, + objective_type::Symbol=:Riemannian, + p=objective_type == :Riemannian ? missing : rand(M), + embedded_p=objective_type == :Riemannian ? missing : embed(M, p), + embedded_X=objective_type == :Riemannian ? missing : embed(M, p, zero_vector(M, p)), return_objective=false, kwargs..., ) where {O<:AbstractManifoldObjective,P} - # Order: First count _then_ cache, so that cache is the outer decorator and - # we only count _after_ cache misses - deco_o = ismissing(count) ? o : objective_count_factory(M, o, count) - deco_o = ismissing(cache) ? deco_o : objective_cache_factory(M, deco_o, cache) - deco_o = return_objective ? ReturnManifoldObjective(deco_o) : deco_o + # Order: + # 1) wrap embedding, + # 2) _then_ count + # 3) _then_ cache, + # count should not be affected by 1) but cache should be on manifold not embedding + # => we only count _after_ cache misses + # and always last wrapper: ReturnObjective. 
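+    # For example, `objective_type=:Euclidean`, `count=[:Gradient]`, and `cache=:Simple`
+    # (an illustrative combination of keywords) yields, from inner to outer wrapper,
+    # EmbeddedManifoldObjective -> ManifoldCountObjective -> SimpleManifoldCachedObjective.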
+ deco_o = o + if objective_type ∈ [:Embedding, :Euclidean] + deco_o = EmbeddedManifoldObjective(o, embedded_p, embedded_X) + end + (!ismissing(count)) && (deco_o = objective_count_factory(M, deco_o, count)) + (!ismissing(cache)) && (deco_o = objective_cache_factory(M, deco_o, cache)) + (return_objective) && (deco_o = ReturnManifoldObjective(deco_o)) return deco_o end diff --git a/test/plans/test_embedded.jl b/test/plans/test_embedded.jl new file mode 100644 index 0000000000..6b69b0db8a --- /dev/null +++ b/test/plans/test_embedded.jl @@ -0,0 +1,76 @@ +using Manifolds, Manopt, Test, LinearAlgebra + +@testset "Test Embedding accessors and conversion" begin + n = 5 + k = 2 + E = ℝ^(5, 2) + M = Grassmann(n, k) + A = diagm([1.0, 2.0, 3.0, 4.0, 5.0]) + f(M, p) = 1 / 2 * tr(p' * A * p) + ∇f(M, p) = A * p + ∇²f(M, p, X) = A * X + grad_f(M, p) = A * p - p * (p' * A * p) + Hess_f(M, p, X) = A * X - p * p' * A * X - X * p' * A * p + o = ManifoldHessianObjective(f, ∇f, ∇²f) + p = [1.0 0.0; 0.0 1.0; 0.0 0.0; 0.0 0.0; 0.0 0.0] + X = [0.0 0.0; 0.0 0.0; 1.0 0.0; 0.0 1.0; 0.0 0.0] + + # With interims caches for p and X + eo1 = Manopt.decorate_objective!( + M, o; objective_type=:Euclidean, embedded_p=copy(p), embedded_X=copy(X) + ) + eo2 = EmbeddedManifoldObjective(o, missing, copy(X)) + eo3 = EmbeddedManifoldObjective(o, copy(p), missing) + eo4 = EmbeddedManifoldObjective(o) + + for eo in [eo1, eo2, eo3, eo4] + @testset "$(split(repr(eo), " ")[1])" begin + @test get_cost(M, eo, p) == f(E, p) + @test get_gradient(E, o, p) == ∇f(E, p) + @test get_gradient(M, eo, p) == grad_f(M, p) + Y = zero_vector(M, p) + get_gradient!(M, Y, eo, p) + @test Y == grad_f(M, p) + @test get_hessian(M, o, p, X) == ∇²f(M, p, X) + get_hessian(M, eo, p, X) == Hess_f(M, p, X) + get_hessian!(M, Y, eo, p, X) + @test Y == Hess_f(M, p, X) + end + end + # Without interims caches for p and X + @test repr(eo4) == + "EmbeddedManifoldObjective{Missing,Missing} of an $(repr(eo4.objective))" + + # Constraints, though this is not the most practical constraint + o2 = ConstrainedManifoldObjective(f, ∇f, [f], [∇f], [f], [∇f]) + eco1 = EmbeddedManifoldObjective(M, o2) + eco2 = EmbeddedManifoldObjective(o2, missing, copy(X)) + eco3 = EmbeddedManifoldObjective(o2, copy(p), missing) + eco4 = EmbeddedManifoldObjective(o2) + + for eco in [eco1, eco2, eco3, eco4] + @testset "$(split(repr(eco), " ")[1])" begin + @test get_constraints(M, eco, p) == [[f(E, p)], [f(E, p)]] + @test get_equality_constraints(M, eco, p) == [f(E, p)] + @test get_equality_constraint(M, eco, p, 1) == f(E, p) + @test get_inequality_constraints(M, eco, p) == [f(E, p)] + @test get_inequality_constraint(M, eco, p, 1) == f(E, p) + @test get_grad_equality_constraints(M, eco, p) == [grad_f(M, p)] + Z = [zero_vector(M, p)] + get_grad_equality_constraints!(M, Z, eco, p) + @test Z == [grad_f(M, p)] + @test get_grad_equality_constraint(M, eco, p, 1) == grad_f(M, p) + Y = zero_vector(M, p) + get_grad_equality_constraint!(M, Y, eco, p, 1) + @test Y == grad_f(M, p) + @test get_grad_inequality_constraints(M, eco, p) == [grad_f(M, p)] + Z = [zero_vector(M, p)] + get_grad_inequality_constraints!(M, Z, eco, p) + @test Z == [grad_f(M, p)] + @test get_grad_inequality_constraint(M, eco, p, 1) == grad_f(M, p) + get_grad_inequality_constraint!(M, Y, eco, p, 1) + @test Y == grad_f(M, p) + end + end + o3 = EmbeddedManifoldObjective(ManifoldCountObjective(M, o, [:Cost]), p, X) +end diff --git a/test/runtests.jl b/test/runtests.jl index 1409eac8b2..97045ac724 100644 --- a/test/runtests.jl +++ 
b/test/runtests.jl @@ -11,6 +11,7 @@ include("utils/example_tasks.jl") include("plans/test_counts.jl") include("plans/test_debug.jl") include("plans/test_difference_of_convex_plan.jl") + include("plans/test_embedded.jl") include("plans/test_storage.jl") include("plans/test_cache.jl") include("plans/test_nelder_mead_plan.jl") diff --git a/tutorials/AutomaticDifferentiation.qmd b/tutorials/AutomaticDifferentiation.qmd index bf038147db..2721ae2fb2 100644 --- a/tutorials/AutomaticDifferentiation.qmd +++ b/tutorials/AutomaticDifferentiation.qmd @@ -118,7 +118,7 @@ norm(M, p, X1 - X2) We obtain quite a good approximation of the gradient. -## 2. Conversion of a Euclidean Gradient in the Embedding to a Riemannian Gradient of a (not Necessarily Isometrically) Embedded Manifold +``## [2. Conversion of a Euclidean Gradient in the Embedding to a Riemannian Gradient of a (not Necessarily Isometrically) Embedded Manifold](@id EmbeddedGradient)``{=commonmark} Let $\tilde f\colon\mathbb R^m \to \mathbb R$ be a function on the embedding of an $n$-dimensional manifold $\mathcal M \subset \mathbb R^m$and let $f\colon \mathcal M \to \mathbb R$ denote the restriction of $\tilde f$ to the manifold $\mathcal M$. diff --git a/tutorials/EmbeddingObjectives.qmd b/tutorials/EmbeddingObjectives.qmd new file mode 100644 index 0000000000..d1738953af --- /dev/null +++ b/tutorials/EmbeddingObjectives.qmd @@ -0,0 +1,207 @@ +--- +title: How to define the cost in the embedding +author: Ronny Bergmann +--- + +Specifying a cost function $f\colon \mathcal M \to \mathbb R$ on a manifold +is usually the model one starts with. +Specifying its gradient $\operatorname{grad} f\colon\mathcal M \to T\mathcal M$, or more precisely $\operatorname{grad}f(p) \in T_p\mathcal M$, and eventually a Hessian $\operatorname{Hess} f\colon T_p\mathcal M \to T_p\mathcal M$ are then necessary to perform optimization. +Since these might be challenging to compute, especially when manifolds and differential geometry are not +the main area of a user – easier to use methods might be welcome. + +This tutorial discusses how to specify $f$ in the embedding as $\tilde f$, maybe only locally around the manifold, +and use the Euclidean gradient $∇ \tilde f$ and Hessian $∇^2 \tilde f$ within `Manopt.jl`. + +For the theoretical background see ``[convert an Euclidean to an Riemannian Gradient](@ref EmbeddedGradient)``{=commonmark}, +or Section 4.7 of [Boumal:2023](@cite) for the gradient part or Section 5.11 as well as [Nguyen:2023](@cite) +for the background on converting Hessians. + +Here we use the Examples 9.40 and 9.49 of [Boumal:2023](@cite) and compare the different methods, +one can call the solver, depending on which gradient and/or Hessian one provides. + +```{julia} +#| echo: false +#| code-fold: true +#| output: false +# For now use the local one since Manifolds.jl #644 is required for this +using Pkg; +cd(@__DIR__) +Pkg.activate("."); # for reproducibility use the local tutorial environment. 
+# But for this version use here the most recent dev version from the parent folder +Pkg.develop(PackageSpec(; path=(@__DIR__) * "/../")) +``` + +```{julia} +#| output: false +using Manifolds, Manopt, ManifoldDiff +using LinearAlgebra, Random, Colors, Plots +Random.seed!(123) +``` + +We consider the cost function on the [`Grassmann`](https://juliamanifolds.github.io/Manifolds.jl/latest/manifolds/grassmann.html) manifold given by + +```{julia} +n = 5 +k = 2 +M = Grassmann(5,2) +A = Symmetric(rand(n,n)); +``` + +```{julia} +#| output: false +f(M, p) = 1 / 2 * tr(p' * A * p) +``` + +Note that this implementation is already also a valid implementation / continuation +of $f$ into the (lifted) embedding of the Grassmann manifold. +In the implementation we can use `f` for both the Euclidean $\tilde f$ and the Grassmann case $f$. + +Its Euclidean gradient $\nabla f$ and Hessian $\nabla^2f$ are easy to compute as + +```{julia} +#| output: false +∇f(M, p) = A * p +∇²f(M,p,X) = A*X +``` + +On the other hand, from the aforementioned Example 9.49 we can also state +the Riemannian gradient and Hessian for comparison as + +```{julia} +#| output: false +grad_f(M, p) = A * p - p * (p' * A * p) +Hess_f(M, p, X) = A * X - p * p' * A * X - X * p' * A * p +``` + +We can check that these are the correct at least numerically by calling +the [`check_gradient`](@ref) + +```{julia} +check_gradient(M, f, grad_f; plot=true) +``` + +and the [`check_Hessian`](@ref), which requires a bit more tolerance in its linearity check + +```{julia} +check_Hessian(M, f, grad_f, Hess_f; plot=true, throw_error=true, atol=1e-15) +``` + +While they look reasonable here and were already derived – for the general case this derivation +might be more complicated. + +Luckily there exist two functions in [`ManifoldDiff.jl`](https://juliamanifolds.github.io/ManifoldDiff.jl/stable/) that are implemented for several +manifolds from [`Manifolds.jl`](https://github.com/JuliaManifolds/Manifolds.jl), namely [`riemannian_gradient`](https://juliamanifolds.github.io/ManifoldDiff.jl/stable/library/#ManifoldDiff.riemannian_gradient-Tuple{AbstractManifold,%20Any,%20Any})`(M, p, eG)` that converts a Riemannian gradient +`eG=`$\nabla \tilde f(p)$ into a the Riemannain one $\operatorname{grad} f(p)$ + and [`riemannian_Hessian`](https://juliamanifolds.github.io/ManifoldDiff.jl/stable/library/#ManifoldDiff.riemannian_Hessian-Tuple{AbstractManifold,%20Any,%20Any,%20Any,%20Any})`(M, p, eG, eH, X)` + which converts the Euclidean Hessian `eH=`$\nabla^2 \tilde f(p)[X]$ into $\operatorname{Hess} f(p)[X]$, + where we also require the Euclidean gradient `eG=`$\nabla \tilde f(p)$. + +So we can define + +```{julia} +#| output: false +grad2_f(M, p) = riemannian_gradient(M, p, ∇f(get_embedding(M), embed(M, p))) +``` + +where only formally we here call `embed(M,p)` before passing `p` to the Euclidean gradient, +though here (for the Grassmann manifold with Stiefel representation) the embedding function is the identity. + +Similarly for the Hessian, where in our example the embeddings of both the points and tangent vectors are the identity. 
+ +```{julia} +#| output: false +function Hess2_f(M, p, X) + return riemannian_Hessian( + M, + p, + ∇f(get_embedding(M), embed(M, p)), + ∇²f(get_embedding(M), embed(M, p), embed(M, p, X)), + X + ) +end +``` + +And we can again check these numerically, + +```{julia} +check_gradient(M, f, grad2_f; plot=true) +``` + +and + +```{julia} +check_Hessian(M, f, grad2_f, Hess2_f; plot=true, throw_error=true, atol=1e-14) +``` + +which yields the same result, but we see that the Euclidean conversion might be a bit less stable. + +Now if we want to use these in optimization we would require these two functions to call e.g. + +```{julia} +p0 = [1.0 0.0; 0.0 1.0; 0.0 0.0; 0.0 0.0; 0.0 0.0] +r1 = adaptive_regularization_with_cubics( + M, + f, + grad_f, + Hess_f, + p0; + debug=[:Iteration, :Cost, "\n"], + return_objective=true, + return_state=true, +) +q1 = get_solver_result(r1) +r1 +``` + +but if you choose to go for the conversions, then, thinking of the embedding and defining two new functions might be tedious. There is a shortcut for these, which performs the change internally, when necessary +by specifying `objective_type=:Euclidean`. + +```{julia} +r2 = adaptive_regularization_with_cubics( + M, + f, + ∇f, + ∇²f, + p0; + # The one line different to specify our grad/Hess are Eucldiean: + objective_type=:Euclidean, + debug=[:Iteration, :Cost, "\n"], + return_objective=true, + return_state=true, +) +q2 = get_solver_result(r2) +r2 +``` + +which returns the same result, see + +```{julia} +distance(M, q1, q2) +``` + + +This conversion also works for the gradients of constraints, and is passed down to +subsolvers by deault when these are created using the Euclidean objective $f$, $\nabla f$ and $\nabla^2 f$. + +## Summary + +If you have the Euclidean gradient (or Hessian) available for a solver call, +all you need to provide is `objective_type=:Euclidean` to convert the objective +to a Riemannian one. + +## Literature + +````{=commonmark} +```@bibliography +Pages = ["tutorials/EmbeddingObjectives.md"] +Canonical=false +``` +```` + +## Technical Details + +This notebook was rendered with the following environment + +```{julia} +Pkg.status() +``` \ No newline at end of file diff --git a/tutorials/Project.toml b/tutorials/Project.toml index 1db6776935..d0b552b10d 100644 --- a/tutorials/Project.toml +++ b/tutorials/Project.toml @@ -18,7 +18,7 @@ FiniteDifferences = "0.12" IJulia = "1" LRUCache = "1.4" ManifoldDiff = "0.3" -Manifolds = "0.8.46" +Manifolds = "0.8.75" ManifoldsBase = "0.14.5" Manopt = "0.4.22" Plots = "1.38"