From 583efd26d33513205ce0fa9215d038f8ea78d6d4 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Thu, 3 Aug 2023 17:00:46 +0200 Subject: [PATCH] Fix a typo in trust regions and extend Frank-Wolfe docs (#284) * fixes two typos in the trust region Cauchy model computation. * Improve docs of Frank Wolfe. * Let's be more strict in the docs! * bump version. --- Project.toml | 2 +- docs/make.jl | 15 ++++++++++++ src/solvers/FrankWolfe.jl | 47 ++++++++++++++++++++++++------------ src/solvers/trust_regions.jl | 10 ++++---- 4 files changed, 53 insertions(+), 21 deletions(-) diff --git a/Project.toml b/Project.toml index a73abff701..d0b880b18e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Manopt" uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5" authors = ["Ronny Bergmann "] -version = "0.4.29" +version = "0.4.30" [deps] ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" diff --git a/docs/make.jl b/docs/make.jl index b45f78a699..885d9f899d 100755 --- a/docs/make.jl +++ b/docs/make.jl @@ -63,6 +63,21 @@ makedocs( ), modules=[Manopt], sitename="Manopt.jl", + strict=[ + :doctest, + :linkcheck, + :parse_error, + :example_block, + :autodocs_block, + :cross_references, + :docs_block, + :eval_block, + :example_block, + :footnote, + :meta_block, + :missing_docs, + :setup_block, + ], pages=[ "Home" => "index.md", "About" => "about.md", diff --git a/src/solvers/FrankWolfe.jl b/src/solvers/FrankWolfe.jl index 3c8f361e71..bf7f35320e 100644 --- a/src/solvers/FrankWolfe.jl +++ b/src/solvers/FrankWolfe.jl @@ -7,13 +7,13 @@ It comes in two forms, depending on the realisation of the `subproblem`. # Fields -* `p` – the current iterate, i.e. a point on the manifold -* `X` – the current gradient ``\operatorname{grad} F(p)``, i.e. a tangent vector to `p`. +* `p` – the current iterate, i.e. a point on the manifold +* `X` – the current gradient ``\operatorname{grad} F(p)``, i.e. a tangent vector to `p`. 
* `inverse_retraction_method` – (`default_inverse_retraction_method(M, typeof(p))`) an inverse retraction method to use within Frank Wolfe. -* `sub_problem` – an [`AbstractManoptProblem`](@ref) problem for the subsolver -* `sub_state` – an [`AbstractManoptSolverState`](@ref) for the subsolver -* `stop` – ([`StopAfterIteration`](@ref)`(200) | `[`StopWhenGradientNormLess`](@ref)`(1.0e-6)`) a [`StoppingCriterion`](@ref) -* `stepsize` - ([`DecreasingStepsize`](@ref)`(; length=2.0, shift=2)`) ``s_k`` which by default is set to ``s_k = \frac{2}{k+2}``. +* `sub_problem` – an [`AbstractManoptProblem`](@ref) problem or a function `(M, p, X) -> q` or `(M, q, p, X)` for a closed form solution of the sub problem +* `sub_state` – an [`AbstractManoptSolverState`](@ref) for the subsolver or an [`AbstractEvaluationType`](@ref) in case the sub problem is provided as a function +* `stop` – ([`StopAfterIteration`](@ref)`(200) | `[`StopWhenGradientNormLess`](@ref)`(1.0e-6)`) a [`StoppingCriterion`](@ref) +* `stepsize` - ([`DecreasingStepsize`](@ref)`(; length=2.0, shift=2)`) ``s_k`` which by default is set to ``s_k = \frac{2}{k+2}``. * `retraction_method` – (`default_retraction_method(M, typeof(p))`) a retraction to use within Frank-Wolfe For the subtask, we need a method to solve # Constructor - FrankWolfeState(M, p, X, sub_problem, sub_task) + FrankWolfeState(M, p, X, sub_problem, sub_state) where the remaining fields from above are keyword arguments with their defaults already given in brackets. """ @@ -146,24 +146,41 @@ use a retraction and its inverse. 
# Input -* `M` – a manifold ``\mathcal M`` -* `f` – a cost function ``f: \mathcal M→ℝ`` to find a minimizer ``p^*`` for +* `M` – a manifold ``\mathcal M`` +* `f` – a cost function ``f: \mathcal M→ℝ`` to find a minimizer ``p^*`` for * `grad_f` – the gradient ``\operatorname{grad}f: \mathcal M → T\mathcal M`` of f - - as a function `(M, p) -> X` or a function `(M, X, p) -> X` -* `p` – an initial value ``p ∈ \mathcal C``, note that it really has to be a feasible point + - as a function `(M, p) -> X` or a function `(M, X, p) -> X` working in place of `X`. +* `p` – an initial value ``p ∈ \mathcal C``, note that it really has to be a feasible point Alternatively to `f` and `grad_f` you can prodive the [`AbstractManifoldGradientObjective`](@ref) `gradient_objective` directly. ## Keyword Arguments -* `evaluation` ([`AllocatingEvaluation`](@ref)) whether `grad_F` is an inplace or allocating (default) funtion -* `initial_vector` – (`zero_vectoir(M,p)`) how to initialize the inner gradient tangent vector +* `evaluation` - ([`AllocatingEvaluation`](@ref)) whether `grad_f` is an inplace or allocating (default) function +* `initial_vector` – (`zero_vector(M,p)`) how to initialize the inner gradient tangent vector * `stopping_criterion` – ([`StopAfterIteration`](@ref)`(500) | `[`StopWhenGradientNormLess`](@ref)`(1.0e-6)`) a stopping criterion -* `retraction_method` – (`default_retraction_method(M, typeof(p))`) a type of retraction -* `stepsize` ([`DecreasingStepsize`](@ref)`(; length=2.0, shift=2)` +* `retraction_method` – (`default_retraction_method(M, typeof(p))`) a type of retraction +* `stepsize` - ([`DecreasingStepsize`](@ref)`(; length=2.0, shift=2)` a [`Stepsize`](@ref) to use; but it has to be always less than 1. The default is the one proposed by Frank & Wolfe: ``s_k = \frac{2}{k+2}``. 
+* `sub_cost` - ([`FrankWolfeCost`](@ref)`(p, initial_vector)`) – the cost of the Frank-Wolfe sub problem +  which by default uses the current iterate and (sub)gradient of the current iteration to define a default cost, +  this is used to define the default `sub_objective`. It is ignored, if you set that or the `sub_problem` directly +* `sub_grad` - ([`FrankWolfeGradient`](@ref)`(p, initial_vector)`) – the gradient of the Frank-Wolfe sub problem +  which by default uses the current iterate and (sub)gradient of the current iteration to define a default gradient +  this is used to define the default `sub_objective`. It is ignored, if you set that or the `sub_problem` directly +* `sub_objective` - ([`ManifoldGradientObjective`](@ref)`(sub_cost, sub_grad)`) – the objective for the Frank-Wolfe sub problem +  this is used to define the default `sub_problem`. It is ignored, if you set the `sub_problem` manually +* `sub_problem` - ([`DefaultManoptProblem`](@ref)`(M, sub_objective)`) – the Frank-Wolfe sub problem to solve. +  This can be given in three forms +  1. as an [`AbstractManoptProblem`](@ref), then the `sub_state` specifies the solver to use +  2. as a closed form solution, e.g. a function, evaluating with new allocations, that is a function `(M, p, X) -> q` that solves the sub problem on `M` given the current iterate `p` and (sub)gradient `X`. +  3. as a closed form solution, e.g. a function, evaluating in place, that is a function `(M, q, p, X) -> q` working in place of `q`, with the parameters as in the last point +  For points 2 and 3 the `sub_state` has to be set to the corresponding [`AbstractEvaluationType`](@ref), [`AllocatingEvaluation`](@ref) and [`InplaceEvaluation`](@ref), respectively +* `sub_state` - (`evaluation` if `sub_problem` is a function, a decorated [`GradientDescentState`](@ref) otherwise) +  for a function, the evaluation is inherited from the Frank-Wolfe `evaluation` keyword. 
+* `sub_kwargs` - (`[]`) – keyword arguments to decorate the `sub_state` default state in case the sub_problem is not a function All other keyword arguments are passed to [`decorate_state!`](@ref) for decorators or [`decorate_objective!`](@ref), respectively. diff --git a/src/solvers/trust_regions.jl b/src/solvers/trust_regions.jl index 713763fe65..c53be7dad9 100644 --- a/src/solvers/trust_regions.jl +++ b/src/solvers/trust_regions.jl @@ -531,11 +531,11 @@ function step_solver!(mp::AbstractManoptProblem, trs::TrustRegionsState, i) fx + real(inner(M, trs.p, trs.X, trs.η)) + 0.5 * real(inner(M, trs.p, trs.Hη, trs.η)) - modle_value_Cauchy = fx - -trs.τ * trs.trust_region_radius * norm_grad - +0.5 * trs.τ^2 * trs.trust_region_radius^2 / (norm_grad^2) * - real(inner(M, trs.p, trs.Hgrad, trs.X)) - if modle_value_Cauchy < model_value + model_value_Cauchy = + fx - trs.τ * trs.trust_region_radius * norm_grad + + 0.5 * trs.τ^2 * trs.trust_region_radius^2 / (norm_grad^2) * + real(inner(M, trs.p, trs.Hgrad, trs.X)) + if model_value_Cauchy < model_value copyto!(M, trs.η, (-trs.τ * trs.trust_region_radius / norm_grad) * trs.X) copyto!(M, trs.Hη, (-trs.τ * trs.trust_region_radius / norm_grad) * trs.Hgrad) end