Commit
Rework EPM docs.
kellertuer committed Jul 28, 2024
1 parent 0182d58 commit f236171
Showing 2 changed files with 106 additions and 78 deletions.
2 changes: 2 additions & 0 deletions src/solvers/augmented_Lagrangian_method.jl
@@ -322,6 +322,8 @@ Otherwise the problem is not constrained and a better solver would be for exampl
For the `range`s of the constraints' gradient, other power manifold tangent space representations,
mainly the [`ArrayPowerRepresentation`](@extref Manifolds :jl:type:`Manifolds.ArrayPowerRepresentation`) can be used if the gradients can be computed more efficiently in that representation.
$(_kw_others)
$_doc_sec_output
"""

Expand Down
182 changes: 104 additions & 78 deletions src/solvers/exact_penalty_method.jl
@@ -1,27 +1,43 @@
@doc raw"""
@doc """
ExactPenaltyMethodState{P,T} <: AbstractManoptSolverState
Describes the exact penalty method, with
# Fields
a default value is given in brackets if a parameter can be left out in initialization.
* `p`: a set point on a manifold as starting point
* `sub_problem`: an [`AbstractManoptProblem`](@ref) problem for the subsolver
* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver
* `ϵ=1e-3`: the accuracy tolerance
* `ϵ_min=1e-6`: the lower bound for the accuracy tolerance
* `u=1e-1`: the smoothing parameter and threshold for violation of the constraints
* `u_min=1e-6`: the lower bound for the smoothing parameter and threshold for violation of the constraints
* `ρ=1.0`: the penalty parameter
* `θ_ρ=0.3`: the scaling factor of the penalty parameter
* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(300) | (`[`StopWhenSmallerOrEqual`](@ref)`(ϵ, ϵ_min) & `[`StopWhenChangeLess`](@ref)`(min_stepsize))`) a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop.
* `ϵ`: the accuracy tolerance
* `ϵ_min`: the lower bound for the accuracy tolerance
* $(_field_p)
* `ρ`: the penalty parameter
* $(_field_sub_problem)
* $(_field_sub_state)
* $(_field_stop)
* `u`: the smoothing parameter and threshold for violation of the constraints
* `u_min`: the lower bound for the smoothing parameter and threshold for violation of the constraints
* `θ_ϵ`: the scaling factor of the tolerance parameter
* `θ_ρ`: the scaling factor of the penalty parameter
* `θ_u`: the scaling factor of the smoothing parameter
# Constructor
ExactPenaltyMethodState(M::AbstractManifold, p, sub_problem, sub_state; kwargs...)
construct an exact penalty options with the remaining previously mentioned fields as keywords using their provided defaults.
construct an exact penalty state.
# Keyword arguments
* `ϵ=1e-3`
* `ϵ_min=1e-6`
* `ϵ_exponent=1 / 100`: a shortcut for the scaling factor ``θ_ϵ`` (a worked check follows after this docstring)
* `θ_ϵ=(ϵ_min / ϵ)^(ϵ_exponent)`
* `u=1e-1`
* `u_min=1e-6`
* `u_exponent=1 / 100`: a shortcut for the scaling factor ``θ_u``.
* `θ_u=(u_min / u)^(u_exponent)`
* `ρ=1.0`
* `θ_ρ=0.3`
* `stopping_criterion=`[`StopAfterIteration`](@ref)`(300)`$(_sc_any)` (`
[`StopWhenSmallerOrEqual`](@ref)`(:ϵ, ϵ_min)`$(_sc_all)[`StopWhenChangeLess`](@ref)`(1e-10) )`
# See also
@@ -110,51 +126,22 @@ function show(io::IO, epms::ExactPenaltyMethodState)
return print(io, s)
end
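
A quick worked check of the `ϵ_exponent` shortcut from the keyword list above, assuming the default values shown there:

```julia
ϵ, ϵ_min, ϵ_exponent = 1e-3, 1e-6, 1 / 100
θ_ϵ = (ϵ_min / ϵ)^ϵ_exponent   # (1e-3)^0.01 ≈ 0.933
# Each iteration then applies ϵ = max(ϵ_min, θ_ϵ * ϵ): a slow geometric
# decay from 1e-3 toward the lower bound ϵ_min = 1e-6.
```

The same construction yields `θ_u` from `u`, `u_min`, and `u_exponent`.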

@doc raw"""
exact_penalty_method(M, F, gradF, p=rand(M); kwargs...)
exact_penalty_method(M, cmo::ConstrainedManifoldObjective, p=rand(M); kwargs...)
perform the exact penalty method (EPM) [LiuBoumal:2019](@cite)
The aim of the EPM is to find a solution of the constrained optimisation task
```math
\begin{aligned}
\min_{p ∈\mathcal{M}} &f(p)\\
\text{subject to } &g_i(p)\leq 0 \quad \text{ for } i= 1, …, m,\\
\quad &h_j(p)=0 \quad \text{ for } j=1,…,n,
\end{aligned}
```
where `M` is a Riemannian manifold, and ``f``, ``\{g_i\}_{i=1}^m`` and ``\{h_j\}_{j=1}^n``
are twice continuously differentiable functions from `M` to ℝ.
For that a weighted ``L_1``-penalty term for the violation of the constraints is added to the objective
_doc_EPM_penalty = raw"""
```math
f(x) + ρ\biggl( \sum_{i=1}^m \max\bigl\{0, g_i(x)\bigr\} + \sum_{j=1}^n \vert h_j(x)\vert\biggr),
```
where ``ρ>0`` is the penalty parameter.
Since this is non-smooth, a [`SmoothingTechnique`](@ref) with parameter `u` is applied,
see the [`ExactPenaltyCost`](@ref).
In every step ``k`` of the exact penalty method, the smoothed objective is then minimized over all
``x ∈\mathcal{M}``.
Then, the accuracy tolerance ``ϵ`` and the smoothing parameter ``u`` are updated by setting
"""

_doc_EMP_ϵ_update = raw"""
```math
ϵ^{(k)}=\max\{ϵ_{\min}, θ_ϵ ϵ^{(k-1)}\},
```
where ``ϵ_{\min}`` is the lowest value ``ϵ`` is allowed to become and ``θ_ϵ ∈ (0,1)`` is a constant scaling factor, and
"""

```math
u^{(k)} = \max \{u_{\min}, \theta_u u^{(k-1)} \},
```
where ``u_{\min}`` is the lowest value ``u`` is allowed to become and ``θ_u ∈ (0,1)`` is a constant scaling factor.
Finally, the penalty parameter ``ρ`` is updated as
_doc_EMP_ρ_update = raw"""
```math
ρ^{(k)} = \begin{cases}
ρ^{(k-1)}/θ_ρ,  & \text{if } \displaystyle \max_{j ∈ \mathcal{E}, i ∈ \mathcal{I}} \Bigl\{ \vert h_j(x^{(k)}) \vert, g_i(x^{(k)}) \Bigr\} \geq u^{(k-1)},\\
ρ^{(k-1)},  & \text{otherwise,}
\end{cases}

@@ -163,26 +150,66 @@ Finally, the penalty parameter ``ρ`` is updated as
```
where ``θ_ρ ∈ (0,1)`` is a constant scaling factor.
"""
_doc_EMP_u_update = raw"""
```math
u^{(k)} = \max \{u_{\min}, \theta_u u^{(k-1)} \},
```
where ``u_{\min}`` is the lowest value ``u`` is allowed to become and ``θ_u ∈ (0,1)`` is a constant scaling factor.
"""

_doc_EPM = """
exact_penalty_method(M, f, grad_f, p=rand(M); kwargs...)
exact_penalty_method(M, cmo::ConstrainedManifoldObjective, p=rand(M); kwargs...)
exact_penalty_method!(M, f, grad_f, p; kwargs...)
exact_penalty_method!(M, cmo::ConstrainedManifoldObjective, p; kwargs...)
perform the exact penalty method (EPM) [LiuBoumal:2019](@cite).
The aim of the EPM is to find a solution of the constrained optimisation task
$(_problem_constrained)
where `M` is a Riemannian manifold, and ``f``, ``$(_math_sequence("g", "i", "1", "n"))`` and ``$(_math_sequence("h", "j", "1", "m"))``
are twice continuously differentiable functions from `M` to ℝ.
For that a weighted ``L_1``-penalty term for the violation of the constraints is added to the objective
$(_doc_EPM_penalty)
Since this is non-smooth, a [`SmoothingTechnique`](@ref) with parameter `u` is applied,
see the [`ExactPenaltyCost`](@ref).
In every step ``k`` of the exact penalty method, the smoothed objective is then minimized over all ``p ∈$(_l_M)``.
Then, the accuracy tolerance ``ϵ`` and the smoothing parameter ``u`` are updated by setting
$(_doc_EMP_ϵ_update)
$(_doc_EMP_u_update)
Finally, the penalty parameter ``ρ`` is updated as
$(_doc_EMP_ρ_update)
# Input
* `M` a manifold ``\mathcal M``
* `f` a cost function ``f:\mathcal M→ℝ`` to minimize
* `grad_f` the gradient of the cost function
* $(_arg_M)
* $(_arg_f)
* $(_arg_grad_f)
* $(_arg_p)
# Optional (if not called with the [`ConstrainedManifoldObjective`](@ref) `cmo`)
# Keyword arguments
if not called with the [`ConstrainedManifoldObjective`](@ref) `cmo`
* `g=nothing`: the inequality constraints
* `h=nothing`: the equality constraints
* `grad_g=nothing`: the gradient of the inequality constraints
* `grad_h=nothing`: the gradient of the equality constraints
Note that one of the pairs (`g`, `grad_g`) or (`h`, `grad_h`) has to be provided.
Otherwise the problem is not constrained and you should consider using unconstrained solvers like [`quasi_Newton`](@ref).
Otherwise the problem is not constrained and a better solver would be for example [`quasi_Newton`](@ref).
# Optional
# Further keyword arguments
* `smoothing`: ([`LogarithmicSumOfExponentials`](@ref)) [`SmoothingTechnique`](@ref) to use
* `ϵ=1e-3`: the accuracy tolerance
* `ϵ_exponent=1/100`: exponent of the ϵ update factor;
* `ϵ_min=1e-6`: the lower bound for the accuracy tolerance
@@ -191,30 +218,36 @@ Otherwise the problem is not constrained and you should consider using unconstra
* `u_min=1e-6`: the lower bound for the smoothing parameter and threshold for violation of the constraints
* `ρ=1.0`: the penalty parameter
* `equality_constraints=nothing`: the number ``m`` of equality constraints.
* `gradient_range` (`nothing`, equivalent to [`NestedPowerRepresentation`](@extref)) specify how gradients are represented
* `gradient_equality_range=gradient_range`: specify how the gradients of the equality constraints are represented
* `gradient_inequality_range=gradient_range`: specify how the gradients of the inequality constraints are represented
If not provided, a call to the gradient of `h` is performed to estimate these.
* `gradient_range=nothing`: specify how both gradients of the constraints are represented
* `gradient_equality_range=gradient_range`:
specify how gradients of the equality constraints are represented, see [`VectorGradientFunction`](@ref).
* `gradient_inequality_range=gradient_range`:
specify how gradients of the inequality constraints are represented, see [`VectorGradientFunction`](@ref).
* `inequality_constraints=nothing`: the number ``n`` of inequality constraints.
If not provided, a call to the gradient of `g` is performed to estimate these.
* `min_stepsize=1e-10`: the minimal step size
* `sub_cost`: ([`ExactPenaltyCost`](@ref)`(problem, ρ, u; smoothing=smoothing)`) use this exact penalty cost, especially with the same numbers `ρ,u` as in the options for the sub problem
* `sub_grad`: ([`ExactPenaltyGrad`](@ref)`(problem, ρ, u; smoothing=smoothing)`) use this exact penalty gradient, especially with the same numbers `ρ,u` as in the options for the sub problem
* `sub_kwargs=(;)`: keyword arguments to decorate the sub options, for example debug, that automatically respects the main solvers debug options (like sub-sampling) as well
* `sub_stopping_criterion`: ([`StopAfterIteration`](@ref)`(200) | `[`StopWhenGradientNormLess`](@ref)`(ϵ) | `[`StopWhenStepsizeLess`](@ref)`(1e-10)`) specify a stopping criterion for the subsolver.
* `sub_problem`: ([`DefaultManoptProblem`](@ref)`(M, `[`ManifoldGradientObjective`](@ref)`(sub_cost, sub_grad; evaluation=evaluation)`, provide a problem for the subsolver
* `sub_state`: ([`QuasiNewtonState`](@ref)) using [`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref) with [`InverseBFGS`](@ref) and `sub_stopping_criterion` as a stopping criterion. See also `sub_kwargs`.
* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(300)` | ([`StopWhenSmallerOrEqual`](@ref)`(ϵ, ϵ_min)` & [`StopWhenChangeLess`](@ref)`(1e-10)`) a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop.
* `smoothing=`[`LogarithmicSumOfExponentials`](@ref): a [`SmoothingTechnique`](@ref) to use
* `sub_cost=`[`ExactPenaltyCost`](@ref)`(problem, ρ, u; smoothing=smoothing)`: cost to use in the sub solver
$(_kw_used_in("sub_problem"))
* `sub_grad=`[`ExactPenaltyGrad`](@ref)`(problem, ρ, u; smoothing=smoothing)`: gradient to use in the sub solver
$(_kw_used_in("sub_problem"))
* $(_kw_sub_kwargs_default): $(_kw_sub_kwargs)
* `sub_stopping_criterion=`[`StopAfterIteration`](@ref)`(200)`$(_sc_any)[`StopWhenGradientNormLess`](@ref)`(ϵ)`$(_sc_any)[`StopWhenStepsizeLess`](@ref)`(1e-10)`: a stopping criterion for the sub solver
$(_kw_used_in("sub_state"))
* `sub_problem=`[`DefaultManoptProblem`](@ref)`(M, `[`ManifoldGradientObjective`](@ref)`(sub_cost, sub_grad; evaluation=evaluation))`: the problem for the subsolver. The objective can also be decorated with arguments from `sub_kwargs`.
* `sub_state=`[`QuasiNewtonState`](@ref)`(...)`: a solver to use for the sub problem. By default an L-BFGS is used.
* `stopping_criterion=`[`StopAfterIteration`](@ref)`(300)`$(_sc_any)` ( `[`StopWhenSmallerOrEqual`](@ref)`(ϵ, ϵ_min)`$(_sc_all)[`StopWhenChangeLess`](@ref)`(1e-10) )`: $(_kw_stopping_criterion)
For the `range`s of the constraints' gradient, other power manifold tangent space representations,
mainly the [`ArrayPowerRepresentation`](@extref Manifolds :jl:type:`Manifolds.ArrayPowerRepresentation`) can be used if the gradients can be computed more efficiently in that representation.
With `equality_constraints` and `inequality_constraints` you can provide the dimensions
of the ranges of `h` and `g`, respectively. If they are not provided, a call to either of these
functions, together with `M` and the start point `p0`, is performed to try to infer them.
# Output
$(_kw_others)
the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details
$_doc_sec_output
"""

@doc "$(_doc_EPM)"
exact_penalty_method(M::AbstractManifold, args...; kwargs...)
function exact_penalty_method(M::AbstractManifold, f, grad_f; kwargs...)
return exact_penalty_method(M, f, grad_f, rand(M); kwargs...)
@@ -298,14 +331,7 @@ function exact_penalty_method(
return exact_penalty_method!(M, cmo, q; kwargs...)
end
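
A corresponding sketch of the in-place variant, reusing the (assumed) setup from the example above; the start point is overwritten with the result:

```julia
p = copy(M, p0)
exact_penalty_method!(M, f, grad_f, p; g=g, grad_g=grad_g)
```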

@doc raw"""
exact_penalty_method!(M, f, grad_f, p; kwargs...)
exact_penalty_method!(M, cmo::ConstrainedManifoldObjective, p; kwargs...)
perform the exact penalty method (EPM) performed in place of `p`.
For all options, see [`exact_penalty_method`](@ref).
"""
@doc "$(_doc_EPM)"
exact_penalty_method!(M::AbstractManifold, args...; kwargs...)
function exact_penalty_method!(
M::AbstractManifold,
