Commit
Rework EPM docs.
kellertuer committed Jul 28, 2024
1 parent 0182d58 commit f236171
Showing 2 changed files with 106 additions and 78 deletions.
2 changes: 2 additions & 0 deletions src/solvers/augmented_Lagrangian_method.jl
@@ -322,6 +322,8 @@ Otherwise the problem is not constrained and a better solver would be for exampl
For the `range`s of the constraints' gradient, other power manifold tangent space representations,
mainly the [`ArrayPowerRepresentation`](@extref Manifolds :jl:type:`Manifolds.ArrayPowerRepresentation`) can be used if the gradients can be computed more efficiently in that representation.
$(_kw_others)
$_doc_sec_output
"""

Expand Down
182 changes: 104 additions & 78 deletions src/solvers/exact_penalty_method.jl
@@ -1,27 +1,43 @@
@doc raw"""
@doc """
ExactPenaltyMethodState{P,T} <: AbstractManoptSolverState
Describes the exact penalty method, with
# Fields
a default value is given in brackets if a parameter can be left out in initialization.
* `p`: a set point on a manifold as starting point
* `sub_problem`: an [`AbstractManoptProblem`](@ref) problem for the subsolver
* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver
* `ϵ=1e-3`: the accuracy tolerance
* `ϵ_min=1e-6`: the lower bound for the accuracy tolerance
* `u=1e-1`: the smoothing parameter and threshold for violation of the constraints
* `u_min=1e-6`: the lower bound for the smoothing parameter and threshold for violation of the constraints
* `ρ=1.0`: the penalty parameter
* `θ_ρ=0.3`: the scaling factor of the penalty parameter
* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(300) | (`[`StopWhenSmallerOrEqual`](@ref)`(ϵ, ϵ_min) & `[`StopWhenChangeLess`](@ref)`(min_stepsize))`) a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop.
* `ϵ`: the accuracy tolerance
* `ϵ_min`: the lower bound for the accuracy tolerance
* $(_field_p)
* `ρ`: the penalty parameter
* $(_field_sub_problem)
* $(_field_sub_state)
* $(_field_stop)
* `u`: the smoothing parameter and threshold for violation of the constraints
* `u_min`: the lower bound for the smoothing parameter and threshold for violation of the constraints
* `θ_ϵ`: the scaling factor of the tolerance parameter
* `θ_ρ`: the scaling factor of the penalty parameter
* `θ_u`: the scaling factor of the smoothing parameter
# Constructor
ExactPenaltyMethodState(M::AbstractManifold, p, sub_problem, sub_state; kwargs...)
construct an exact penalty options with the remaining previously mentioned fields as keywords using their provided defaults.
construct an exact penalty state.
# Keyword arguments
* `ϵ=1e-3`
* `ϵ_min=1e-6`
* `ϵ_exponent=1 / 100`: a shortcut for the scaling factor ``θ_ϵ`` (a worked check follows after this docstring)
* `θ_ϵ=(ϵ_min / ϵ)^(ϵ_exponent)`
* `u=1e-1`
* `u_min=1e-6`
* `u_exponent=1 / 100`: a shortcut for the scaling factor ``θ_u``.
* `θ_u=(u_min / u)^(u_exponent)`
* `ρ=1.0`
* `θ_ρ=0.3`
* `stopping_criterion=`[`StopAfterIteration`](@ref)`(300)`$(_sc_any)` (`
[`StopWhenSmallerOrEqual`](@ref)`(:ϵ, ϵ_min)`$(_sc_all)[`StopWhenChangeLess`](@ref)`(1e-10) )`
# See also
@@ -110,51 +126,22 @@ function show(io::IO, epms::ExactPenaltyMethodState)
return print(io, s)
end
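
A quick worked check of the `ϵ_exponent` shortcut from the keyword list above, assuming the default values shown there:

```julia
ϵ, ϵ_min, ϵ_exponent = 1e-3, 1e-6, 1 / 100
θ_ϵ = (ϵ_min / ϵ)^ϵ_exponent   # (1e-3)^0.01 ≈ 0.933
# Each iteration then applies ϵ = max(ϵ_min, θ_ϵ * ϵ): a slow geometric
# decay from 1e-3 toward the lower bound ϵ_min = 1e-6.
```

The same construction yields `θ_u` from `u`, `u_min`, and `u_exponent`.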

@doc raw"""
exact_penalty_method(M, F, gradF, p=rand(M); kwargs...)
exact_penalty_method(M, cmo::ConstrainedManifoldObjective, p=rand(M); kwargs...)
perform the exact penalty method (EPM) [LiuBoumal:2019](@cite)
The aim of the EPM is to find a solution of the constrained optimisation task
```math
\begin{aligned}
\min_{p ∈\mathcal{M}} &f(p)\\
\text{subject to } &g_i(p)\leq 0 \quad \text{ for } i= 1, …, m,\\
\quad &h_j(p)=0 \quad \text{ for } j=1,…,n,
\end{aligned}
```
where `M` is a Riemannian manifold, and ``f``, ``\{g_i\}_{i=1}^m`` and ``\{h_j\}_{j=1}^n``
are twice continuously differentiable functions from `M` to ℝ.
For that a weighted ``L_1``-penalty term for the violation of the constraints is added to the objective
_doc_EPM_penalty = raw"""
```math
f(x) + ρ\biggl( \sum_{i=1}^m \max\bigl\{0, g_i(x)\bigr\} + \sum_{j=1}^n \vert h_j(x)\vert\biggr),
```
where ``ρ>0`` is the penalty parameter.
Since this is non-smooth, a [`SmoothingTechnique`](@ref) with parameter `u` is applied,
see the [`ExactPenaltyCost`](@ref).
In every step ``k`` of the exact penalty method, the smoothed objective is then minimized over all
``x ∈\mathcal{M}``.
Then, the accuracy tolerance ``ϵ`` and the smoothing parameter ``u`` are updated by setting
"""

_doc_EMP_ϵ_update = raw"""
```math
ϵ^{(k)}=\max\{ϵ_{\min}, θ_ϵ ϵ^{(k-1)}\},
```
where ``ϵ_{\min}`` is the lowest value ``ϵ`` is allowed to become and ``θ_ϵ ∈ (0,1)`` is a constant scaling factor, and
"""

```math
u^{(k)} = \max \{u_{\min}, \theta_u u^{(k-1)} \},
```
where ``u_{\min}`` is the lowest value ``u`` is allowed to become and ``θ_u ∈ (0,1)`` is a constant scaling factor.
Finally, the penalty parameter ``ρ`` is updated as
_doc_EMP_ρ_update = raw"""
```math
ρ^{(k)} = \begin{cases}
ρ^{(k-1)}/θ_ρ,  & \text{if } \displaystyle \max_{j ∈ \mathcal{E}, i ∈ \mathcal{I}} \Bigl\{ \vert h_j(x^{(k)}) \vert, g_i(x^{(k)}) \Bigr\} \geq u^{(k-1)},\\
ρ^{(k-1)},  & \text{otherwise,}
\end{cases}

@@ -163,26 +150,66 @@ Finally, the penalty parameter ``ρ`` is updated as
```
where ``θ_ρ ∈ (0,1)`` is a constant scaling factor.
"""
_doc_EMP_u_update = raw"""
```math
u^{(k)} = \max \{u_{\min}, \theta_u u^{(k-1)} \},
```
where ``u_{\min}`` is the lowest value ``u`` is allowed to become and ``θ_u ∈ (0,1)`` is a constant scaling factor.
"""

_doc_EPM = """
exact_penalty_method(M, f, grad_f, p=rand(M); kwargs...)
exact_penalty_method(M, cmo::ConstrainedManifoldObjective, p=rand(M); kwargs...)
exact_penalty_method!(M, f, grad_f, p; kwargs...)
exact_penalty_method!(M, cmo::ConstrainedManifoldObjective, p; kwargs...)
perform the exact penalty method (EPM) [LiuBoumal:2019](@cite).
The aim of the EPM is to find a solution of the constrained optimisation task
$(_problem_constrained)
where `M` is a Riemannian manifold, and ``f``, ``$(_math_sequence("g", "i", "1", "n"))`` and ``$(_math_sequence("h", "j", "1", "m"))``
are twice continuously differentiable functions from `M` to ℝ.
For that a weighted ``L_1``-penalty term for the violation of the constraints is added to the objective
$(_doc_EPM_penalty)
Since this is non-smooth, a [`SmoothingTechnique`](@ref) with parameter `u` is applied,
see the [`ExactPenaltyCost`](@ref).
In every step ``k`` of the exact penalty method, the smoothed objective is then minimized over all ``p ∈$(_l_M)``.
Then, the accuracy tolerance ``ϵ`` and the smoothing parameter ``u`` are updated by setting
$(_doc_EMP_ϵ_update)
$(_doc_EMP_u_update)
Finally, the penalty parameter ``ρ`` is updated as
$(_doc_EMP_ρ_update)
# Input
* `M` a manifold ``\mathcal M``
* `f` a cost function ``f:\mathcal M→ℝ`` to minimize
* `grad_f` the gradient of the cost function
* $(_arg_M)
* $(_arg_f)
* $(_arg_grad_f)
* $(_arg_p)
# Optional (if not called with the [`ConstrainedManifoldObjective`](@ref) `cmo`)
# Keyword arguments
if not called with the [`ConstrainedManifoldObjective`](@ref) `cmo`
* `g=nothing`: the inequality constraints
* `h=nothing`: the equality constraints
* `grad_g=nothing`: the gradient of the inequality constraints
* `grad_h=nothing`: the gradient of the equality constraints
Note that one of the pairs (`g`, `grad_g`) or (`h`, `grad_h`) has to be provided.
Otherwise the problem is not constrained and you should consider using unconstrained solvers like [`quasi_Newton`](@ref).
Otherwise the problem is not constrained and a better solver would be for example [`quasi_Newton`](@ref).
# Optional
# Further keyword arguments
* `smoothing`: ([`LogarithmicSumOfExponentials`](@ref)) [`SmoothingTechnique`](@ref) to use
* `ϵ=1e-3`: the accuracy tolerance
* `ϵ_exponent=1/100`: exponent of the ϵ update factor;
* `ϵ_min=1e-6`: the lower bound for the accuracy tolerance
@@ -191,30 +218,36 @@ Otherwise the problem is not constrained and you should consider using unconstra
* `u_min=1e-6`: the lower bound for the smoothing parameter and threshold for violation of the constraints
* `ρ=1.0`: the penalty parameter
* `equality_constraints=nothing`: the number ``m`` of equality constraints.
* `gradient_range` (`nothing`, equivalent to [`NestedPowerRepresentation`](@extref)) specify how gradients are represented
* `gradient_equality_range=gradient_range`: specify how the gradients of the equality constraints are represented
* `gradient_inequality_range=gradient_range`: specify how the gradients of the inequality constraints are represented
If not provided, a call to the gradient of `h` is performed to estimate these.
* `gradient_range=nothing`: specify how both gradients of the constraints are represented
* `gradient_equality_range=gradient_range`:
specify how gradients of the equality constraints are represented, see [`VectorGradientFunction`](@ref).
* `gradient_inequality_range=gradient_range`:
specify how gradients of the inequality constraints are represented, see [`VectorGradientFunction`](@ref).
* `inequality_constraints=nothing`: the number ``n`` of inequality constraints.
If not provided, a call to the gradient of `g` is performed to estimate these.
* `min_stepsize=1e-10`: the minimal step size
* `sub_cost`: ([`ExactPenaltyCost`](@ref)`(problem, ρ, u; smoothing=smoothing)`) use this exact penalty cost, especially with the same numbers `ρ,u` as in the options for the sub problem
* `sub_grad`: ([`ExactPenaltyGrad`](@ref)`(problem, ρ, u; smoothing=smoothing)`) use this exact penalty gradient, especially with the same numbers `ρ,u` as in the options for the sub problem
* `sub_kwargs=(;)`: keyword arguments to decorate the sub options, for example debug, that automatically respects the main solvers debug options (like sub-sampling) as well
* `sub_stopping_criterion`: ([`StopAfterIteration`](@ref)`(200) | `[`StopWhenGradientNormLess`](@ref)`(ϵ) | `[`StopWhenStepsizeLess`](@ref)`(1e-10)`) specify a stopping criterion for the subsolver.
* `sub_problem`: ([`DefaultManoptProblem`](@ref)`(M, `[`ManifoldGradientObjective`](@ref)`(sub_cost, sub_grad; evaluation=evaluation)`, provide a problem for the subsolver
* `sub_state`: ([`QuasiNewtonState`](@ref)) using [`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref) with [`InverseBFGS`](@ref) and `sub_stopping_criterion` as a stopping criterion. See also `sub_kwargs`.
* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(300)` | ([`StopWhenSmallerOrEqual`](@ref)`(ϵ, ϵ_min)` & [`StopWhenChangeLess`](@ref)`(1e-10)`) a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop.
* `smoothing=`[`LogarithmicSumOfExponentials`](@ref): a [`SmoothingTechnique`](@ref) to use
* `sub_cost=`[`ExactPenaltyCost`](@ref)`(problem, ρ, u; smoothing=smoothing)`: cost to use in the sub solver
$(_kw_used_in("sub_problem"))
* `sub_grad=`[`ExactPenaltyGrad`](@ref)`(problem, ρ, u; smoothing=smoothing)`: gradient to use in the sub solver
$(_kw_used_in("sub_problem"))
* $(_kw_sub_kwargs_default): $(_kw_sub_kwargs)
* `sub_stopping_criterion=`[`StopAfterIteration`](@ref)`(200)`$(_sc_any)[`StopWhenGradientNormLess`](@ref)`(ϵ)`$(_sc_any)[`StopWhenStepsizeLess`](@ref)`(1e-10)`: a stopping criterion for the sub solver
$(_kw_used_in("sub_state"))
* `sub_problem=`[`DefaultManoptProblem`](@ref)`(M, `[`ManifoldGradientObjective`](@ref)`(sub_cost, sub_grad; evaluation=evaluation))`: the problem for the subsolver. The objective can also be decorated with arguments from `sub_kwargs`.
* `sub_state=`[`QuasiNewtonState`](@ref)`(...)`: a solver to use for the sub problem. By default an L-BFGS is used.
* `stopping_criterion=`[`StopAfterIteration`](@ref)`(300)`$(_sc_any)` ( `[`StopWhenSmallerOrEqual`](@ref)`(ϵ, ϵ_min)`$(_sc_all)[`StopWhenChangeLess`](@ref)`(1e-10) )`: $(_kw_stopping_criterion)
For the `range`s of the constraints' gradient, other power manifold tangent space representations,
mainly the [`ArrayPowerRepresentation`](@extref Manifolds :jl:type:`Manifolds.ArrayPowerRepresentation`) can be used if the gradients can be computed more efficiently in that representation.
With `equality_constraints` and `inequality_constraints` you can provide the dimensions
of the ranges of `h` and `g`, respectively. If they are not provided, a call to either of these
functions, together with `M` and the start point `p0`, is performed to try to infer them.
# Output
$(_kw_others)
the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details
$_doc_sec_output
"""

@doc "$(_doc_EPM)"
exact_penalty_method(M::AbstractManifold, args...; kwargs...)
function exact_penalty_method(M::AbstractManifold, f, grad_f; kwargs...)
return exact_penalty_method(M, f, grad_f, rand(M); kwargs...)
@@ -298,14 +331,7 @@ function exact_penalty_method(
return exact_penalty_method!(M, cmo, q; kwargs...)
end
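
A corresponding sketch of the in-place variant, reusing the (assumed) setup from the example above; the start point is overwritten with the result:

```julia
p = copy(M, p0)
exact_penalty_method!(M, f, grad_f, p; g=g, grad_g=grad_g)
```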

@doc raw"""
exact_penalty_method!(M, f, grad_f, p; kwargs...)
exact_penalty_method!(M, cmo::ConstrainedManifoldObjective, p; kwargs...)
perform the exact penalty method (EPM) performed in place of `p`.
For all options, see [`exact_penalty_method`](@ref).
"""
@doc "$(_doc_EPM)"
exact_penalty_method!(M::AbstractManifold, args...; kwargs...)
function exact_penalty_method!(
M::AbstractManifold,
