Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

modified accelerator for variable timestep #341

Merged
merged 2 commits into from
Oct 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bors.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ status = [
'format',
]
delete_merged_branches = true
timeout_sec = 3600
timeout_sec = 7200
block_labels = [ "do-not-merge-yet" ]
127 changes: 120 additions & 7 deletions src/Accelerators.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# included in EnsembleKalmanProcess.jl

export DefaultAccelerator, NesterovAccelerator
export DefaultAccelerator, NesterovAccelerator, ConstantStepNesterovAccelerator
export accelerate!, set_initial_acceleration!

"""
Expand All @@ -10,21 +10,52 @@ Default accelerator provides no acceleration, runs traditional EKI
"""
struct DefaultAccelerator <: Accelerator end



"""
$(TYPEDEF)

Accelerator that adapts Nesterov's momentum method for EKI.
Accelerator that adapts a Constant-timestep version of Nesterov's momentum method for EKI.
Stores a previous state value u_prev for computational purposes (note this is distinct from state returned as "ensemble value")

$(TYPEDFIELDS)
"""
mutable struct NesterovAccelerator{FT <: AbstractFloat} <: Accelerator
mutable struct ConstantStepNesterovAccelerator{FT <: AbstractFloat} <: Accelerator
r::FT
u_prev::Any
end

function NesterovAccelerator(r = 3.0, initial = Float64[])
return NesterovAccelerator(r, initial)
function ConstantStepNesterovAccelerator(r = 3.0, initial = Float64[])
return ConstantStepNesterovAccelerator(r, initial)
end


"""
Sets u_prev to the initial parameter values
"""
function set_ICs!(
accelerator::ConstantStepNesterovAccelerator{FT},
u::MA,
) where {FT <: AbstractFloat, MA <: AbstractMatrix}
accelerator.u_prev = u
end


"""
$(TYPEDEF)

Accelerator that adapts Nesterov's momentum method for EKI.
Stores a previous state value u_prev for computational purposes (note this is distinct from state returned as "ensemble value")

$(TYPEDFIELDS)
"""
mutable struct NesterovAccelerator{FT <: AbstractFloat} <: Accelerator
u_prev::Array{FT}
θ_prev::FT
end

function NesterovAccelerator(initial = Float64[])
return NesterovAccelerator(initial, 1.0)
end


Expand All @@ -48,17 +79,27 @@ end

"""
Performs state update with modified Nesterov momentum approach.
The dependence of the momentum parameter for variable timestep can be found e.g. here "https://www.damtp.cam.ac.uk/user/hf323/M19-OPT/lecture5.pdf"
"""
function accelerate!(
ekp::EnsembleKalmanProcess{FT, IT, P, LRS, NesterovAccelerator{FT}},
u::MA,
) where {FT <: AbstractFloat, IT <: Int, P <: Process, LRS <: LearningRateScheduler, MA <: AbstractMatrix}
## update "v" state:
k = get_N_iterations(ekp) + 2
v = u .+ (1 - ekp.accelerator.r / k) * (u .- ekp.accelerator.u_prev)
#v = u .+ 2 / (get_N_iterations(ekp) + 2) * (u .- ekp.accelerator.u_prev)
Δt_prev = length(ekp.Δt) == 1 ? 1 : ekp.Δt[end - 1]
Δt = ekp.Δt[end]
θ_prev = ekp.accelerator.θ_prev

# condition θ_prev^2 * (1 - θ) * Δt \leq Δt_prev * θ^2
a = sqrt(θ_prev^2 * Δt / Δt_prev)
θ = (-a + sqrt(a^2 + 4)) / 2

v = u .+ θ * (1 / θ_prev - 1) * (u .- ekp.accelerator.u_prev)

## update "u" state:
ekp.accelerator.u_prev = u
ekp.accelerator.θ_prev = θ

## push "v" state to EKP object
push!(ekp.u, DataContainer(v, data_are_columns = true))
Expand All @@ -67,13 +108,85 @@ end

"""
State update method for UKI with Nesterov Accelerator.
The dependence of the momentum parameter for variable timestep can be found e.g. here "https://www.damtp.cam.ac.uk/user/hf323/M19-OPT/lecture5.pdf"
Performs identical update as with other methods, but requires reconstruction of mean and covariance of the accelerated positions prior to saving.
"""
function accelerate!(
uki::EnsembleKalmanProcess{FT, IT, P, LRS, NesterovAccelerator{FT}},
u::AM,
) where {FT <: AbstractFloat, IT <: Int, P <: Unscented, LRS <: LearningRateScheduler, AM <: AbstractMatrix}

#identical update stage as before
## update "v" state:
Δt_prev = length(uki.Δt) == 1 ? 1 : uki.Δt[end - 1]
Δt = uki.Δt[end]
θ_prev = uki.accelerator.θ_prev


# condition θ_prev^2 * (1 - θ) * Δt \leq Δt_prev * θ^2
a = sqrt(θ_prev^2 * Δt / Δt_prev)
θ = (-a + sqrt(a^2 + 4)) / 2

v = u .+ θ * (1 / θ_prev - 1) * (u .- uki.accelerator.u_prev)

## update "u" state:
uki.accelerator.u_prev = u
uki.accelerator.θ_prev = θ

## push "v" state to UKI object
push!(uki.u, DataContainer(v, data_are_columns = true))

# additional complication: the stored "u_mean" and "uu_cov" are not the mean/cov of this ensemble
# the ensemble comes from the prediction operation acted upon this. we invert the prediction of the mean/cov of the sigma ensemble from u_mean/uu_cov
# u_mean = 1/alpha*(mean(v) - r) + r
# uu_cov = 1/alpha^2*(cov(v) - Σ_ω)
α_reg = uki.process.α_reg
r = uki.process.r
Σ_ω = uki.process.Σ_ω
Δt = uki.Δt[end]

v_mean = construct_mean(uki, v)
vv_cov = construct_cov(uki, v, v_mean)
u_mean = 1 / α_reg * (v_mean - r) + r
uu_cov = (1 / α_reg)^2 * (vv_cov - Σ_ω * Δt)

# overwrite the saved u_mean/uu_cov
uki.process.u_mean[end] = u_mean # N_ens x N_params
uki.process.uu_cov[end] = uu_cov # N_ens x N_data

end




"""
Performs state update with modified constant timestep Nesterov momentum approach.
"""
function accelerate!(
ekp::EnsembleKalmanProcess{FT, IT, P, LRS, ConstantStepNesterovAccelerator{FT}},
u::MA,
) where {FT <: AbstractFloat, IT <: Int, P <: Process, LRS <: LearningRateScheduler, MA <: AbstractMatrix}
## update "v" state:
k = get_N_iterations(ekp) + 2
v = u .+ (1 - ekp.accelerator.r / k) * (u .- ekp.accelerator.u_prev)

## update "u" state:
ekp.accelerator.u_prev = u

## push "v" state to EKP object
push!(ekp.u, DataContainer(v, data_are_columns = true))
end


"""
State update method for UKI with constant timestep Nesterov Accelerator.
Performs identical update as with other methods, but requires reconstruction of mean and covariance of the accelerated positions prior to saving.
"""
function accelerate!(
uki::EnsembleKalmanProcess{FT, IT, P, LRS, ConstantStepNesterovAccelerator{FT}},
u::AM,
) where {FT <: AbstractFloat, IT <: Int, P <: Unscented, LRS <: LearningRateScheduler, AM <: AbstractMatrix}

#identical update stage as before
## update "v" state:
k = get_N_iterations(uki) + 2
Expand Down
2 changes: 1 addition & 1 deletion src/EnsembleKalmanProcess.jl
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ function EnsembleKalmanProcess(
end
AC = typeof(acc)

if AC <: NesterovAccelerator
if !(AC <: DefaultAccelerator)
set_ICs!(acc, params)
if P <: Sampler
@warn "Acceleration is experimental for Sampler processes and may affect convergence."
Expand Down
5 changes: 1 addition & 4 deletions src/UnscentedKalmanInversion.jl
Original file line number Diff line number Diff line change
Expand Up @@ -551,10 +551,7 @@ end

UKI prediction step : generate sigma points.
"""
function update_ensemble_prediction!(
process::Unscented,
Δt::FT,
) where {FT <: AbstractFloat, AV <: AbstractVector, AM <: AbstractMatrix}
function update_ensemble_prediction!(process::Unscented, Δt::FT) where {FT <: AbstractFloat}

process.iter += 1
# update evolution covariance matrix
Expand Down
50 changes: 42 additions & 8 deletions test/EnsembleKalmanProcess/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,29 @@ end
@testset "Accelerators" begin
# Get an inverse problem
y_obs, G, Γy, _ = inv_problems[end - 2] # additive noise inv problem (deterministic map)
inv_sqrt_Γy = sqrt(inv(Γy))

rng = Random.MersenneTwister(rng_seed)
N_ens_tmp = 5
initial_ensemble = EKP.construct_initial_ensemble(rng, prior, N_ens_tmp)

# build accelerated and non-accelerated processes
ekiobj = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Inversion(), accelerator = NesterovAccelerator())
eksobj = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Sampler(prior), accelerator = NesterovAccelerator())
ekiobj_const = EKP.EnsembleKalmanProcess(
initial_ensemble,
y_obs,
Γy,
Inversion(),
accelerator = ConstantStepNesterovAccelerator(),
)
eksobj_const = EKP.EnsembleKalmanProcess(
initial_ensemble,
y_obs,
Γy,
Sampler(prior),
accelerator = ConstantStepNesterovAccelerator(),
)
ekiobj_noacc = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Inversion())
eksobj_noacc = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Sampler(prior))
ekiobj_noacc_specified =
Expand All @@ -99,26 +115,45 @@ end
## test EKP object's accelerator type is consistent (EKP constructor reassigns object in some cases)
@test typeof(ekiobj.accelerator) <: NesterovAccelerator
@test typeof(eksobj.accelerator) <: NesterovAccelerator
@test typeof(ekiobj_const.accelerator) <: ConstantStepNesterovAccelerator
@test typeof(eksobj_const.accelerator) <: ConstantStepNesterovAccelerator
@test typeof(ekiobj_noacc.accelerator) <: DefaultAccelerator
@test typeof(eksobj_noacc.accelerator) <: DefaultAccelerator
@test typeof(ekiobj_noacc_specified.accelerator) <: DefaultAccelerator
@test typeof(eksobj_noacc_specified.accelerator) <: DefaultAccelerator

## test NesterovAccelerators satisfy desired ICs
@test ekiobj.accelerator.r ≈ 3.0
@test ekiobj.accelerator.u_prev == initial_ensemble
@test eksobj.accelerator.r ≈ 3.0
@test ekiobj.accelerator.θ_prev == 1.0
@test eksobj.accelerator.u_prev == initial_ensemble
@test eksobj.accelerator.θ_prev == 1.0

@test ekiobj_const.accelerator.r ≈ 3.0
@test ekiobj_const.accelerator.u_prev == initial_ensemble
@test eksobj_const.accelerator.r ≈ 3.0
@test eksobj_const.accelerator.u_prev == initial_ensemble

## test method convergence
# Note: this test only requires that the final ensemble is an improvement on the initial ensemble,
# NOT that the accelerated processes are more effective than the default, as this is not guaranteed.
# Specific cost values are printed to give an idea of acceleration.
processes = [Inversion(), TransformInversion(inv(Γy)), Unscented(prior; impose_prior = true), Sampler(prior)]
schedulers = [repeat([DefaultScheduler(0.1)], 3)..., EKSStableScheduler()]
processes = [
repeat([Inversion(), TransformInversion(inv(Γy)), Unscented(prior; impose_prior = true)], 2)...,
Sampler(prior),
]
schedulers = [
repeat([DefaultScheduler(0.1)], 3)..., # for constant timestep Nesterov
repeat([DataMisfitController(terminate_at = 100)], 3)..., # for general Nesterov
EKSStableScheduler(), # for general Nesterov
]
for (process, scheduler) in zip(processes, schedulers)
accelerators = [DefaultAccelerator(), NesterovAccelerator()]
N_iters = [5, 5, 5, 5]
if typeof(scheduler) <: DefaultScheduler
accelerators = [DefaultAccelerator(), ConstantStepNesterovAccelerator(), NesterovAccelerator()]
N_iters = [20, 20, 20]
else #don't test the constantstep accelerator with variable timesteppers
accelerators = [DefaultAccelerator(), NesterovAccelerator()]
N_iters = [20, 20]
end
init_means = []
final_means = []

Expand Down Expand Up @@ -160,9 +195,8 @@ end
push!(init_means, vec(mean(get_u_prior(ekpobj), dims = 2)))
push!(final_means, vec(mean(get_u_final(ekpobj), dims = 2)))

inv_sqrt_Γy = sqrt(inv(Γy))
cost_initial =
norm(inv_sqrt_Γy * (y_obs .- G(transform_unconstrained_to_constrained(prior, init_means[end]))))
norm(inv_sqrt_Γy * (y_obs .- G(transform_unconstrained_to_constrained(prior, initial_ensemble))))
cost_final =
norm(inv_sqrt_Γy * (y_obs .- G(transform_unconstrained_to_constrained(prior, final_means[end]))))
@info "Convergence:" cost_initial cost_final
Expand Down
Loading