From ea5c26d613fe53515852eb82e981e00b8e70742d Mon Sep 17 00:00:00 2001 From: odunbar Date: Tue, 24 Oct 2023 10:15:41 -0700 Subject: [PATCH] modified accelerator for variable timestep simplify expression format rm typo in UKI that causes warning needs a sqrt remove r add ref into docstring re-add original nesterov scheme as ConstantStepNesterovAccelerator for comparisons --- src/Accelerators.jl | 124 +++++++++++++++++++++++-- src/EnsembleKalmanProcess.jl | 2 +- src/UnscentedKalmanInversion.jl | 5 +- test/EnsembleKalmanProcess/runtests.jl | 42 +++++++-- 4 files changed, 153 insertions(+), 20 deletions(-) diff --git a/src/Accelerators.jl b/src/Accelerators.jl index ab34b9d66..5a1d0c48b 100644 --- a/src/Accelerators.jl +++ b/src/Accelerators.jl @@ -1,6 +1,6 @@ # included in EnsembleKalmanProcess.jl -export DefaultAccelerator, NesterovAccelerator +export DefaultAccelerator, NesterovAccelerator, ConstantStepNesterovAccelerator export accelerate!, set_initial_acceleration! """ @@ -10,21 +10,49 @@ Default accelerator provides no acceleration, runs traditional EKI """ struct DefaultAccelerator <: Accelerator end + + """ $(TYPEDEF) -Accelerator that adapts Nesterov's momentum method for EKI. +Accelerator that adapts a Constant-timestep version of Nesterov's momentum method for EKI. Stores a previous state value u_prev for computational purposes (note this is distinct from state returned as "ensemble value") $(TYPEDFIELDS) """ -mutable struct NesterovAccelerator{FT <: AbstractFloat} <: Accelerator +mutable struct ConstantStepNesterovAccelerator{FT <: AbstractFloat} <: Accelerator r::FT u_prev::Any end -function NesterovAccelerator(r = 3.0, initial = Float64[]) - return NesterovAccelerator(r, initial) +function ConstantStepNesterovAccelerator(r = 3.0, initial = Float64[]) + return ConstantStepNesterovAccelerator(r, initial) +end + + +""" +Sets u_prev to the initial parameter values +""" +function set_ICs!(accelerator::ConstantStepNesterovAccelerator{FT}, u::MA) where {FT <: AbstractFloat, MA <: AbstractMatrix} + accelerator.u_prev = u +end + + +""" +$(TYPEDEF) + +Accelerator that adapts Nesterov's momentum method for EKI. +Stores a previous state value u_prev for computational purposes (note this is distinct from state returned as "ensemble value") + +$(TYPEDFIELDS) +""" +mutable struct NesterovAccelerator{FT <: AbstractFloat} <: Accelerator + u_prev::Array{FT} + θ_prev::FT +end + +function NesterovAccelerator(initial = Float64[]) + return NesterovAccelerator(initial, 1.0) end @@ -48,17 +76,27 @@ end """ Performs state update with modified Nesterov momentum approach. +The dependence of the momentum parameter for variable timestep can be found e.g. here "https://www.damtp.cam.ac.uk/user/hf323/M19-OPT/lecture5.pdf" """ function accelerate!( ekp::EnsembleKalmanProcess{FT, IT, P, LRS, NesterovAccelerator{FT}}, u::MA, ) where {FT <: AbstractFloat, IT <: Int, P <: Process, LRS <: LearningRateScheduler, MA <: AbstractMatrix} ## update "v" state: - k = get_N_iterations(ekp) + 2 - v = u .+ (1 - ekp.accelerator.r / k) * (u .- ekp.accelerator.u_prev) + #v = u .+ 2 / (get_N_iterations(ekp) + 2) * (u .- ekp.accelerator.u_prev) + Δt_prev = length(ekp.Δt) == 1 ? 1 : ekp.Δt[end - 1] + Δt = ekp.Δt[end] + θ_prev = ekp.accelerator.θ_prev + + # condition θ_prev^2 * (1 - θ) * Δt \leq Δt_prev * θ^2 + a = sqrt(θ_prev^2 * Δt / Δt_prev) + θ = (-a + sqrt(a^2 + 4)) / 2 + + v = u .+ θ * (1 / θ_prev - 1) * (u .- ekp.accelerator.u_prev) ## update "u" state: ekp.accelerator.u_prev = u + ekp.accelerator.θ_prev = θ ## push "v" state to EKP object push!(ekp.u, DataContainer(v, data_are_columns = true)) @@ -67,6 +105,7 @@ end """ State update method for UKI with Nesterov Accelerator. +The dependence of the momentum parameter for variable timestep can be found e.g. here "https://www.damtp.cam.ac.uk/user/hf323/M19-OPT/lecture5.pdf" Performs identical update as with other methods, but requires reconstruction of mean and covariance of the accelerated positions prior to saving. """ function accelerate!( @@ -74,6 +113,77 @@ function accelerate!( u::AM, ) where {FT <: AbstractFloat, IT <: Int, P <: Unscented, LRS <: LearningRateScheduler, AM <: AbstractMatrix} + #identical update stage as before + ## update "v" state: + Δt_prev = length(uki.Δt) == 1 ? 1 : uki.Δt[end - 1] + Δt = uki.Δt[end] + θ_prev = uki.accelerator.θ_prev + + + # condition θ_prev^2 * (1 - θ) * Δt \leq Δt_prev * θ^2 + a = sqrt(θ_prev^2 * Δt / Δt_prev) + θ = (-a + sqrt(a^2 + 4)) / 2 + + v = u .+ θ * (1 / θ_prev - 1) * (u .- uki.accelerator.u_prev) + + ## update "u" state: + uki.accelerator.u_prev = u + uki.accelerator.θ_prev = θ + + ## push "v" state to UKI object + push!(uki.u, DataContainer(v, data_are_columns = true)) + + # additional complication: the stored "u_mean" and "uu_cov" are not the mean/cov of this ensemble + # the ensemble comes from the prediction operation acted upon this. we invert the prediction of the mean/cov of the sigma ensemble from u_mean/uu_cov + # u_mean = 1/alpha*(mean(v) - r) + r + # uu_cov = 1/alpha^2*(cov(v) - Σ_ω) + α_reg = uki.process.α_reg + r = uki.process.r + Σ_ω = uki.process.Σ_ω + Δt = uki.Δt[end] + + v_mean = construct_mean(uki, v) + vv_cov = construct_cov(uki, v, v_mean) + u_mean = 1 / α_reg * (v_mean - r) + r + uu_cov = (1 / α_reg)^2 * (vv_cov - Σ_ω * Δt) + + # overwrite the saved u_mean/uu_cov + uki.process.u_mean[end] = u_mean # N_ens x N_params + uki.process.uu_cov[end] = uu_cov # N_ens x N_data + +end + + + + +""" +Performs state update with modified constant timestep Nesterov momentum approach. +""" +function accelerate!( + ekp::EnsembleKalmanProcess{FT, IT, P, LRS, ConstantStepNesterovAccelerator{FT}}, + u::MA, +) where {FT <: AbstractFloat, IT <: Int, P <: Process, LRS <: LearningRateScheduler, MA <: AbstractMatrix} + ## update "v" state: + k = get_N_iterations(ekp) + 2 + v = u .+ (1 - ekp.accelerator.r / k) * (u .- ekp.accelerator.u_prev) + + ## update "u" state: + ekp.accelerator.u_prev = u + + ## push "v" state to EKP object + push!(ekp.u, DataContainer(v, data_are_columns = true)) +end + + +""" +State update method for UKI with constant timestep Nesterov Accelerator. +Performs identical update as with other methods, but requires reconstruction of mean and covariance of the accelerated positions prior to saving. +""" +function accelerate!( + uki::EnsembleKalmanProcess{FT, IT, P, LRS, ConstantStepNesterovAccelerator{FT}}, + u::AM, +) where {FT <: AbstractFloat, IT <: Int, P <: Unscented, LRS <: LearningRateScheduler, AM <: AbstractMatrix} + #identical update stage as before ## update "v" state: k = get_N_iterations(uki) + 2 diff --git a/src/EnsembleKalmanProcess.jl b/src/EnsembleKalmanProcess.jl index e885ee8df..5c74002d5 100644 --- a/src/EnsembleKalmanProcess.jl +++ b/src/EnsembleKalmanProcess.jl @@ -214,7 +214,7 @@ function EnsembleKalmanProcess( end AC = typeof(acc) - if AC <: NesterovAccelerator + if !(AC <: DefaultAccelerator) set_ICs!(acc, params) if P <: Sampler @warn "Acceleration is experimental for Sampler processes and may affect convergence." diff --git a/src/UnscentedKalmanInversion.jl b/src/UnscentedKalmanInversion.jl index 3705fb21a..3e9bc4738 100644 --- a/src/UnscentedKalmanInversion.jl +++ b/src/UnscentedKalmanInversion.jl @@ -551,10 +551,7 @@ end UKI prediction step : generate sigma points. """ -function update_ensemble_prediction!( - process::Unscented, - Δt::FT, -) where {FT <: AbstractFloat, AV <: AbstractVector, AM <: AbstractMatrix} +function update_ensemble_prediction!(process::Unscented, Δt::FT) where {FT <: AbstractFloat} process.iter += 1 # update evolution covariance matrix diff --git a/test/EnsembleKalmanProcess/runtests.jl b/test/EnsembleKalmanProcess/runtests.jl index e60044d72..5a82f8cfc 100644 --- a/test/EnsembleKalmanProcess/runtests.jl +++ b/test/EnsembleKalmanProcess/runtests.jl @@ -82,6 +82,8 @@ end @testset "Accelerators" begin # Get an inverse problem y_obs, G, Γy, _ = inv_problems[end - 2] # additive noise inv problem (deterministic map) + inv_sqrt_Γy = sqrt(inv(Γy)) + rng = Random.MersenneTwister(rng_seed) N_ens_tmp = 5 initial_ensemble = EKP.construct_initial_ensemble(rng, prior, N_ens_tmp) @@ -89,6 +91,8 @@ end # build accelerated and non-accelerated processes ekiobj = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Inversion(), accelerator = NesterovAccelerator()) eksobj = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Sampler(prior), accelerator = NesterovAccelerator()) + ekiobj_const = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Inversion(), accelerator = ConstantStepNesterovAccelerator()) + eksobj_const = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Sampler(prior), accelerator = ConstantStepNesterovAccelerator()) ekiobj_noacc = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Inversion()) eksobj_noacc = EKP.EnsembleKalmanProcess(initial_ensemble, y_obs, Γy, Sampler(prior)) ekiobj_noacc_specified = @@ -99,26 +103,49 @@ end ## test EKP object's accelerator type is consistent (EKP constructor reassigns object in some cases) @test typeof(ekiobj.accelerator) <: NesterovAccelerator @test typeof(eksobj.accelerator) <: NesterovAccelerator + @test typeof(ekiobj_const.accelerator) <: ConstantStepNesterovAccelerator + @test typeof(eksobj_const.accelerator) <: ConstantStepNesterovAccelerator @test typeof(ekiobj_noacc.accelerator) <: DefaultAccelerator @test typeof(eksobj_noacc.accelerator) <: DefaultAccelerator @test typeof(ekiobj_noacc_specified.accelerator) <: DefaultAccelerator @test typeof(eksobj_noacc_specified.accelerator) <: DefaultAccelerator ## test NesterovAccelerators satisfy desired ICs - @test ekiobj.accelerator.r ≈ 3.0 @test ekiobj.accelerator.u_prev == initial_ensemble - @test eksobj.accelerator.r ≈ 3.0 + @test ekiobj.accelerator.θ_prev == 1.0 @test eksobj.accelerator.u_prev == initial_ensemble + @test eksobj.accelerator.θ_prev == 1.0 + @test ekiobj_const.accelerator.r ≈ 3.0 + @test ekiobj_const.accelerator.u_prev == initial_ensemble + @test eksobj_const.accelerator.r ≈ 3.0 + @test eksobj_const.accelerator.u_prev == initial_ensemble + ## test method convergence # Note: this test only requires that the final ensemble is an improvement on the initial ensemble, # NOT that the accelerated processes are more effective than the default, as this is not guaranteed. # Specific cost values are printed to give an idea of acceleration. - processes = [Inversion(), TransformInversion(inv(Γy)), Unscented(prior; impose_prior = true), Sampler(prior)] - schedulers = [repeat([DefaultScheduler(0.1)], 3)..., EKSStableScheduler()] + processes = [ + repeat([ + Inversion(), + TransformInversion(inv(Γy)), + Unscented(prior; impose_prior = true), + ],2)..., + Sampler(prior), + ] + schedulers = [ + repeat([DefaultScheduler(0.1)], 3)..., # for constant timestep Nesterov + repeat([DataMisfitController(terminate_at = 100)], 3)..., # for general Nesterov + EKSStableScheduler(), # for general Nesterov + ] for (process, scheduler) in zip(processes, schedulers) - accelerators = [DefaultAccelerator(), NesterovAccelerator()] - N_iters = [5, 5, 5, 5] + if typeof(scheduler) <: DefaultScheduler + accelerators = [DefaultAccelerator(), ConstantStepNesterovAccelerator(), NesterovAccelerator()] + N_iters = [20, 20, 20] + else #don't test the constantstep accelerator with variable timesteppers + accelerators = [DefaultAccelerator(), NesterovAccelerator()] + N_iters = [20, 20] + end init_means = [] final_means = [] @@ -160,9 +187,8 @@ end push!(init_means, vec(mean(get_u_prior(ekpobj), dims = 2))) push!(final_means, vec(mean(get_u_final(ekpobj), dims = 2))) - inv_sqrt_Γy = sqrt(inv(Γy)) cost_initial = - norm(inv_sqrt_Γy * (y_obs .- G(transform_unconstrained_to_constrained(prior, init_means[end])))) + norm(inv_sqrt_Γy * (y_obs .- G(transform_unconstrained_to_constrained(prior, initial_ensemble)))) cost_final = norm(inv_sqrt_Γy * (y_obs .- G(transform_unconstrained_to_constrained(prior, final_means[end])))) @info "Convergence:" cost_initial cost_final