Difference Equations Updates (#140)

* In progress on difference equations updates * Bump DE version requirements * Updated unit tests * SGU passing tests * Bump versions * CI fix
HighDimensionalEconLab · Jun 11, 2022 · e61311b · e61311b · jlperla · Jun 11, 2022
1 parent 64228cd
commit e61311b
Show file tree

Hide file tree

Showing 16 changed files with 66 additions and 91 deletions.
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -1,7 +1,11 @@
 name: CI
 on:
-  - push
-  - pull_request
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - main
 jobs:
   test:
     name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}

diff --git a/Project.toml b/Project.toml
@@ -1,12 +1,11 @@
 name = "DifferentiableStateSpaceModels"
 uuid = "beacd9db-9e5e-4956-9b09-459a4b2028df"
 authors = ["Jesse Perla <jesseperla@gmail.com> and contributors"]
-version = "0.4.16"
+version = "0.4.17"
 
 [deps]
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 DifferenceEquations = "e0ca9c66-1f9e-11ec-127a-1304ce62169c"
-Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 GeneralizedSylvesterSolver = "3b00829b-373b-4a35-afa6-09f888b04ecd"
 LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
@@ -17,7 +16,6 @@ MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 MatrixEquations = "99c1a7ee-ab34-5fd5-8076-27c950a045f4"
 MultivariatePolynomials = "102ac46a-7ee4-5c85-9060-abc95bfdeaa3"
 NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56"
-PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
 Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
 RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
@@ -27,8 +25,7 @@ Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
 
 [compat]
 ChainRulesCore = "1"
-DifferenceEquations = "0.4.12"
-Distributions = "0.25"
+DifferenceEquations = "0.4.16"
 DocStringExtensions = "0.8"
 MultivariatePolynomials = "0.4.4"
 GeneralizedSylvesterSolver = "0.1"
@@ -37,7 +34,6 @@ Latexify = "0.15"
 MacroTools = "0.5"
 MatrixEquations = "2"
 NLsolve = "4"
-PDMats = "0.11"
 Parameters = "0.12"
 RecursiveFactorization = "0.2"
 StructArrays = "0.6"

diff --git a/src/DifferentiableStateSpaceModels.jl b/src/DifferentiableStateSpaceModels.jl
@@ -1,7 +1,6 @@
 module DifferentiableStateSpaceModels
 
 using ChainRulesCore
-using Distributions
 using DocStringExtensions
 using GeneralizedSylvesterSolver
 using Logging
@@ -16,7 +15,6 @@ using RecursiveFactorization # Or perhaps MKL will dominate this on all hardware
 using StructArrays
 using Symbolics
 using SymbolicUtils
-using PDMats
 using DifferenceEquations
 # Flip off when not debugging
 # using Infiltrator

diff --git a/src/generate_perturbation.jl b/src/generate_perturbation.jl
@@ -266,23 +266,17 @@ function solve_first_order!(m, c, settings)
         if settings.calculate_ergodic_distribution
             try
                 # inplace wouldn't help since allocating for lyapd.  Can use lyapds! perhaps, but would need work and have low payoffs
-                c.V.mat .= lyapd(c.h_x, c.η_Σ_sq)
-                c.V.mat .+= transpose(c.V.mat)
-                lmul!(0.5, c.V.mat)
+                c.V .= lyapd(c.h_x, c.η_Σ_sq)
+                c.V .+= transpose(c.V)
+                lmul!(0.5, c.V)
 
                 # potentially perturb the covariance to try to get positive definite
                 if settings.perturb_covariance > 0.0
-                    c.V.mat .+= settings.perturb_covariance * I(size(c.V.mat, 1))  # perturb to ensure it is positive definite
+                    c.V .+= settings.perturb_covariance * I(size(c.V, 1))  # perturb to ensure it is positive definite
                 end
-                # Do inplace cholesky and catch error.  Note that Cholesky required for MvNormal construction regardless
-                copy!(c.V.chol.factors, c.V.mat) # copy over to the factors for the cholesky and do in place
-
-                # TODO: Later investigate using a pivoted cholesky instead (i.e. Val(true)) because otherwise matrices that are semi-definite will fail
-                cholesky!(c.V.chol.factors, Val(false);
-                          check = settings.check_posdef_cholesky) # inplace uses V_t with cholesky.  Now V[t]'s chol is upper-UpperTriangular
 
                 # check scale of diagonal to see if it was explosive
-                if settings.tol_cholesky > 0 && (norm(c.V.mat, Inf) > settings.tol_cholesky)
+                if settings.tol_cholesky > 0 && (norm(c.V, Inf) > settings.tol_cholesky)
                     throw(ErrorException("Failing on norm of covariance matrix"))
                 end
 

diff --git a/src/generate_perturbation_derivatives.jl b/src/generate_perturbation_derivatives.jl
@@ -157,7 +157,7 @@ function solve_first_order_p!(m, c, settings)
 
             # V derivatives
             if settings.calculate_ergodic_distribution
-                tmp = c.h_x_p[i] * c.V.mat * c.h_x'
+                tmp = c.h_x_p[i] * c.V * c.h_x'
                 c.V_p[i] .= lyapd(c.h_x, c.η * c.Σ_p[i] * c.η' + tmp + tmp')
             end
             # B derivatives
@@ -388,10 +388,11 @@ function ChainRulesCore.rrule(::typeof(generate_perturbation), m::PerturbationMo
                     Δp[i] += dot(c.C_1_p[i], Δsol.C)
                 end
             end
-            if (~isnothing(Δsol.x_ergodic))
-                if ((Δsol.x_ergodic != NoTangent()) & (Δsol.x_ergodic != ZeroTangent()))
+            if (~isnothing(Δsol.x_ergodic_var))
+                if ((Δsol.x_ergodic_var != NoTangent()) &
+                    (Δsol.x_ergodic_var != ZeroTangent()))
                     for i in 1:n_p_d
-                        Δp[i] += dot(c.V_p[i], Δsol.x_ergodic.Σ.mat)
+                        Δp[i] += dot(c.V_p[i], Δsol.x_ergodic_var)
                     end
                 end
             end
@@ -405,10 +406,10 @@ function ChainRulesCore.rrule(::typeof(generate_perturbation), m::PerturbationMo
                     Δp[i] += dot(c.B_p[i], Δsol.B)
                 end
             end
-            if (~isnothing(Δsol.D)) # D is a Distribution object which complicates stuff here
+            if (~isnothing(Δsol.D))
                 if ((Δsol.D != NoTangent()) & (Δsol.D != ZeroTangent()))
                     # Only supports diagonal matrices for now.
-                    ΔΩ = diag(Δsol.D.Σ) .* c.Ω * 2
+                    ΔΩ = Δsol.D .* c.Ω * 2
                     for i in 1:n_p_d
                         Δp[i] += dot(c.Ω_p[i], ΔΩ)
                     end
@@ -498,7 +499,7 @@ function ChainRulesCore.rrule(::typeof(generate_perturbation), m::PerturbationMo
             if (~isnothing(Δsol.D)) # D is a Distribution object which complicates stuff here
                 if ((Δsol.D != NoTangent()) & (Δsol.D != ZeroTangent()))
                     # Only supports diagonal matrices for now.
-                    ΔΩ = diag(Δsol.D.Σ) .* c.Ω * 2
+                    ΔΩ = Δsol.D .* c.Ω * 2
                     for i in 1:n_p_d
                         Δp[i] += dot(c.Ω_p[i], ΔΩ)
                     end

diff --git a/src/sequential.jl b/src/sequential.jl
@@ -1,12 +1,12 @@
 # Utilities for sequential solutions, likelihoods, etc.
-
 function DifferenceEquations.LinearStateSpaceProblem(sol::AbstractFirstOrderPerturbationSolution,
                                                      x0, tspan; observables = nothing,
                                                      noise = nothing,
-                                                     u0_prior = sol.x_ergodic)
+                                                     u0_prior_mean = zeros(size(sol.A, 1)),
+                                                     u0_prior_var = sol.x_ergodic_var)
     return LinearStateSpaceProblem(sol.A, sol.B, x0, tspan; sol.C,
                                    observables_noise = sol.D,
-                                   observables, noise, u0_prior)
+                                   observables, noise, u0_prior_mean, u0_prior_var)
 end
 
 function DifferenceEquations.QuadraticStateSpaceProblem(sol::AbstractSecondOrderPerturbationSolution,

diff --git a/src/types.jl b/src/types.jl
@@ -197,7 +197,7 @@ struct SolverCache{Order,ΩType,Ω_pType,QType,ηType,g_σσType,g_xxType} <:
     A_1_p::Vector{Matrix{Float64}}
     C_1::Matrix{Float64}
     C_1_p::Vector{Matrix{Float64}}
-    V::PDMats.PDMat{Float64,Matrix{Float64}}
+    V::Matrix{Float64}
     V_p::Vector{Matrix{Float64}}
     η_Σ_sq::Symmetric{Float64,Matrix{Float64}}
 
@@ -273,12 +273,7 @@ function SolverCache(::Val{Order}, ::Val{HasΩ}, N_p_d, N_y, N_x, N_ϵ, N_z, Q,
                        [zeros(N_x, N_x) for _ in 1:N_p_d],
                        zeros(N_z, N_x),
                        [zeros(N_z, N_x) for _ in 1:N_p_d],
-                       PDMat{Float64,Matrix{Float64}}(N_x,
-                                                      zeros(N_x, N_x),
-                                                      Cholesky{Float64,Matrix{Float64}}(zeros(N_x,
-                                                                                              N_x),
-                                                                                        'U',
-                                                                                        0)),
+                       zeros(N_x, N_x),
                        [zeros(N_x, N_x) for _ in 1:N_p_d],
                        Symmetric(zeros(N_x, N_x)),
 
@@ -362,7 +357,6 @@ Base.@kwdef struct PerturbationSolverSettings{T1,T2,T3,T4,T5,T6}
     tol_cholesky::Float64 = 1e9 # for checking norm of covariance matrix, etc.
     perturb_covariance::Float64 = 0.0 # perturb the covariance matrix to make positive-semi-definite (to numerical precision) into positive-definite
     singular_covariance_value::Float64 = 1e-12 # if not calculting the ergodic distribution, returns a nearly singular one.  Can't be exactly zero of cholesky fails
-    check_posdef_cholesky::Bool = false
     calculate_ergodic_distribution::Bool = true
     nlsolve_method::Symbol = :trust_region
     nlsolve_iterations::Int64 = 1000
@@ -393,11 +387,11 @@ abstract type AbstractSecondOrderPerturbationSolution <: AbstractPerturbationSol
 struct FirstOrderPerturbationSolution{T1<:AbstractVector,T2<:AbstractVector,
                                       T3<:AbstractMatrix,T4<:AbstractMatrix,
                                       T5<:AbstractMatrix,
-                                      T6<:Union{Nothing,Distribution},
+                                      T6<:Union{Nothing,AbstractVector},
                                       T7<:Union{Nothing,AbstractMatrix,
                                                 UniformScaling},
                                       T8<:AbstractMatrix,T9<:AbstractMatrix,
-                                      T10<:Distribution,T11<:AbstractMatrix} <:
+                                      T10<:AbstractMatrix,T11<:AbstractMatrix} <:
        AbstractFirstOrderPerturbationSolution
     retcode::Symbol
     x_symbols::Vector{Symbol}
@@ -420,12 +414,13 @@ struct FirstOrderPerturbationSolution{T1<:AbstractVector,T2<:AbstractVector,
     D::T6  # current a matrix or nothing, later could make more general
     Q::T7  # can be nothing
     η::T8
-    x_ergodic::T10
+    x_ergodic_var::T10
     Γ::T11
 end
 
-maybe_diagonal(x::AbstractVector) = MvNormal(Diagonal(abs2.(x)))
-maybe_diagonal(x) = x # otherwise, just return raw.  e.g. nothing
+# The "x" is the cholesky of the covariance matrix, so square it if just a vector
+make_covariance_matrix(x::AbstractVector) = abs2.(x)
+make_covariance_matrix(x) = x # otherwise, just return raw.  e.g. nothing
 
 function FirstOrderPerturbationSolution(retcode, m::PerturbationModel, c::SolverCache,
                                         settings)
@@ -446,21 +441,20 @@ function FirstOrderPerturbationSolution(retcode, m::PerturbationModel, c::Solver
                                           c.h_x,
                                           c.B,
                                           c.C_1,
-                                          maybe_diagonal(c.Ω),
+                                          make_covariance_matrix(c.Ω),
                                           c.Q,
                                           c.η,
                                           (settings.calculate_ergodic_distribution == true) ?
-                                          MvNormal(zeros(m.n_x), c.V) :
-                                          MvNormal(zeros(m.n_x),
-                                                   diagm(settings.singular_covariance_value *
-                                                         ones(m.n_x))),
+                                          c.V :
+                                          diagm(settings.singular_covariance_value *
+                                                ones(m.n_x)),
                                           c.Γ)
 end
 
 struct SecondOrderPerturbationSolution{T1<:AbstractVector,T2<:AbstractVector,
                                        T3<:AbstractMatrix,T4<:AbstractMatrix,
                                        T5<:AbstractMatrix,
-                                       T6<:Union{Nothing,Distribution},
+                                       T6<:Union{Nothing,AbstractVector},
                                        T7<:Union{Nothing,AbstractMatrix,
                                                  UniformScaling},
                                        T8<:AbstractMatrix,T9<:AbstractMatrix,
@@ -517,7 +511,7 @@ function SecondOrderPerturbationSolution(retcode, m::PerturbationModel, c::Solve
                                            c.x,
                                            c.g_x,
                                            c.B,
-                                           maybe_diagonal(c.Ω),
+                                           make_covariance_matrix(c.Ω),
                                            c.Q,
                                            c.η,
                                            c.Γ,

diff --git a/src/utils.jl b/src/utils.jl
@@ -51,15 +51,4 @@ end
 function fill_zeros!(x::AbstractMatrix)
     fill!(x, zero(eltype(x)))
     return nothing
-end
-
-function fill_zeros!(x::Cholesky)
-    fill_zeros!(x.factors)
-    return nothing
-end
-
-function fill_zeros!(x::PDMat)
-    fill_zeros!(x.chol)
-    fill_zeros!(x.mat)
-    return nothing
-end
+end
diff --git a/test/FVGQ20.jl b/test/FVGQ20.jl
@@ -26,16 +26,15 @@ p_f = (h = 0.97, δ = 0.025, ε = 10, ϕ = 0, γ2 = 0.001, Ω_ii = sqrt(1e-5),
        γR = 0.77, γy = 0.19, γΠ = 1.29, Πbar = 1.01, ρd = 0.12, ρφ = 0.93, ρg = 0.95,
        g_bar = 0.3, σ_A = exp(-3.97), σ_d = exp(-1.51), σ_φ = exp(-2.36), σ_μ = exp(-5.43),
        σ_m = exp(-5.85), σ_g = exp(-3.0), Λμ = 3.4e-3, ΛA = 2.8e-3)
-settings = PerturbationSolverSettings(; tol_cholesky = 1e9, check_posdef_cholesky = false)  # or zero
+settings = PerturbationSolverSettings(; tol_cholesky = 1e9)  # or zero
 
 test_rrule(Zygote.ZygoteRuleConfig(),
            (args...) -> test_first_order_smaller(args..., p_f, m_fvgq, settings), p_d;
            rrule_f = rrule_via_ad,
            check_inferred = false)
 
 # To examine intermediate values set breakpoints/etc. in the following code
-settings = PerturbationSolverSettings(; tol_cholesky = 1e9, check_posdef_cholesky = false,
-                                      print_level = 1)  # or zero
+settings = PerturbationSolverSettings(; tol_cholesky = 1e9, print_level = 1)  # or zero
 c = SolverCache(m_fvgq, Val(1), p_d)
 sol = generate_perturbation(m_fvgq, p_d, p_f; cache = c, settings)
 generate_perturbation_derivatives!(m_fvgq, p_d, p_f, c; settings)
@@ -163,7 +162,7 @@ test_rrule(Zygote.ZygoteRuleConfig(),
 # function test_first_order(p_d, p_f, m)
 #     sol = generate_perturbation(m, p_d, p_f)#, Val(1); cache = c, settings) # manually passing in order
 #     return sum(sol.y) + sum(sol.x) + sum(sol.A) + sum(sol.B) + sum(sol.C) +
-#            sum(cov(sol.D)) + sum(sol.x_ergodic.Σ.mat)
+#            sum(sol.D) + sum(sol.x_ergodic_var)
 # end
 # test_first_order(p_d, p_f, m_fvgq, settings)
 # gradient((args...) -> test_first_order(args..., p_f, m_fvgq, settings), p_d)

diff --git a/test/first_order_gradients.jl b/test/first_order_gradients.jl
@@ -8,13 +8,13 @@ using ChainRulesTestUtils
 function test_first_order(p_d, p_f, m)
     sol = generate_perturbation(m, p_d, p_f, Val(1))
     return sum(sol.y) + sum(sol.x) + sum(sol.A) + sum(sol.B) + sum(sol.C) +
-           sum(cov(sol.D)) + sum(sol.x_ergodic.Σ.mat) + sum(sol.g_x)
+           sum(sol.D) + sum(sol.x_ergodic_var) + sum(sol.g_x)
 end
 
 function test_first_order_cache(p_d, p_f, m, c)
     sol = generate_perturbation(m, p_d, p_f, Val(1); cache = c)
     return sum(sol.y) + sum(sol.x) + sum(sol.A) + sum(sol.B) + sum(sol.C) +
-           sum(cov(sol.D)) + sum(sol.x_ergodic.Σ.mat) + sum(sol.g_x)
+           sum(sol.D) + sum(sol.x_ergodic_var) + sum(sol.g_x)
 end
 
 # @testset "grad_tests with cache" begin
@@ -52,7 +52,7 @@ function test_first_order_cache_vec(p_d_vec, p_f, m, c)
     sol = generate_perturbation(m, (; α = p_d_vec[1], β = p_d_vec[2]), p_f, Val(1);
                                 cache = c)
     return sum(sol.y) + sum(sol.x) + sum(sol.A) + sum(sol.B) + sum(sol.C) +
-           sum(cov(sol.D)) + sum(sol.g_x) + sum(sol.x_ergodic.Σ.mat)
+           sum(sol.D) + sum(sol.g_x) + sum(sol.x_ergodic_var)
 end
 p_d_vec = [p_d.α, p_d.β]
 test_first_order_cache_vec(p_d_vec, p_f, m_grad, c)

diff --git a/test/first_order_perturbation.jl b/test/first_order_perturbation.jl
@@ -218,8 +218,8 @@ end
     @test all_fields_equal(c, sol, fields_to_compare)
     @test c.h_x ≈ sol.A
     @test c.C_1 ≈ sol.C
-    @test c.V ≈ sol.x_ergodic.Σ # Covariance matrix in MvNormal
-    @test c.Ω ≈ sqrt.(diag(sol.D.Σ))
+    @test c.V ≈ sol.x_ergodic_var # Covariance matrix in MvNormal
+    @test c.Ω ≈ sqrt.(sol.D)
 end
 
 @testset "Evaluate Derivatives into cache" begin
@@ -413,7 +413,7 @@ end
     @test c.g_x ≈ sol.g_x
     @test c.h_x ≈ sol.A
     @test c.B ≈ sol.B
-    @test c.Ω ≈ sqrt.(diag(sol.D.Σ))
+    @test c.Ω ≈ sqrt.(sol.D)
     @test c.Q ≈ sol.Q
     @test c.η ≈ sol.η
     @test sol.retcode == :Success
@@ -650,7 +650,7 @@ end
                                           calculate_ergodic_distribution = false)
     sol = generate_perturbation(m, p_d, p_f; settings, cache)
     @test sol.retcode == :Success
-    @test maximum(sol.x_ergodic.Σ.mat) ≈ settings.singular_covariance_value
+    @test maximum(sol.x_ergodic_var) ≈ settings.singular_covariance_value
 end
 
 @testset "Perturbing covariance matrix" begin
@@ -664,8 +664,8 @@ end
     sol = generate_perturbation(m, p_d, p_f; settings, cache)
     @test sol.retcode == :Success
 
-    @test sol.x_ergodic.Σ.mat ≈ [0.07005411173180152 0.00015997603451513398;
-                                 0.00015997603451513398 0.00010416666666666667]  # numerically the same as the version without perturbing the covariance
+    @test sol.x_ergodic_var ≈ [0.07005411173180152 0.00015997603451513398;
+                               0.00015997603451513398 0.00010416666666666667]  # numerically the same as the version without perturbing the covariance
 end
 
 @testset "Callbacks" begin