Skip to content

Commit

Permalink
Merge branch 'rg/GNKI-docs' of github.com:CliMA/EnsembleKalmanProcess…
Browse files Browse the repository at this point in the history
…es.jl into rg/GNKI-docs
  • Loading branch information
rgjini committed Dec 5, 2024
2 parents 63f7f53 + 075d3b8 commit 3ca1653
Show file tree
Hide file tree
Showing 33 changed files with 1,426 additions and 362 deletions.
7 changes: 7 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/" # Location of package manifests
schedule:
interval: "weekly"
2 changes: 1 addition & 1 deletion .github/workflows/DocPreviewCleanup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout gh-pages branch
uses: actions/checkout@v2
uses: actions/checkout@v4
with:
ref: gh-pages
- name: Delete preview and history + push changes
Expand Down
7 changes: 4 additions & 3 deletions .github/workflows/Docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ jobs:
timeout-minutes: 60
steps:
- name: Cancel Previous Runs
uses: styfle/cancel-workflow-action@0.9.1
uses: styfle/cancel-workflow-action@0.12.1
with:
access_token: ${{ github.token }}
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v1
with:
version: '1'
show-versioninfo: 'true'
- name: Cache artifacts
uses: actions/cache@v1
uses: actions/cache@v4
env:
cache-name: cache-artifacts
with:
Expand All @@ -37,5 +37,6 @@ jobs:
- name: Build and deploy
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
JULIA_DEBUG: Documenter
run: julia --color=yes --project=docs/ docs/make.jl
6 changes: 3 additions & 3 deletions .github/workflows/JuliaFormatter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ jobs:
timeout-minutes: 30
steps:
- name: Cancel Previous Runs
uses: styfle/cancel-workflow-action@0.9.1
uses: styfle/cancel-workflow-action@0.12.1
with:
access_token: ${{ github.token }}

- uses: actions/checkout@v2
- uses: actions/checkout@v4

- uses: dorny/paths-filter@v2.9.1
- uses: dorny/paths-filter@v3.0.2
id: filter
with:
filters: |
Expand Down
16 changes: 7 additions & 9 deletions .github/workflows/Tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,23 @@ jobs:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
fail-fast: false #don't cancel all jobs if one fails
matrix:
version:
- '1.6' # Long-Term Support release
- 'lts' # Long-Term Support release
- '1' # Latest 1.x release of julia
os:
- ubuntu-latest
- windows-latest
- macOS-latest
arch:
- x64
steps:
- uses: styfle/cancel-workflow-action@0.9.1
- uses: styfle/cancel-workflow-action@0.12.1
with:
access_token: ${{ github.token }}
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- run: julia --project -e 'using Pkg; Pkg.update()' #windows in particular sometimes doesnt update packages
- uses: julia-actions/julia-buildpkg@v1

Expand All @@ -38,7 +36,7 @@ jobs:
LCOV.writefile("coverage-lcov.info", Codecov.process_folder())'
if: ${{ matrix.os == 'ubuntu-latest' }}
- name: Submit coverage
uses: codecov/codecov-action@v1
uses: codecov/codecov-action@v5
with:
token: ${{secrets.CODECOV_TOKEN}}
if: ${{ matrix.os == 'ubuntu-latest' }}
Expand Down
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ version = "2.0.1"
Convex = "f65535da-76fb-5f13-bab9-19810c17039a"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
FFMPEG = "c87230d0-a227-11e9-1b43-d7ebe4e7570a"
GaussianRandomFields = "e4b2fa32-6e09-5554-b718-106ed5adafe9"
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
Expand All @@ -26,6 +26,7 @@ TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
Convex = "0.15, 0.16"
Distributions = "0.24.14, 0.25"
DocStringExtensions = "0.8, 0.9"
FFMPEG = "0.4"
GaussianRandomFields = "2"
Interpolations = "0.13, 0.14, 0.15"
LinearAlgebra = "1"
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ If you use the examples or code, please cite our article at JOSS in your publish
[joss-url]: https://joss.theoj.org/papers/5cb2d4c6af8840af61b44071ae1e672a

### Requirements
Julia version 1.6+
Julia LTS version or newer

# Quick links!

Expand All @@ -51,4 +51,4 @@ Julia version 1.6+
![eki-getting-started](https://github.com/CliMA/EnsembleKalmanProcesses.jl/assets/45243236/e083ab8c-4f93-432f-9ad5-97aff22764ad)
<!---
# Link to Miro for editing photo (ask haakon for access): https://miro.com/app/board/uXjVNm_1teY=/?share_link_id=329380184889
-->
-->
1 change: 1 addition & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ pages = [
"Learning rate schedulers" => "learning_rate_scheduler.md",
"Prior distributions" => "parameter_distributions.md",
"Observations and Minibatching" => "observations.md",
"Update Groups" => "update_groups.md",
"Localization and SEC" => "localization.md",
"Inflation" => "inflation.md",
"Parallelism and HPC" => "parallel_hpc.md",
Expand Down
101 changes: 101 additions & 0 deletions docs/src/update_groups.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# [Update Groups](@id update-groups)

The `UpdateGroup` object facilitates blocked EKP updates, based on a provided series of user-defined pairs of parameters and data. This allows users to enforce any *known* (in)dependences between different groups of parameters during the update. For example,
```julia
# update parameter 1 with data 1 and 2
# update parameters 2 and 3 jointly with data 2, 3, and 4
Dict(
["parameter_1"] => ["data_1", "data_2"],
["parameter_2", "parameter_3"] => ["data_2", "data_3", "data_4"],
)
```
How to construct this object and pass it into an `EnsembleKalmanProcess` is detailed below.

!!! note "This improves scaling at the cost of user-imposed structure"
As many of the `Process` updates scale super-linearly with the data dimension ``d``, say as ``d^\alpha`` with ``\alpha > 1``, using ``K`` update groups of equal size will improve this scaling to ``K (\frac{d}{K})^\alpha``.

## Recommended construction - shown by example

Constructing update groups starts with constructing the prior and the observations. Parameter distributions and observations may be constructed in units and given names, and these names are utilized to build the update groups with a convenient constructor `create_update_groups`.

For illustration, we take code snippets from the example found [here](https://github.com/CliMA/EnsembleKalmanProcesses.jl/blob/main/examples/UpdateGroups/). This example is concerned with learning several parameters in a coupled two-scale Lorenz 96 system:
```math
\begin{aligned}
\frac{\partial X_i}{\partial t} & = -X_{i-1}(X_{i-2} - X_{i+1}) - X_i - GY_i + F_1 + F_2\,\sin(2\pi t F_3)\\
\frac{\partial Y_i}{\partial t} & = -cbY_{i+1}(Y_{i+2} - Y_{i-1}) - cY_i + \frac{hc}{b} X_i
\end{aligned}
```
Parameters are learnt by fitting estimated moments of a realized `X` and `Y` system, to some target moments over a time interval.

We create a prior by combining several *named* `ParameterDistribution`s.
```julia
param_names = ["F", "G", "h", "c", "b"]

prior_F = ParameterDistribution(
Dict(
"name" => param_names[1],
"distribution" => Parameterized(MvNormal([1.0, 0.0, -2.0], I)),
"constraint" => repeat([bounded_below(0)], 3),
),
) # gives 3-D dist
prior_G = constrained_gaussian(param_names[2], 5.0, 4.0, 0, Inf)
prior_h = constrained_gaussian(param_names[3], 5.0, 4.0, 0, Inf)
prior_c = constrained_gaussian(param_names[4], 5.0, 4.0, 0, Inf)
prior_b = constrained_gaussian(param_names[5], 5.0, 4.0, 0, Inf)
priors = combine_distributions([prior_F, prior_G, prior_h, prior_c, prior_b])
```
Now we likewise construct observed moments by combining several *named* `Observation`s
```julia
# given a list of vector statistics y and their covariances Γ
data_block_names = ["<X>", "<Y>", "<X^2>", "<Y^2>", "<XY>"]

observation_vec = []
for i in 1:length(data_block_names)
push!(
observation_vec,
Observation(Dict(
"samples" => y[i],
"covariances" => Γ[i],
"names" => data_block_names[i]
)),
)
end
observation = combine_observations(observation_vec)
```
Finally, we are ready to define the update groups. We may specify our choice by partitioning the parameter names as keys of a dictionary, and their paired data names as values. Here we create two groups:
```julia
# update parameters F,G with data <X>, <X^2>, <XY>
# update parameters h, c, b with data <Y>, <Y^2>, <XY>
group_identifiers = Dict(
["F", "G"] => ["<X>", "<X^2>", "<XY>"],
["h", "c", "b"] => ["<Y>", "<Y^2>", "<XY>"],
)
```
We then create the update groups with our convenient constructor
```julia
update_groups = create_update_groups(priors, observation, group_identifiers)
```
and this can then be entered into the `EnsembleKalmanProcess` object as a keyword argument
```julia
# initial_params = construct_initial_ensemble(rng, priors, N_ens)
ekiobj = EnsembleKalmanProcess(
initial_params,
observation,
Inversion(),
update_groups = update_groups
)
```

## What happens internally?

We simply perform an independent `update_ensemble!` for each provided pairing and combine model output and updated parameters afterwards. Note that even without specifying an update group, by default EKP will always construct one under the hood.



## Advice for constructing blocks
1. A parameter cannot appear in more than one block (i.e. parameters cannot be updated more than once)
2. The block structure is user-defined, and directly assumes that there is no correlation between blocks. It is up to the user to confirm if there truly is independence between different blocks. Otherwise convergence properties may suffer.
3. This can be used in conjunction with minibatching, so long as the defined data objects are available in all `Observation`s in the series.

!!! note "In future..."
In theory this opens up the possibility to have different configurations, or even processes, in different groups. This could be useful when parameter-data pairings are highly heterogeneous and so the user may wish to exploit, for example, the different processes' scaling properties. However, this has not yet been implemented.
52 changes: 43 additions & 9 deletions examples/Localization/localization_example_lorenz96.jl
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,10 @@ prior = combine_distributions(priors)

initial_ensemble = EKP.construct_initial_ensemble(rng, prior, N_ens)


# Solve problem without localization
ekiobj_vanilla = EKP.EnsembleKalmanProcess(initial_ensemble, y, Γ, Inversion(); rng = rng)
ekiobj_vanilla =
EKP.EnsembleKalmanProcess(initial_ensemble, y, Γ, Inversion(); rng = rng, scheduler = DefaultScheduler())
for i in 1:N_iter
g_ens_vanilla = G(get_ϕ_final(prior, ekiobj_vanilla))
EKP.update_ensemble!(ekiobj_vanilla, g_ens_vanilla, deterministic_forward_map = true)
Expand All @@ -91,6 +93,7 @@ ekiobj_inflated = EKP.EnsembleKalmanProcess(
Γ,
Inversion();
rng = rng,
scheduler = DefaultScheduler(),
# localization_method = BernoulliDropout(0.98),
)

Expand All @@ -108,7 +111,15 @@ end
@info "EKI (inflated) - complete"

# Test SEC
ekiobj_sec = EKP.EnsembleKalmanProcess(initial_ensemble, y, Γ, Inversion(); rng = rng, localization_method = SEC(1.0))
ekiobj_sec = EKP.EnsembleKalmanProcess(
initial_ensemble,
y,
Γ,
Inversion();
rng = rng,
localization_method = SEC(1.0),
scheduler = DefaultScheduler(),
)

for i in 1:N_iter
g_ens = G(get_ϕ_final(prior, ekiobj_sec))
Expand All @@ -117,8 +128,15 @@ end
@info "EKI (SEC) - complete"

# Test SEC with cutoff
ekiobj_sec_cutoff =
EKP.EnsembleKalmanProcess(initial_ensemble, y, Γ, Inversion(); rng = rng, localization_method = SEC(1.0, 0.1))
ekiobj_sec_cutoff = EKP.EnsembleKalmanProcess(
initial_ensemble,
y,
Γ,
Inversion();
rng = rng,
localization_method = SEC(1.0, 0.1),
scheduler = DefaultScheduler(),
)

for i in 1:N_iter
g_ens = G(get_ϕ_final(prior, ekiobj_sec_cutoff))
Expand All @@ -127,8 +145,15 @@ end
@info "EKI (SEC cut-off) - complete"

# Test SECFisher
ekiobj_sec_fisher =
EKP.EnsembleKalmanProcess(initial_ensemble, y, Γ, Inversion(); rng = rng, localization_method = SECFisher())
ekiobj_sec_fisher = EKP.EnsembleKalmanProcess(
initial_ensemble,
y,
Γ,
Inversion();
rng = rng,
localization_method = SECFisher(),
scheduler = DefaultScheduler(),
)

for i in 1:N_iter
g_ens = G(get_ϕ_final(prior, ekiobj_sec_fisher))
Expand All @@ -137,8 +162,15 @@ end
@info "EKI (SEC Fisher) - complete"

# Test SECNice
ekiobj_sec_nice =
EKP.EnsembleKalmanProcess(initial_ensemble, y, Γ, Inversion(); rng = rng, localization_method = SECNice())
ekiobj_sec_nice = EKP.EnsembleKalmanProcess(
initial_ensemble,
y,
Γ,
Inversion();
rng = rng,
localization_method = SECNice(),
scheduler = DefaultScheduler(),
)

for i in 1:N_iter
g_ens = G(get_ϕ_final(prior, ekiobj_sec_nice))
Expand All @@ -150,7 +182,9 @@ end
u_final = get_u_final(ekiobj_sec)
g_final = get_g_final(ekiobj_sec)
cov_est = cov([u_final; g_final], [u_final; g_final], dims = 2, corrected = false)
cov_localized = get_localizer(ekiobj_sec).localize(cov_est)
# need dimension args too
cov_localized =
get_localizer(ekiobj_sec).localize(cov_est, eltype(g_final), size(u_final, 1), size(g_final, 1), size(u_final, 2))

fig = plot(
get_error(ekiobj_vanilla),
Expand Down
7 changes: 7 additions & 0 deletions examples/UpdateGroups/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[deps]
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
EnsembleKalmanProcesses = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Loading

0 comments on commit 3ca1653

Please sign in to comment.