Adaptive localization with AdaptiveESMDA (#168)
Added AdaptiveESMDA class in experimental.py.

* full implementation with dying realizations
* generalized to more parameter groups
* base case for ESMDA. tests for adaptive localization
* added masking to notebook after discussion with team
* added groupby_indices function (see the sketch after this list)
* updated code with masking of Y. added tests
* added test for ESMDA vs. SIES with one iteration
* simplified perturb_observations() argument from 'size' to 'ensemble_size'
* run test_that_ESMDA_and_SIES_produce_same_result_with_one_step() with 10 seeds
* moved example for adaptive localization to a test
* doctests. renaming method. docstring for assimilate()
* compute cov(Y, Y) once, then index on it
* review nitpick + correction_threshold arg can now be float
* simplified code using np.array_split
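
The groupby_indices helper added in experimental.py is not shown in this diff. Below is a minimal sketch of what such a helper might look like, assuming it simply maps each unique label to the positions where it occurs; the actual implementation may differ.

from typing import Dict, Hashable

import numpy as np
import numpy.typing as npt


def groupby_indices(labels: npt.NDArray) -> Dict[Hashable, npt.NDArray[np.int_]]:
    """Map each unique entry in `labels` to the indices where it occurs.

    Hypothetical sketch; the real groupby_indices in experimental.py may differ.
    For example, np.array(["a", "a", "b"]) maps to {"a": [0, 1], "b": [2]}.
    """
    unique, inverse = np.unique(labels, return_inverse=True)
    return {u: np.flatnonzero(inverse == i) for i, u in enumerate(unique)}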

---------

Co-authored-by: Tommy Odland <tommy.odland>
Co-authored-by: Feda Curic <feda.curic@gmail.com>
tommyod and dafeda authored Nov 28, 2023
1 parent b62cb7a commit 32f436b
Showing 4 changed files with 808 additions and 84 deletions.
179 changes: 98 additions & 81 deletions src/iterative_ensemble_smoother/esmda.py
@@ -23,7 +23,8 @@
"""

import numbers
from typing import Tuple, Union
from abc import ABC
from typing import Union

import numpy as np
import numpy.typing as npt
@@ -34,9 +35,96 @@
inversion_subspace,
normalize_alpha,
)
from iterative_ensemble_smoother.utils import sample_mvnormal


class ESMDA:
class BaseESMDA(ABC):
def __init__(
self,
covariance: npt.NDArray[np.double],
observations: npt.NDArray[np.double],
seed: Union[np.random._generator.Generator, int, None] = None,
) -> None:
"""Initialize the instance."""
# Validate inputs
if not (isinstance(covariance, np.ndarray) and covariance.ndim in (1, 2)):
raise TypeError(
"Argument `covariance` must be a NumPy array of dimension 1 or 2."
)

if covariance.ndim == 2 and covariance.shape[0] != covariance.shape[1]:
raise ValueError("Argument `covariance` must be square if it's 2D.")

if not (isinstance(observations, np.ndarray) and observations.ndim == 1):
raise TypeError("Argument `observations` must be a 1D NumPy array.")

if not observations.shape[0] == covariance.shape[0]:
raise ValueError("Shapes of `observations` and `covariance` must match.")

if not (
isinstance(seed, (int, np.random._generator.Generator)) or seed is None
):
raise TypeError(
"Argument `seed` must be an integer "
"or numpy.random._generator.Generator."
)

# Store data
self.observations = observations
self.iteration = 0
self.rng = np.random.default_rng(seed)

# Only compute the covariance factorization once
# If it's a full matrix, we gain speedup by only computing cholesky once
# If it's a diagonal, we gain speedup by never having to compute cholesky
if isinstance(covariance, np.ndarray) and covariance.ndim == 2:
self.C_D_L = sp.linalg.cholesky(covariance, lower=False)
elif isinstance(covariance, np.ndarray) and covariance.ndim == 1:
self.C_D_L = np.sqrt(covariance)
else:
raise TypeError("Argument `covariance` must be 1D or 2D array")

self.C_D = covariance
assert isinstance(self.C_D, np.ndarray) and self.C_D.ndim in (1, 2)

def perturb_observations(
self, *, ensemble_size: int, alpha: float
) -> npt.NDArray[np.double]:
"""Create a matrix D with perturbed observations.
In the Emerick (2013) paper, the matrix D is defined in section 6.
See section 2(b) of the ES-MDA algorithm in the paper.
Parameters
----------
ensemble_size : int
The ensemble size, i.e., the number of columns in the returned array,
which is of shape (num_observations, ensemble_size).
alpha : float
The covariance inflation factor. The sequence of alphas should
obey the equation sum_i (1/alpha_i) = 1. However, this is NOT enforced
in this method call. The user/caller is responsible for this.
Returns
-------
D : np.ndarray
Each column consists of perturbed observations, scaled by alpha.
"""
# Draw samples from zero-centered multivariate normal with cov=alpha * C_D,
# and add them to the observations. Notice that
# if C_D = L @ L.T by the cholesky factorization, then drawing y from
# a zero-centered normal means that y := L @ z, where z ~ norm(0, 1).
# Therefore, scaling C_D by alpha is equivalent to scaling L with sqrt(alpha).

D: npt.NDArray[np.double] = self.observations[:, np.newaxis] + np.sqrt(
alpha
) * sample_mvnormal(C_dd_cholesky=self.C_D_L, rng=self.rng, size=ensemble_size)
assert D.shape == (len(self.observations), ensemble_size)
return D
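
# A quick numerical check of the scaling argument above (a sketch for the
# reader, not used by the class; the class itself draws via sample_mvnormal):
# if C_D = L @ L.T with L lower-triangular and z ~ N(0, I), then
# y = sqrt(alpha) * L @ z has covariance alpha * L @ L.T = alpha * C_D.
#
#     rng = np.random.default_rng(0)
#     C_D = np.array([[2.0, 0.5], [0.5, 1.0]])
#     L = np.linalg.cholesky(C_D)  # lower-triangular factor
#     z = rng.standard_normal(size=(2, 100_000))
#     y = np.sqrt(4.0) * L @ z
#     np.allclose(np.cov(y), 4.0 * C_D, atol=0.1)  # True up to sampling noise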


class ESMDA(BaseESMDA):
"""
Implement an Ensemble Smoother with Multiple Data Assimilation (ES-MDA).
@@ -92,35 +92,15 @@ def __init__(
inversion: str = "exact",
) -> None:
"""Initialize the instance."""
# Validate inputs
if not (isinstance(covariance, np.ndarray) and covariance.ndim in (1, 2)):
raise TypeError(
"Argument `covariance` must be a NumPy array of dimension 1 or 2."
)

if covariance.ndim == 2 and covariance.shape[0] != covariance.shape[1]:
raise ValueError("Argument `covariance` must be square if it's 2D.")

if not (isinstance(observations, np.ndarray) and observations.ndim == 1):
raise TypeError("Argument `observations` must be a 1D NumPy array.")

if not observations.shape[0] == covariance.shape[0]:
raise ValueError("Shapes of `observations` and `covariance` must match.")
super().__init__(covariance=covariance, observations=observations, seed=seed)

if not (
(isinstance(alpha, np.ndarray) and alpha.ndim == 1)
or isinstance(alpha, numbers.Integral)
):
raise TypeError("Argument `alpha` must be an integer or a 1D NumPy array.")

if not (
isinstance(seed, (int, np.random._generator.Generator)) or seed is None
):
raise TypeError(
"Argument `seed` must be an integer "
"or numpy.random._generator.Generator."
)

if not isinstance(inversion, str):
raise TypeError(
"Argument `inversion` must be a string in "
@@ -133,9 +201,6 @@
)

# Store data
self.observations = observations
self.iteration = 0
self.rng = np.random.default_rng(seed)
self.inversion = inversion

# Alpha can either be an integer (num iterations) or a list of weights
@@ -147,19 +212,6 @@ def __init__(
else:
raise TypeError("Alpha must be integer or 1D array.")

# Only compute the covariance factorization once
# If it's a full matrix, we gain speedup by only computing cholesky once
# If it's a diagonal, we gain speedup by never having to compute cholesky

if isinstance(covariance, np.ndarray) and covariance.ndim == 2:
self.C_D_L = sp.linalg.cholesky(covariance, lower=False)
elif isinstance(covariance, np.ndarray) and covariance.ndim == 1:
self.C_D_L = np.sqrt(covariance)
else:
raise TypeError("Argument `covariance` must be 1D or 2D array")

self.C_D = covariance

def num_assimilations(self) -> int:
return len(self.alpha)
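
# Sketch of the alpha convention described in the perturb_observations
# docstring above (assumed behavior of the imported normalize_alpha helper;
# the actual function may differ): an integer alpha means that many equally
# weighted assimilations, and a 1D array of weights is rescaled so that
# sum_i 1/alpha_i == 1.
#
#     def _normalize_alpha_sketch(alpha):
#         alpha = np.asarray(alpha, dtype=float)
#         return alpha * np.sum(1.0 / alpha)  # afterwards sum(1 / alpha) == 1
#
#     _normalize_alpha_sketch([5.0, 5.0, 5.0, 5.0])  # -> array([4., 4., 4., 4.])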

@@ -173,6 +225,8 @@ def assimilate(
) -> npt.NDArray[np.double]:
"""Assimilate data and return an updated ensemble X_posterior.
X_posterior = smoother.assimilate(X, Y)
Parameters
----------
X : np.ndarray
@@ -195,7 +249,7 @@ def assimilate(
Returns
-------
X_posterior : np.ndarray
2D array of shape (num_parameters, num_ensemble_members).
2D array of shape (num_parameters, ensemble_size).
"""
if self.iteration >= self.num_assimilations():
@@ -225,8 +279,9 @@ def assimilate(
# if C_D = L L.T by the cholesky factorization, then drawing y from
# a zero-centered normal means that y := L @ z, where z ~ norm(0, 1)
# Therefore, scaling C_D by alpha is equivalent to scaling L with sqrt(alpha)
size = (num_outputs, num_ensemble)
D = self.perturb_observations(size=size)
D = self.perturb_observations(
ensemble_size=num_ensemble, alpha=self.alpha[self.iteration]
)
assert D.shape == (num_outputs, num_ensemble)

# Line 2 (c) in the description of ES-MDA in the 2013 Emerick paper
@@ -290,7 +345,7 @@ def compute_transition_matrix(
# or
# X += X @ T

D = self.perturb_observations(size=Y.shape)
D = self.perturb_observations(ensemble_size=Y.shape[1], alpha=alpha)
inversion_func = self._inversion_methods[self.inversion]
return inversion_func(
alpha=alpha,
@@ -302,44 +357,6 @@ def compute_transition_matrix(
return_T=True, # Ensures that we don't need X
)
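
# Usage sketch for the transition matrix (illustrative only; the full
# signature of compute_transition_matrix is truncated in this diff, so the
# keyword arguments below are an assumption, and X, Y, alpha are placeholders):
#
#     T = smoother.compute_transition_matrix(Y=Y, alpha=alpha)
#     X_posterior = X + X @ T  # equivalently, in place: X += X @ T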

def perturb_observations(self, *, size: Tuple[int, int]) -> npt.NDArray[np.double]:
"""Create a matrix D with perturbed observations.
In the Emerick (2013) paper, the matrix D is defined in section 6.
See section 2(b) of the ES-MDA algorithm in the paper.
Parameters
----------
size : Tuple[int, int]
The size, a tuple with (num_observations, ensemble_size).
Returns
-------
D : np.ndarray
Each column consists of perturbed observations, scaled by alpha.
"""
# Draw samples from zero-centered multivariate normal with cov=alpha * C_D,
# and add them to the observations. Notice that
# if C_D = L @ L.T by the cholesky factorization, then drawing y from
# a zero-centered normal means that y := L @ z, where z ~ norm(0, 1).
# Therefore, scaling C_D by alpha is equivalent to scaling L with sqrt(alpha).

D: npt.NDArray[np.double]

# Two cases, depending on whether C_D was given as 1D or 2D array
if self.C_D.ndim == 2:
D = self.observations.reshape(-1, 1) + np.sqrt(
self.alpha[self.iteration]
) * self.C_D_L @ self.rng.normal(size=size)
else:
D = self.observations.reshape(-1, 1) + np.sqrt(
self.alpha[self.iteration]
) * self.rng.normal(size=size) * self.C_D_L.reshape(-1, 1)
assert D.shape == size

return D


if __name__ == "__main__":
import pytest
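
For reference, a minimal end-to-end sketch of how the ESMDA class above might be driven. The linear forward model, sizes, and seed are invented for illustration; only the constructor arguments, num_assimilations(), and assimilate(X, Y) mirror the API shown in the diff, and the top-level import assumes ESMDA is re-exported by the package.

import numpy as np

from iterative_ensemble_smoother import ESMDA

rng = np.random.default_rng(42)
num_parameters, num_observations, ensemble_size = 10, 5, 100

# Hypothetical linear forward model g(x) = A @ x and synthetic observations.
A = rng.standard_normal(size=(num_observations, num_parameters))
x_true = rng.standard_normal(size=num_parameters)
observations = A @ x_true + 0.1 * rng.standard_normal(size=num_observations)
covariance = np.full(num_observations, 0.1**2)  # diagonal passed as a 1D array

smoother = ESMDA(
    covariance=covariance,
    observations=observations,
    alpha=4,  # four equally weighted assimilation steps
    seed=42,
)

X = rng.standard_normal(size=(num_parameters, ensemble_size))  # prior ensemble
for _ in range(smoother.num_assimilations()):
    Y = A @ X  # run the forward model on every realization
    X = smoother.assimilate(X, Y)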
