Rename C_D to covariance in ESMDA API (#157)
* renamed C_D to covariance in ESMDA public API

* fix notebook

* updated docstring
tommyod authored Oct 18, 2023
1 parent 479c1cb commit f616c88
Showing 3 changed files with 63 additions and 44 deletions.
12 changes: 7 additions & 5 deletions docs/source/Oscillator.py
@@ -12,6 +12,8 @@
# language: python
# name: python3
# ---

# %%
# ruff: noqa: E402
# %% [markdown]
# # Estimating parameters of an anharmonic oscillator
@@ -54,7 +56,7 @@
#
#

#%%
# %%
import numpy as np
from matplotlib import pyplot as plt
from scipy import stats
@@ -91,6 +93,7 @@ def plot_result(A, response_x_axis, title=None):
# %% [markdown]
# ## Setup


# %%
def generate_observations(K):
"""Run the model with true parameters, then generate observations."""
@@ -286,10 +289,9 @@ def iterative_smoother(A):

# %%
smoother = ies.ESMDA(
# Here C_D is a covariance matrix. If a 1D array is passed,
# it is interpreted as the diagonal of the covariance matrix,
# and NOT as a vector of standard deviations
C_D=observation_errors**2,
# If a 1D array is passed, it is interpreted
# as the diagonal of the covariance matrix.
covariance=observation_errors**2,
observations=observation_values,
# The inflation factors used in ESMDA
# They are scaled so that sum_i alpha_i^-1 = 1
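The constructor call above is cut off by the diff view. As a rough sketch of how the renamed argument is passed (reusing the notebook's `ies` alias and its `observation_errors`/`observation_values` arrays, with placeholder data and keyword arguments where the notebook's exact values are not shown):

import numpy as np
import iterative_ensemble_smoother as ies

# Placeholder stand-ins for the notebook's observation arrays.
observation_values = np.array([1.2, 0.9, 1.1])
observation_errors = np.array([0.1, 0.1, 0.2])

smoother = ies.ESMDA(
    # A 1D array is interpreted as the diagonal of the covariance
    # matrix, so variances are passed here, not standard deviations.
    covariance=observation_errors**2,
    observations=observation_values,
    alpha=5,   # number of assimilation steps (the documented default)
    seed=42,   # placeholder seed for reproducibility
)
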
55 changes: 33 additions & 22 deletions src/iterative_ensemble_smoother/esmda.py
@@ -42,12 +42,18 @@ class ESMDA:
Parameters
----------
C_D : np.ndarray
Covariance matrix of outputs of shape (num_outputs, num_outputs).
If a 1D array is passed, it represents a diagonal covariance matrix.
covariance : npt.NDArray[np.double]
Either a 1D array of diagonal covariances, or a 2D covariance matrix.
The shape is either (num_observations,) or (num_observations, num_observations).
This is C_D in Emerick (2013), and represents observation or measurement
errors. We observe d from the real world, y from the model g(x), and
assume that d = y + e, where the error e is multivariate normal with
covariance given by `covariance`.
observations : np.ndarray
1D array of shape (num_inputs,) representing real-world observations.
1D array of shape (num_observations,) representing real-world observations.
This is d_obs in Emerick (2013).
alpha : int or 1D np.ndarray, optional
Multiplicative factor for the covariance.
If an integer `alpha` is given, an array with length `alpha` and
elements `alpha` is constructed. If a 1D array is given, it is
normalized so sum_i 1/alpha_i = 1 and used. The default is 5, which
@@ -58,12 +64,13 @@
The default is None.
inversion : str, optional
Which inversion method to use. The default is "exact".
See the dictionary ESMDA._inversion_methods for more information.
Examples
--------
>>> C_D = np.diag([1, 1, 1])
>>> covariance = np.diag([1, 1, 1])
>>> observations = np.array([1, 2, 3])
>>> esmda = ESMDA(C_D, observations)
>>> esmda = ESMDA(covariance, observations)
"""

@@ -76,24 +83,26 @@ class ESMDA:

def __init__(
self,
C_D: npt.NDArray[np.double],
covariance: npt.NDArray[np.double],
observations: npt.NDArray[np.double],
alpha: Union[int, npt.NDArray[np.double]] = 5,
seed: Union[np.random._generator.Generator, int, None] = None,
inversion: str = "exact",
) -> None:
# Validate inputs
if not (isinstance(C_D, np.ndarray) and C_D.ndim in (1, 2)):
raise TypeError("Argument `C_D` must be a NumPy array of dimension 1 or 2.")
if not (isinstance(covariance, np.ndarray) and covariance.ndim in (1, 2)):
raise TypeError(
"Argument `covariance` must be a NumPy array of dimension 1 or 2."
)

if C_D.ndim == 2 and C_D.shape[0] != C_D.shape[1]:
raise ValueError("Argument `C_D` must be square if it's 2D.")
if covariance.ndim == 2 and covariance.shape[0] != covariance.shape[1]:
raise ValueError("Argument `covariance` must be square if it's 2D.")

if not (isinstance(observations, np.ndarray) and observations.ndim == 1):
raise TypeError("Argument `observations` must be a 1D NumPy array.")

if not observations.shape[0] == C_D.shape[0]:
raise ValueError("Shapes of `observations` and `C_D` must match.")
if not observations.shape[0] == covariance.shape[0]:
raise ValueError("Shapes of `observations` and `covariance` must match.")

if not (
(isinstance(alpha, np.ndarray) and alpha.ndim == 1)
@@ -139,14 +148,14 @@ def __init__(
# If it's a full matrix, we gain speedup by only computing cholesky once
# If it's a diagonal, we gain speedup by never having to compute cholesky

if isinstance(C_D, np.ndarray) and C_D.ndim == 2:
self.C_D_L = sp.linalg.cholesky(C_D, lower=False)
elif isinstance(C_D, np.ndarray) and C_D.ndim == 1:
self.C_D_L = np.sqrt(C_D)
if isinstance(covariance, np.ndarray) and covariance.ndim == 2:
self.C_D_L = sp.linalg.cholesky(covariance, lower=False)
elif isinstance(covariance, np.ndarray) and covariance.ndim == 1:
self.C_D_L = np.sqrt(covariance)
else:
raise TypeError("Argument `C_D` must be 1D or 2D array")
raise TypeError("Argument `covariance` must be 1D or 2D array")

self.C_D = C_D
self.C_D = covariance
assert isinstance(self.C_D, np.ndarray) and self.C_D.ndim in (1, 2)

def num_assimilations(self) -> int:
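
The comment above explains the two branches: for a full matrix the Cholesky factor is computed once, while for a diagonal covariance it reduces to an element-wise square root. A small standalone sketch of why the two branches agree on a diagonal matrix:

import numpy as np
from scipy import linalg

variances = np.array([4.0, 9.0, 16.0])

# The upper-triangular Cholesky factor of the dense diagonal matrix ...
dense_factor = linalg.cholesky(np.diag(variances), lower=False)

# ... has the element-wise square roots on its diagonal and zeros elsewhere,
# so storing np.sqrt(variances) carries the same information more cheaply.
assert np.allclose(dense_factor, np.diag(np.sqrt(variances)))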
@@ -162,14 +171,16 @@ def assimilate(
) -> npt.NDArray[np.double]:
"""Assimilate data and return an updated ensemble X_posterior.
num_parameters, ensemble_size
Parameters
----------
X : np.ndarray
2D array of shape (num_inputs, num_ensemble_members).
2D array of shape (num_parameters, ensemble_size).
Y : np.ndarray
2D array of shape (num_ouputs, num_ensemble_members).
2D array of shape (num_observations, ensemble_size).
ensemble_mask : np.ndarray
1D boolean array of length `num_ensemble_members`, describing which
1D boolean array of length `ensemble_size`, describing which
ensemble members are active. Inactive realizations are ignored.
Defaults to all active.
overwrite : bool
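The `assimilate` docstring above is truncated by the diff view. Combining the documented shapes with the constructor, a typical update loop might look like the following sketch; the linear forward model `g`, the array sizes, and the import path are illustrative assumptions, not taken from the library:

import numpy as np
from iterative_ensemble_smoother import ESMDA

rng = np.random.default_rng(42)
num_parameters, num_observations, ensemble_size = 10, 5, 100

# Hypothetical linear forward model mapping parameters to responses.
A = rng.normal(size=(num_observations, num_parameters))

def g(X):
    return A @ X

covariance = np.ones(num_observations)  # diagonal observation-error covariance
observations = rng.normal(size=num_observations)

esmda = ESMDA(covariance, observations, alpha=5, seed=1)
X = rng.normal(size=(num_parameters, ensemble_size))  # prior ensemble
for _ in range(esmda.num_assimilations()):
    X = esmda.assimilate(X, g(X))  # responses are recomputed each iteration
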
40 changes: 23 additions & 17 deletions tests/test_esmda.py
@@ -43,18 +43,24 @@ def test_that_diagonal_covariance_gives_same_answer_as_dense(self, seed, inversi
Y_prior = rng.normal(size=(num_outputs, num_ensemble))

# Measurement errors
C_D = np.exp(rng.normal(size=num_outputs))
covariance = np.exp(rng.normal(size=num_outputs))

# Observations
observations = rng.normal(size=num_outputs, loc=1)

esmda = ESMDA(C_D, observations, alpha=alpha, seed=seed, inversion=inversion)
esmda = ESMDA(
covariance, observations, alpha=alpha, seed=seed, inversion=inversion
)
X_posterior1 = np.copy(X_prior)
for _ in range(esmda.num_assimilations()):
X_posterior1 = esmda.assimilate(X_posterior1, Y_prior)

esmda = ESMDA(
np.diag(C_D), observations, alpha=alpha, seed=seed, inversion=inversion
np.diag(covariance),
observations,
alpha=alpha,
seed=seed,
inversion=inversion,
)
X_posterior2 = np.copy(X_prior)
for _ in range(esmda.num_assimilations()):
@@ -88,18 +94,18 @@ def G(X):
X_prior = rng.normal(size=(num_inputs, num_ensemble))

# Measurement errors
C_D = np.eye(num_outputs)
covariance = np.eye(num_outputs)

# The true inputs and observations, a result of running with X_true = 1
X_true = np.ones(shape=(num_inputs,))
observations = G(X_true)

# Prepare ESMDA instance running with lower number of ensemble members
esmda_subset = ESMDA(C_D, observations, alpha=alpha, seed=seed)
esmda_subset = ESMDA(covariance, observations, alpha=alpha, seed=seed)
X_i_subset = np.copy(X_prior[:, ensemble_mask])

# Prepare ESMDA instance running with all ensemble members
esmda_masked = ESMDA(C_D, observations, alpha=alpha, seed=seed)
esmda_masked = ESMDA(covariance, observations, alpha=alpha, seed=seed)
X_i = np.copy(X_prior)

# Run both
@@ -133,20 +139,20 @@ def G(X):
X_prior = rng.normal(size=(num_inputs, num_ensemble))

# Measurement errors
C_D = np.eye(num_outputs)
covariance = np.eye(num_outputs)

# The true inputs and observations, a result of running with N(1, 1)
X_true = rng.normal(size=(num_inputs,)) + 1
observations = G(X_true)

# Create ESMDA instance from an integer `alpha` and run it
esmda_integer = ESMDA(C_D, observations, alpha=5, seed=seed)
esmda_integer = ESMDA(covariance, observations, alpha=5, seed=seed)
X_i_int = np.copy(X_prior)
for _ in range(esmda_integer.num_assimilations()):
X_i_int = esmda_integer.assimilate(X_i_int, G(X_i_int))

# Create another ESMDA instance from a vector `alpha` and run it
esmda_array = ESMDA(C_D, observations, alpha=np.ones(5), seed=seed)
esmda_array = ESMDA(covariance, observations, alpha=np.ones(5), seed=seed)
X_i_array = np.copy(X_prior)
for _ in range(esmda_array.num_assimilations()):
X_i_array = esmda_array.assimilate(X_i_array, G(X_i_array))
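
This test relies on the normalization described in the `alpha` docstring: the integer 5 expands to five equal factors, and np.ones(5) is rescaled so that sum_i 1/alpha_i = 1, which again gives five factors of 5. One way to write such a rescaling, as a sketch of the documented behaviour rather than the library's exact code:

import numpy as np

def normalize_alpha(alpha):
    # Scale inflation factors so that sum_i 1/alpha_i == 1.
    return alpha * np.sum(1.0 / alpha)

print(normalize_alpha(np.ones(5)))  # [5. 5. 5. 5. 5.]
print(np.sum(1.0 / normalize_alpha(np.array([1.0, 2.0, 4.0]))))  # 1.0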
@@ -355,21 +361,21 @@ def setup(self):
Y_prior = rng.normal(size=(num_outputs, num_ensemble))

# Measurement errors
C_D = np.exp(rng.normal(size=num_outputs))
covariance = np.exp(rng.normal(size=num_outputs))

# Observations
observations = rng.normal(size=num_outputs, loc=1)

return X_prior, Y_prior, C_D, observations
return X_prior, Y_prior, covariance, observations

@pytest.mark.limit_memory("138 MB")
def test_ESMDA_memory_usage_subspace_inversion(self, setup):
# TODO: Currently this is a regression test. Work to improve memory usage.

X_prior, Y_prior, C_D, observations = setup
X_prior, Y_prior, covariance, observations = setup

# Create ESMDA instance from an integer `alpha` and run it
esmda = ESMDA(C_D, observations, alpha=1, seed=1, inversion="subspace")
esmda = ESMDA(covariance, observations, alpha=1, seed=1, inversion="subspace")

for _ in range(esmda.num_assimilations()):
esmda.assimilate(X_prior, Y_prior)
@@ -393,21 +399,21 @@ def test_that_float_dtypes_are_preserved(self, inversion, dtype, diagonal):
Y_prior = rng.normal(size=(num_outputs, num_ensemble))

# Measurement errors
C_D = np.exp(rng.normal(size=num_outputs))
covariance = np.exp(rng.normal(size=num_outputs))
if not diagonal:
C_D = np.diag(C_D)
covariance = np.diag(covariance)

# Observations
observations = rng.normal(size=num_outputs, loc=1)

# Convert types
X_prior = X_prior.astype(dtype)
Y_prior = Y_prior.astype(dtype)
C_D = C_D.astype(dtype)
covariance = covariance.astype(dtype)
observations = observations.astype(dtype)

# Create ESMDA instance from an integer `alpha` and run it
esmda = ESMDA(C_D, observations, alpha=1, seed=1, inversion=inversion)
esmda = ESMDA(covariance, observations, alpha=1, seed=1, inversion=inversion)

for _ in range(esmda.num_assimilations()):
X_posterior = esmda.assimilate(X_prior, Y_prior)
