Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename C_D to covariance in ESMDA API #157

Merged
merged 3 commits into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions docs/source/Oscillator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# language: python
# name: python3
# ---

# %%
# ruff: noqa: E402
# %% [markdown]
# # Estimating parameters of an anharmonic oscillator
Expand Down Expand Up @@ -54,7 +56,7 @@
#
#

#%%
# %%
import numpy as np
from matplotlib import pyplot as plt
from scipy import stats
Expand Down Expand Up @@ -91,6 +93,7 @@ def plot_result(A, response_x_axis, title=None):
# %% [markdown]
# ## Setup


# %%
def generate_observations(K):
"""Run the model with true parameters, then generate observations."""
Expand Down Expand Up @@ -286,10 +289,9 @@ def iterative_smoother(A):

# %%
smoother = ies.ESMDA(
# Here C_D is a covariance matrix. If a 1D array is passed,
# it is interpreted as the diagonal of the covariance matrix,
# and NOT as a vector of standard deviations
C_D=observation_errors**2,
# If a 1D array is passed, it is interpreted
# as the diagonal of the covariance matrix.
covariance=observation_errors**2,
observations=observation_values,
# The inflation factors used in ESMDA
# They are scaled so that sum_i alpha_i^-1 = 1
Expand Down
55 changes: 33 additions & 22 deletions src/iterative_ensemble_smoother/esmda.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,18 @@ class ESMDA:

Parameters
----------
C_D : np.ndarray
Covariance matrix of outputs of shape (num_outputs, num_outputs).
If a 1D array is passed, it represents a diagonal covariance matrix.
covariance : npt.NDArray[np.double]
Either a 1D array of diagonal covariances, or a 2D covariance matrix.
The shape is either (num_observations,) or (num_observations, num_observations).
This is C_D in Emerick (2013), and represents observation or measurement
errors. We observe d from the real world, y from the model g(x), and
assume that d = y + e, where the error e is multivariate normal with
covariance given by `covariance`.
observations : np.ndarray
1D array of shape (num_inputs,) representing real-world observations.
1D array of shape (num_observations,) representing real-world observations.
This is d_obs in Emerick (2013).
alpha : int or 1D np.ndarray, optional
Multiplicative factor for the covariance.
If an integer `alpha` is given, an array with length `alpha` and
elements `alpha` is constructed. If an 1D array is given, it is
normalized so sum_i 1/alpha_i = 1 and used. The default is 5, which
Expand All @@ -58,12 +64,13 @@ class ESMDA:
The default is None.
inversion : str, optional
Which inversion method to use. The default is "exact".
See the dictionary ESMDA._inversion_methods for more information.

Examples
--------
>>> C_D = np.diag([1, 1, 1])
>>> covariance = np.diag([1, 1, 1])
>>> observations = np.array([1, 2, 3])
>>> esmda = ESMDA(C_D, observations)
>>> esmda = ESMDA(covariance, observations)

"""

Expand All @@ -76,24 +83,26 @@ class ESMDA:

def __init__(
self,
C_D: npt.NDArray[np.double],
covariance: npt.NDArray[np.double],
observations: npt.NDArray[np.double],
alpha: Union[int, npt.NDArray[np.double]] = 5,
seed: Union[np.random._generator.Generator, int, None] = None,
inversion: str = "exact",
) -> None:
# Validate inputs
if not (isinstance(C_D, np.ndarray) and C_D.ndim in (1, 2)):
raise TypeError("Argument `C_D` must be a NumPy array of dimension 1 or 2.")
if not (isinstance(covariance, np.ndarray) and covariance.ndim in (1, 2)):
raise TypeError(
"Argument `covariance` must be a NumPy array of dimension 1 or 2."
)

if C_D.ndim == 2 and C_D.shape[0] != C_D.shape[1]:
raise ValueError("Argument `C_D` must be square if it's 2D.")
if covariance.ndim == 2 and covariance.shape[0] != covariance.shape[1]:
raise ValueError("Argument `covariance` must be square if it's 2D.")

if not (isinstance(observations, np.ndarray) and observations.ndim == 1):
raise TypeError("Argument `observations` must be a 1D NumPy array.")

if not observations.shape[0] == C_D.shape[0]:
raise ValueError("Shapes of `observations` and `C_D` must match.")
if not observations.shape[0] == covariance.shape[0]:
raise ValueError("Shapes of `observations` and `covariance` must match.")

if not (
(isinstance(alpha, np.ndarray) and alpha.ndim == 1)
Expand Down Expand Up @@ -139,14 +148,14 @@ def __init__(
# If it's a full matrix, we gain speedup by only computing cholesky once
# If it's a diagonal, we gain speedup by never having to compute cholesky

if isinstance(C_D, np.ndarray) and C_D.ndim == 2:
self.C_D_L = sp.linalg.cholesky(C_D, lower=False)
elif isinstance(C_D, np.ndarray) and C_D.ndim == 1:
self.C_D_L = np.sqrt(C_D)
if isinstance(covariance, np.ndarray) and covariance.ndim == 2:
self.C_D_L = sp.linalg.cholesky(covariance, lower=False)
elif isinstance(covariance, np.ndarray) and covariance.ndim == 1:
self.C_D_L = np.sqrt(covariance)
else:
raise TypeError("Argument `C_D` must be 1D or 2D array")
raise TypeError("Argument `covariance` must be 1D or 2D array")

self.C_D = C_D
self.C_D = covariance
assert isinstance(self.C_D, np.ndarray) and self.C_D.ndim in (1, 2)

def num_assimilations(self) -> int:
Expand All @@ -162,14 +171,16 @@ def assimilate(
) -> npt.NDArray[np.double]:
"""Assimilate data and return an updated ensemble X_posterior.

num_parameters, ensemble_size

Parameters
----------
X : np.ndarray
2D array of shape (num_inputs, num_ensemble_members).
2D array of shape (num_parameters, ensemble_size).
Y : np.ndarray
2D array of shape (num_ouputs, num_ensemble_members).
2D array of shape (num_parameters, ensemble_size).
ensemble_mask : np.ndarray
1D boolean array of length `num_ensemble_members`, describing which
1D boolean array of length `ensemble_size`, describing which
ensemble members are active. Inactive realizations are ignored.
Defaults to all active.
overwrite : bool
Expand Down
40 changes: 23 additions & 17 deletions tests/test_esmda.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,24 @@ def test_that_diagonal_covariance_gives_same_answer_as_dense(self, seed, inversi
Y_prior = rng.normal(size=(num_outputs, num_ensemble))

# Measurement errors
C_D = np.exp(rng.normal(size=num_outputs))
covariance = np.exp(rng.normal(size=num_outputs))

# Observations
observations = rng.normal(size=num_outputs, loc=1)

esmda = ESMDA(C_D, observations, alpha=alpha, seed=seed, inversion=inversion)
esmda = ESMDA(
covariance, observations, alpha=alpha, seed=seed, inversion=inversion
)
X_posterior1 = np.copy(X_prior)
for _ in range(esmda.num_assimilations()):
X_posterior1 = esmda.assimilate(X_posterior1, Y_prior)

esmda = ESMDA(
np.diag(C_D), observations, alpha=alpha, seed=seed, inversion=inversion
np.diag(covariance),
observations,
alpha=alpha,
seed=seed,
inversion=inversion,
)
X_posterior2 = np.copy(X_prior)
for _ in range(esmda.num_assimilations()):
Expand Down Expand Up @@ -88,18 +94,18 @@ def G(X):
X_prior = rng.normal(size=(num_inputs, num_ensemble))

# Measurement errors
C_D = np.eye(num_outputs)
covariance = np.eye(num_outputs)

# The true inputs and observations, a result of running with X_true = 1
X_true = np.ones(shape=(num_inputs,))
observations = G(X_true)

# Prepare ESMDA instance running with lower number of ensemble members
esmda_subset = ESMDA(C_D, observations, alpha=alpha, seed=seed)
esmda_subset = ESMDA(covariance, observations, alpha=alpha, seed=seed)
X_i_subset = np.copy(X_prior[:, ensemble_mask])

# Prepare ESMDA instance running with all ensemble members
esmda_masked = ESMDA(C_D, observations, alpha=alpha, seed=seed)
esmda_masked = ESMDA(covariance, observations, alpha=alpha, seed=seed)
X_i = np.copy(X_prior)

# Run both
Expand Down Expand Up @@ -133,20 +139,20 @@ def G(X):
X_prior = rng.normal(size=(num_inputs, num_ensemble))

# Measurement errors
C_D = np.eye(num_outputs)
covariance = np.eye(num_outputs)

# The true inputs and observations, a result of running with N(1, 1)
X_true = rng.normal(size=(num_inputs,)) + 1
observations = G(X_true)

# Create ESMDA instance from an integer `alpha` and run it
esmda_integer = ESMDA(C_D, observations, alpha=5, seed=seed)
esmda_integer = ESMDA(covariance, observations, alpha=5, seed=seed)
X_i_int = np.copy(X_prior)
for _ in range(esmda_integer.num_assimilations()):
X_i_int = esmda_integer.assimilate(X_i_int, G(X_i_int))

# Create another ESMDA instance from a vector `alpha` and run it
esmda_array = ESMDA(C_D, observations, alpha=np.ones(5), seed=seed)
esmda_array = ESMDA(covariance, observations, alpha=np.ones(5), seed=seed)
X_i_array = np.copy(X_prior)
for _ in range(esmda_array.num_assimilations()):
X_i_array = esmda_array.assimilate(X_i_array, G(X_i_array))
Expand Down Expand Up @@ -355,21 +361,21 @@ def setup(self):
Y_prior = rng.normal(size=(num_outputs, num_ensemble))

# Measurement errors
C_D = np.exp(rng.normal(size=num_outputs))
covariance = np.exp(rng.normal(size=num_outputs))

# Observations
observations = rng.normal(size=num_outputs, loc=1)

return X_prior, Y_prior, C_D, observations
return X_prior, Y_prior, covariance, observations

@pytest.mark.limit_memory("138 MB")
def test_ESMDA_memory_usage_subspace_inversion(self, setup):
# TODO: Currently this is a regression test. Work to improve memory usage.

X_prior, Y_prior, C_D, observations = setup
X_prior, Y_prior, covariance, observations = setup

# Create ESMDA instance from an integer `alpha` and run it
esmda = ESMDA(C_D, observations, alpha=1, seed=1, inversion="subspace")
esmda = ESMDA(covariance, observations, alpha=1, seed=1, inversion="subspace")

for _ in range(esmda.num_assimilations()):
esmda.assimilate(X_prior, Y_prior)
Expand All @@ -393,21 +399,21 @@ def test_that_float_dtypes_are_preserved(self, inversion, dtype, diagonal):
Y_prior = rng.normal(size=(num_outputs, num_ensemble))

# Measurement errors
C_D = np.exp(rng.normal(size=num_outputs))
covariance = np.exp(rng.normal(size=num_outputs))
if not diagonal:
C_D = np.diag(C_D)
covariance = np.diag(covariance)

# Observations
observations = rng.normal(size=num_outputs, loc=1)

# Convert types
X_prior = X_prior.astype(dtype)
Y_prior = Y_prior.astype(dtype)
C_D = C_D.astype(dtype)
covariance = covariance.astype(dtype)
observations = observations.astype(dtype)

# Create ESMDA instance from an integer `alpha` and run it
esmda = ESMDA(C_D, observations, alpha=1, seed=1, inversion=inversion)
esmda = ESMDA(covariance, observations, alpha=1, seed=1, inversion=inversion)

for _ in range(esmda.num_assimilations()):
X_posterior = esmda.assimilate(X_prior, Y_prior)
Expand Down