Skip to content

Commit

Permalink
review comments. compute cov(Y, Y) once, then index on it
Browse files Browse the repository at this point in the history
  • Loading branch information
Tommy Odland committed Nov 27, 2023
1 parent f931e82 commit e20349b
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 16 deletions.
15 changes: 5 additions & 10 deletions src/iterative_ensemble_smoother/esmda.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,8 @@ def perturb_observations(
Parameters
----------
ensemble_size : int
The ensemble size, i.e., the number of perturbed observations.
This represents the number of columns in the returned matrix, which
is of shape (num_observations, ensemble_size).
The ensemble size, i.e., the number of columns in the returned array,
which is of shape (num_observations, ensemble_size).
alpha : float
The covariance inflation factor. The sequence of alphas should
obey the equation sum_i (1/alpha_i) = 1. However, this is NOT enforced
Expand All @@ -118,13 +117,10 @@ def perturb_observations(
# a zero cented normal means that y := L @ z, where z ~ norm(0, 1).
# Therefore, scaling C_D by alpha is equivalent to scaling L with sqrt(alpha).

# Two cases, depending on whether C_D was given as 1D or 2D array
D: npt.NDArray[np.double]
D = self.observations.reshape(-1, 1) + np.sqrt(alpha) * sample_mvnormal(
C_dd_cholesky=self.C_D_L, rng=self.rng, size=ensemble_size
)
D: npt.NDArray[np.double] = self.observations[:, None] + np.sqrt(
alpha
) * sample_mvnormal(C_dd_cholesky=self.C_D_L, rng=self.rng, size=ensemble_size)
assert D.shape == (len(self.observations), ensemble_size)

return D


Expand Down Expand Up @@ -185,7 +181,6 @@ def __init__(
) -> None:
"""Initialize the instance."""

# Initialize the super-class
super().__init__(covariance=covariance, observations=observations, seed=seed)

if not (
Expand Down
23 changes: 19 additions & 4 deletions src/iterative_ensemble_smoother/experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Contains (publicly available, but not officially supported) experimental
features of iterative_ensemble_smoother
"""
from typing import List, Tuple, Union
from typing import List, Optional, Tuple, Union

import numpy as np
import numpy.typing as npt
Expand Down Expand Up @@ -82,6 +82,7 @@ def compute_cross_covariance_multiplier(
C_D: npt.NDArray[np.double],
D: npt.NDArray[np.double],
Y: npt.NDArray[np.double],
cov_YY: Optional[npt.NDArray[np.double]] = None,
) -> npt.NDArray[np.double]:
"""Compute transition matrix T such that:
Expand All @@ -107,9 +108,16 @@ def compute_cross_covariance_multiplier(
cov_XY, then we can apply the same T to a reduced number of rows (parameters)
in cov_XY.
"""
# Compute cov(Y, Y) if it was not passed to the function.
# Pre-computation might be faster, since covariance is commutative with
# respect to indexing, ie, cov(Y[mask, :], YY[mask, :]) = cov(Y, Y)[mask, mask]
if cov_YY is None:
C_DD = empirical_cross_covariance(Y, Y)
else:
C_DD = cov_YY

# TODO: This is re-computed in each call. Can we pre compute it?
C_DD = empirical_cross_covariance(Y, Y)
assert C_DD.shape[0] == C_DD.shape[1]
assert C_DD.shape[0] == Y.shape[0]

# Arguments for sp.linalg.solve
solver_kwargs = {
Expand Down Expand Up @@ -206,6 +214,9 @@ def assimilate(self, X, Y, D, alpha, correlation_threshold=None, verbose=False):
threshold = correlation_threshold(ensemble_size=X.shape[1])
significant_corr_XY = np.abs(corr_XY) > threshold

# Pre-compute the covariance cov(Y, Y) here, and index on it later
cov_YY = empirical_cross_covariance(Y, Y)

# TODO: memory could be saved by overwriting the input X
X_out = np.copy(X)
for (unique_row, indices_of_row) in groupby_indices(significant_corr_XY):
Expand All @@ -224,10 +235,13 @@ def assimilate(self, X, Y, D, alpha, correlation_threshold=None, verbose=False):
X_subset = X[indices_of_row, :]
Y_subset = Y[unique_row, :]

# Compute the update
# Compute the masked arrays for these variables
cov_XY_mask = np.ix_(indices_of_row, unique_row)
cov_XY_subset = cov_XY[cov_XY_mask]

cov_YY_mask = np.ix_(unique_row, unique_row)
cov_YY_subset = cov_YY[cov_YY_mask]

C_D_subset = self.C_D[unique_row]
D_subset = D[unique_row, :]

Expand All @@ -237,6 +251,7 @@ def assimilate(self, X, Y, D, alpha, correlation_threshold=None, verbose=False):
C_D=C_D_subset,
D=D_subset,
Y=Y_subset,
cov_YY=cov_YY_subset, # Passing cov(Y, Y) avoids re-computation
)
X_out[indices_of_row, :] = X_subset + cov_XY_subset @ T

Expand Down
5 changes: 3 additions & 2 deletions tests/test_esmda.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ def test_that_ESMDA_and_SIES_produce_same_result_with_one_step(
covariance, observations, alpha=alpha, seed=seed + 99, inversion="exact"
)
X_ESMDA = np.copy(X)
for _ in range(esmda.num_assimilations()):
X_ESMDA = esmda.assimilate(X_ESMDA, Y)

# Perform one iteration of ESMDA
X_ESMDA = esmda.assimilate(X_ESMDA, Y)

# Create SIES instance and perform one iteration
sies = SIES(
Expand Down

0 comments on commit e20349b

Please sign in to comment.