diff --git a/src/iterative_ensemble_smoother/esmda.py b/src/iterative_ensemble_smoother/esmda.py
index 3d729e3b..ff2ec34d 100644
--- a/src/iterative_ensemble_smoother/esmda.py
+++ b/src/iterative_ensemble_smoother/esmda.py
@@ -98,9 +98,8 @@ def perturb_observations(
         Parameters
         ----------
         ensemble_size : int
-            The ensemble size, i.e., the number of perturbed observations.
-            This represents the number of columns in the returned matrix, which
-            is of shape (num_observations, ensemble_size).
+            The ensemble size, i.e., the number of columns in the returned array,
+            which is of shape (num_observations, ensemble_size).
         alpha : float
             The covariance inflation factor. The sequence of alphas should
             obey the equation sum_i (1/alpha_i) = 1. However, this is NOT enforced
@@ -118,13 +117,10 @@ def perturb_observations(
         # a zero cented normal means that y := L @ z, where z ~ norm(0, 1).
         # Therefore, scaling C_D by alpha is equivalent to scaling L with sqrt(alpha).

-        # Two cases, depending on whether C_D was given as 1D or 2D array
-        D: npt.NDArray[np.double]
-        D = self.observations.reshape(-1, 1) + np.sqrt(alpha) * sample_mvnormal(
-            C_dd_cholesky=self.C_D_L, rng=self.rng, size=ensemble_size
-        )
+        D: npt.NDArray[np.double] = self.observations[:, None] + np.sqrt(
+            alpha
+        ) * sample_mvnormal(C_dd_cholesky=self.C_D_L, rng=self.rng, size=ensemble_size)
         assert D.shape == (len(self.observations), ensemble_size)
-
         return D
@@ -185,7 +181,6 @@ def __init__(
     ) -> None:
         """Initialize the instance."""

-        # Initialize the super-class
         super().__init__(covariance=covariance, observations=observations, seed=seed)

         if not (
diff --git a/src/iterative_ensemble_smoother/experimental.py b/src/iterative_ensemble_smoother/experimental.py
index 593224cb..13dc47b4 100644
--- a/src/iterative_ensemble_smoother/experimental.py
+++ b/src/iterative_ensemble_smoother/experimental.py
@@ -2,7 +2,7 @@
 Contains (publicly available, but not officially supported) experimental
 features of iterative_ensemble_smoother
 """
-from typing import List, Tuple, Union
+from typing import List, Optional, Tuple, Union

 import numpy as np
 import numpy.typing as npt
@@ -82,6 +82,7 @@ def compute_cross_covariance_multiplier(
         C_D: npt.NDArray[np.double],
         D: npt.NDArray[np.double],
         Y: npt.NDArray[np.double],
+        cov_YY: Optional[npt.NDArray[np.double]] = None,
     ) -> npt.NDArray[np.double]:
         """Compute transition matrix T such that:
@@ -107,9 +108,16 @@ def compute_cross_covariance_multiplier(
         cov_XY, then we can apply the same T to a reduced number of rows
         (parameters) in cov_XY.
         """
+        # Compute cov(Y, Y) if it was not passed to the function.
+        # Pre-computation might be faster, since covariance commutes with
+        # indexing, i.e., cov(Y[mask, :], Y[mask, :]) = cov(Y, Y)[mask, mask]
+        if cov_YY is None:
+            C_DD = empirical_cross_covariance(Y, Y)
+        else:
+            C_DD = cov_YY

-        # TODO: This is re-computed in each call. Can we pre compute it?
-        C_DD = empirical_cross_covariance(Y, Y)
+        assert C_DD.shape[0] == C_DD.shape[1]
+        assert C_DD.shape[0] == Y.shape[0]

         # Arguments for sp.linalg.solve
         solver_kwargs = {
@@ -206,6 +214,9 @@ def assimilate(self, X, Y, D, alpha, correlation_threshold=None, verbose=False):
         threshold = correlation_threshold(ensemble_size=X.shape[1])
         significant_corr_XY = np.abs(corr_XY) > threshold

+        # Pre-compute the covariance cov(Y, Y) here, and index on it later
+        cov_YY = empirical_cross_covariance(Y, Y)
+
         # TODO: memory could be saved by overwriting the input X
         X_out = np.copy(X)
         for (unique_row, indices_of_row) in groupby_indices(significant_corr_XY):
@@ -224,10 +235,13 @@ def assimilate(self, X, Y, D, alpha, correlation_threshold=None, verbose=False):
             X_subset = X[indices_of_row, :]
             Y_subset = Y[unique_row, :]

-            # Compute the update
+            # Compute the masked arrays for these variables
             cov_XY_mask = np.ix_(indices_of_row, unique_row)
             cov_XY_subset = cov_XY[cov_XY_mask]

+            cov_YY_mask = np.ix_(unique_row, unique_row)
+            cov_YY_subset = cov_YY[cov_YY_mask]
+
             C_D_subset = self.C_D[unique_row]
             D_subset = D[unique_row, :]

@@ -237,6 +251,7 @@ def assimilate(self, X, Y, D, alpha, correlation_threshold=None, verbose=False):
                 C_D=C_D_subset,
                 D=D_subset,
                 Y=Y_subset,
+                cov_YY=cov_YY_subset,  # Passing cov(Y, Y) avoids re-computation
             )
             X_out[indices_of_row, :] = X_subset + cov_XY_subset @ T
diff --git a/tests/test_esmda.py b/tests/test_esmda.py
index 6a47aee0..2f404ef4 100644
--- a/tests/test_esmda.py
+++ b/tests/test_esmda.py
@@ -57,8 +57,9 @@ def test_that_ESMDA_and_SIES_produce_same_result_with_one_step(
         covariance, observations, alpha=alpha, seed=seed + 99, inversion="exact"
     )
     X_ESMDA = np.copy(X)
-    for _ in range(esmda.num_assimilations()):
-        X_ESMDA = esmda.assimilate(X_ESMDA, Y)
+
+    # Perform one iteration of ESMDA
+    X_ESMDA = esmda.assimilate(X_ESMDA, Y)

     # Create SIES instance and perform one iteration
     sies = SIES(
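Note on the experimental.py change above: the new `cov_YY` argument works because the empirical covariance commutes with row selection, so cov(Y, Y) can be computed once per assimilation and sliced with `np.ix_` for each parameter group instead of being re-computed inside the loop. The snippet below is a minimal standalone sketch of that identity, not part of the diff; `empirical_cross_cov` is a stand-in helper written for this example and is only assumed to match the library's `empirical_cross_covariance` (responses along rows, ensemble members along columns).

```python
import numpy as np


def empirical_cross_cov(A, B):
    # Standard empirical cross-covariance between the rows of A and B,
    # with ensemble members along the columns (assumed layout).
    A = A - A.mean(axis=1, keepdims=True)
    B = B - B.mean(axis=1, keepdims=True)
    return A @ B.T / (A.shape[1] - 1)


rng = np.random.default_rng(42)
Y = rng.normal(size=(10, 25))      # (num_responses, ensemble_size)
mask = np.array([1, 3, 4, 8])      # response rows selected for one parameter group

# Computed once, outside the per-group loop.
cov_YY = empirical_cross_cov(Y, Y)

# Re-computing on the subset gives the same result as indexing the full matrix.
recomputed = empirical_cross_cov(Y[mask, :], Y[mask, :])
indexed = cov_YY[np.ix_(mask, mask)]

assert np.allclose(recomputed, indexed)
```

This is why `assimilate` can pre-compute `cov_YY` once and pass `cov_YY[np.ix_(unique_row, unique_row)]` down to `compute_cross_covariance_multiplier` for every group without changing the result.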