Allow cov_YY as arg to AdaptiveESMDA assimilate (#195)

* allow cov_YY as arg to assimilate * wrote test * docstring --------- Co-authored-by: Tommy Odland <tommy.odland>
equinor · Dec 8, 2023 · cf2c872 · cf2c872
1 parent de28e8d
commit cf2c872
Show file tree

Hide file tree

Showing 3 changed files with 79 additions and 3 deletions.
diff --git a/src/iterative_ensemble_smoother/esmda_inversion.py b/src/iterative_ensemble_smoother/esmda_inversion.py
@@ -7,7 +7,7 @@
 
 def empirical_covariance_upper(X: npt.NDArray[np.double]) -> npt.NDArray[np.double]:
     """Compute the upper triangular part of the empirical covariance matrix X
-    with shape (num_variables, num_observations).
+    with shape (parameters, ensemble_size).
 
     Examples
     --------

diff --git a/src/iterative_ensemble_smoother/experimental.py b/src/iterative_ensemble_smoother/experimental.py
@@ -176,6 +176,7 @@ def assimilate(
         D: npt.NDArray[np.double],
         alpha: float,
         correlation_threshold: Union[Callable[[int], float], float, None] = None,
+        cov_YY: Optional[npt.NDArray[np.double]] = None,
         verbose: bool = False,
     ) -> npt.NDArray[np.double]:
         """Assimilate data and return an updated ensemble X_posterior.
@@ -211,6 +212,10 @@ def assimilate(
             float in the range [0, 1]. Entries in the covariance matrix that
             are lower than the correlation threshold will be set to zero.
             If None, the default 3/sqrt(ensemble_size) is used.
+        cov_YY : np.ndarray or None
+            A 2D array of shape (num_observations, num_observations) with the
+            empirical covariance of Y. If passed, this is not computed in the
+            method call, potentially saving time and computation.
         verbose : bool
             Whether to print information.
 
@@ -265,7 +270,11 @@ def correlation_threshold(ensemble_size: int) -> float:
         significant_corr_XY = np.abs(corr_XY) > threshold
 
         # Pre-compute the covariance cov(Y, Y) here, and index on it later
-        cov_YY = empirical_cross_covariance(Y, Y)
+        if cov_YY is None:
+            cov_YY = empirical_cross_covariance(Y, Y)
+        else:
+            assert cov_YY.ndim == 2, "'cov_YY' must be a 2D array"
+            assert cov_YY.shape == (Y.shape[0], Y.shape[0])
 
         # TODO: memory could be saved by overwriting the input X
         X_out: npt.NDArray[np.double] = np.copy(X)

diff --git a/tests/test_experimental.py b/tests/test_experimental.py
@@ -6,7 +6,10 @@
 import pytest
 
 from iterative_ensemble_smoother import ESMDA
-from iterative_ensemble_smoother.esmda_inversion import normalize_alpha
+from iterative_ensemble_smoother.esmda_inversion import (
+    empirical_cross_covariance,
+    normalize_alpha,
+)
 from iterative_ensemble_smoother.experimental import (
     AdaptiveESMDA,
     ensemble_smoother_update_step_row_scaling,
@@ -385,6 +388,70 @@ def zero_correlation_threshold(ensemble_size):
 
         print("------------------------------------------")
 
+    @pytest.mark.parametrize(
+        "linear_problem",
+        range(25),
+        indirect=True,
+        ids=[f"seed-{i+1}" for i in range(25)],
+    )
+    def test_that_cov_YY_can_be_computed_outside_of_assimilate(
+        self,
+        linear_problem,
+    ):
+        """Cov(Y, Y) may be computed once in each assimilation round.
+        This saves time if the user wants to iterate over parameters groups,
+        since Cov(Y, Y) is independent of the parameters X, there is no reason
+        to compute it more than once.
+
+        Below we do not loop over parameter groups in X, so there is no speed
+        gain when passing the covariance cov(Y, Y). This test is just to check
+        that the result is the same regardless of whether the user passes
+        the covariance matrix or not.
+        """
+
+        # Create a problem with g(x) = A @ x
+        X, g, observations, covariance, rng = linear_problem
+        num_parameters, ensemble_size = X.shape
+
+        alpha = normalize_alpha(np.array([5, 4, 3, 2, 1]))  # Vector of inflation values
+        smoother = AdaptiveESMDA(
+            covariance=covariance, observations=observations, seed=1
+        )
+
+        X_i = np.copy(X)
+        for i, alpha_i in enumerate(alpha, 1):
+            print(f"ESMDA iteration {i} with alpha_i={alpha_i}")
+
+            # Run forward model
+            Y_i = g(X_i)
+
+            # Create noise D - common to this ESMDA update
+            D_i = smoother.perturb_observations(
+                ensemble_size=ensemble_size, alpha=alpha_i
+            )
+
+            # Update the parameters without using pre-computed cov_YY
+            X_i1 = smoother.assimilate(
+                X=X_i,
+                Y=Y_i,
+                D=D_i,
+                alpha=alpha_i,
+            )
+
+            # Update the parameters using pre-computed cov_YY
+            X_i2 = smoother.assimilate(
+                X=X_i,
+                Y=Y_i,
+                D=D_i,
+                alpha=alpha_i,
+                cov_YY=empirical_cross_covariance(Y_i, Y_i),
+            )
+
+            # Check that the update is the same, whether or not Cov_YY is passed
+            assert np.allclose(X_i1, X_i2)
+
+            X_i = X_i1
+
 
 class TestRowScaling:
     def test_that_row_scaling_updates_parameters(self):