From cf2c87270d31cbc1539e9d9cfb64f3167333233c Mon Sep 17 00:00:00 2001 From: Tommy <10076072+tommyod@users.noreply.github.com> Date: Fri, 8 Dec 2023 12:33:15 +0100 Subject: [PATCH] Allow cov_YY as arg to AdaptiveESMDA assimilate (#195) * allow cov_YY as arg to assimilate * wrote test * docstring --------- Co-authored-by: Tommy Odland --- .../esmda_inversion.py | 2 +- .../experimental.py | 11 ++- tests/test_experimental.py | 69 ++++++++++++++++++- 3 files changed, 79 insertions(+), 3 deletions(-) diff --git a/src/iterative_ensemble_smoother/esmda_inversion.py b/src/iterative_ensemble_smoother/esmda_inversion.py index 5e5d503b..2732fbaa 100644 --- a/src/iterative_ensemble_smoother/esmda_inversion.py +++ b/src/iterative_ensemble_smoother/esmda_inversion.py @@ -7,7 +7,7 @@ def empirical_covariance_upper(X: npt.NDArray[np.double]) -> npt.NDArray[np.double]: """Compute the upper triangular part of the empirical covariance matrix X - with shape (num_variables, num_observations). + with shape (parameters, ensemble_size). Examples -------- diff --git a/src/iterative_ensemble_smoother/experimental.py b/src/iterative_ensemble_smoother/experimental.py index b4ca5dfe..cf287845 100644 --- a/src/iterative_ensemble_smoother/experimental.py +++ b/src/iterative_ensemble_smoother/experimental.py @@ -176,6 +176,7 @@ def assimilate( D: npt.NDArray[np.double], alpha: float, correlation_threshold: Union[Callable[[int], float], float, None] = None, + cov_YY: Optional[npt.NDArray[np.double]] = None, verbose: bool = False, ) -> npt.NDArray[np.double]: """Assimilate data and return an updated ensemble X_posterior. @@ -211,6 +212,10 @@ def assimilate( float in the range [0, 1]. Entries in the covariance matrix that are lower than the correlation threshold will be set to zero. If None, the default 3/sqrt(ensemble_size) is used. + cov_YY : np.ndarray or None + A 2D array of shape (num_observations, num_observations) with the + empirical covariance of Y. If passed, this is not computed in the + method call, potentially saving time and computation. verbose : bool Whether to print information. @@ -265,7 +270,11 @@ def correlation_threshold(ensemble_size: int) -> float: significant_corr_XY = np.abs(corr_XY) > threshold # Pre-compute the covariance cov(Y, Y) here, and index on it later - cov_YY = empirical_cross_covariance(Y, Y) + if cov_YY is None: + cov_YY = empirical_cross_covariance(Y, Y) + else: + assert cov_YY.ndim == 2, "'cov_YY' must be a 2D array" + assert cov_YY.shape == (Y.shape[0], Y.shape[0]) # TODO: memory could be saved by overwriting the input X X_out: npt.NDArray[np.double] = np.copy(X) diff --git a/tests/test_experimental.py b/tests/test_experimental.py index d4d5b83c..1c532a5c 100644 --- a/tests/test_experimental.py +++ b/tests/test_experimental.py @@ -6,7 +6,10 @@ import pytest from iterative_ensemble_smoother import ESMDA -from iterative_ensemble_smoother.esmda_inversion import normalize_alpha +from iterative_ensemble_smoother.esmda_inversion import ( + empirical_cross_covariance, + normalize_alpha, +) from iterative_ensemble_smoother.experimental import ( AdaptiveESMDA, ensemble_smoother_update_step_row_scaling, @@ -385,6 +388,70 @@ def zero_correlation_threshold(ensemble_size): print("------------------------------------------") + @pytest.mark.parametrize( + "linear_problem", + range(25), + indirect=True, + ids=[f"seed-{i+1}" for i in range(25)], + ) + def test_that_cov_YY_can_be_computed_outside_of_assimilate( + self, + linear_problem, + ): + """Cov(Y, Y) may be computed once in each assimilation round. + This saves time if the user wants to iterate over parameters groups, + since Cov(Y, Y) is independent of the parameters X, there is no reason + to compute it more than once. + + Below we do not loop over parameter groups in X, so there is no speed + gain when passing the covariance cov(Y, Y). This test is just to check + that the result is the same regardless of whether the user passes + the covariance matrix or not. + """ + + # Create a problem with g(x) = A @ x + X, g, observations, covariance, rng = linear_problem + num_parameters, ensemble_size = X.shape + + alpha = normalize_alpha(np.array([5, 4, 3, 2, 1])) # Vector of inflation values + smoother = AdaptiveESMDA( + covariance=covariance, observations=observations, seed=1 + ) + + X_i = np.copy(X) + for i, alpha_i in enumerate(alpha, 1): + print(f"ESMDA iteration {i} with alpha_i={alpha_i}") + + # Run forward model + Y_i = g(X_i) + + # Create noise D - common to this ESMDA update + D_i = smoother.perturb_observations( + ensemble_size=ensemble_size, alpha=alpha_i + ) + + # Update the parameters without using pre-computed cov_YY + X_i1 = smoother.assimilate( + X=X_i, + Y=Y_i, + D=D_i, + alpha=alpha_i, + ) + + # Update the parameters using pre-computed cov_YY + X_i2 = smoother.assimilate( + X=X_i, + Y=Y_i, + D=D_i, + alpha=alpha_i, + cov_YY=empirical_cross_covariance(Y_i, Y_i), + ) + + # Check that the update is the same, whether or not Cov_YY is passed + assert np.allclose(X_i1, X_i2) + + X_i = X_i1 + class TestRowScaling: def test_that_row_scaling_updates_parameters(self):