diff --git a/docs/source/Adaptive.py b/docs/source/Adaptive.py index dc08ccb6..635ffb09 100644 --- a/docs/source/Adaptive.py +++ b/docs/source/Adaptive.py @@ -108,6 +108,7 @@ # [Kullback–Leibler divergence](https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence), # but we do not pursue this here. + # %% def get_prior(num_parameters, num_ensemble, prior_std): """Sample prior from N(0, prior_std).""" @@ -289,6 +290,7 @@ def g(X): # - The draw of prior realizations $X \sim N(0, \sigma)$. # - The noise (given by the `covariance` argument) that ESMDA adds to the observations. + # %% def corr_true(array): return sp.stats.pearsonr(x_true, array).statistic diff --git a/docs/source/Polynomial.py b/docs/source/Polynomial.py index 90dfcd21..f6a00922 100644 --- a/docs/source/Polynomial.py +++ b/docs/source/Polynomial.py @@ -159,7 +159,6 @@ def poly(a, b, c, x): ) n_ies_iter = 7 for i in range(n_ies_iter): - step_length = steplength_exponential(i + 1) X_IES_ert = smoother_ies.sies_iteration(Y_IES_ert, step_length=step_length) diff --git a/src/iterative_ensemble_smoother/experimental.py b/src/iterative_ensemble_smoother/experimental.py index cf287845..632c23af 100644 --- a/src/iterative_ensemble_smoother/experimental.py +++ b/src/iterative_ensemble_smoother/experimental.py @@ -139,34 +139,46 @@ def compute_cross_covariance_multiplier( return sp.linalg.solve(C_DD, D - Y, **solver_kwargs) # type: ignore - def _correlation_matrix( + def _cov_to_corr_inplace( self, cov_XY: npt.NDArray[np.double], X: npt.NDArray[np.double], Y: npt.NDArray[np.double], - ) -> npt.NDArray[np.double]: - """Compute a correlation matrix given a covariance matrix.""" + ) -> None: + """Convert a covariance matrix to a correlation matrix in-place.""" assert cov_XY.shape == (X.shape[0], Y.shape[0]) stds_Y = np.std(Y, axis=1, ddof=1) stds_X = np.std(X, axis=1, ddof=1) - # Compute the correlation matrix from the covariance matrix - corr_XY: npt.NDArray[np.double] = (cov_XY / stds_X[:, np.newaxis]) / stds_Y[ - np.newaxis, : - ] + # Divide each element of cov_XY by the corresponding standard deviations + cov_XY /= stds_X[:, np.newaxis] + cov_XY /= stds_Y[np.newaxis, :] - # Perform checks. There appears to be occasional numerical issues in - # the equation. With 2 ensemble members, we get e.g. a max value of - # 1.0000000000016778. We allow some leeway and clip the results. + # Perform checks and clip values to [-1, 1] eps = 1e-8 - if not ((corr_XY.max() <= 1 + eps) and (corr_XY.min() >= -1 - eps)): + if not ((cov_XY.max() <= 1 + eps) and (cov_XY.min() >= -1 - eps)): msg = "Cross-correlation matrix has entries not in [-1, 1]." - msg += f"The min and max values are: {corr_XY.min()} and {corr_XY.max()}" + msg += f"The min and max values are: {cov_XY.min()} and {cov_XY.max()}" warnings.warn(msg) - corr_XY = np.clip(corr_XY, a_min=-1, a_max=1) - return corr_XY + np.clip(cov_XY, a_min=-1, a_max=1, out=cov_XY) + + def _corr_to_cov_inplace( + self, + corr_XY: npt.NDArray[np.double], + X: npt.NDArray[np.double], + Y: npt.NDArray[np.double], + ) -> None: + """Convert a correlation matrix to a covariance matrix in-place.""" + assert corr_XY.shape == (X.shape[0], Y.shape[0]) + + stds_Y = np.std(Y, axis=1, ddof=1) + stds_X = np.std(X, axis=1, ddof=1) + + # Multiply each element of corr_XY by the corresponding standard deviations + corr_XY *= stds_X[:, np.newaxis] + corr_XY *= stds_Y[np.newaxis, :] def assimilate( self, @@ -261,13 +273,14 @@ def correlation_threshold(ensemble_size: int) -> float: # X by parameter group (rows), the storage requirement decreases cov_XY = empirical_cross_covariance(X, Y) assert cov_XY.shape == (X.shape[0], Y.shape[0]) - corr_XY = self._correlation_matrix(cov_XY, X, Y) - assert corr_XY.shape == cov_XY.shape + self._cov_to_corr_inplace(cov_XY, X, Y) # Determine which elements in the cross covariance matrix that will # be set to zero threshold = correlation_threshold(X.shape[1]) - significant_corr_XY = np.abs(corr_XY) > threshold + significant_corr_XY = np.abs(cov_XY) > threshold + + self._corr_to_cov_inplace(cov_XY, X, Y) # Pre-compute the covariance cov(Y, Y) here, and index on it later if cov_YY is None: @@ -278,8 +291,7 @@ def correlation_threshold(ensemble_size: int) -> float: # TODO: memory could be saved by overwriting the input X X_out: npt.NDArray[np.double] = np.copy(X) - for (unique_row, indices_of_row) in groupby_indices(significant_corr_XY): - + for unique_row, indices_of_row in groupby_indices(significant_corr_XY): if verbose: print( f" Assimilating {len(indices_of_row)} parameters" diff --git a/tests/test_experimental.py b/tests/test_experimental.py index 1c532a5c..c9940ed0 100644 --- a/tests/test_experimental.py +++ b/tests/test_experimental.py @@ -19,7 +19,6 @@ @pytest.mark.parametrize("seed", range(25)) def test_groupby_indices(seed): - rng = np.random.default_rng(seed) rows = rng.integers(10, 100) columns = rng.integers(2, 9) @@ -39,8 +38,7 @@ def test_groupby_indices(seed): assert intersection_idx == set() # Verify each entry in the groups - for (unique_row, indices_of_row) in groups: - + for unique_row, indices_of_row in groups: # Repeat this unique row the number of times it occurs in X repeated = np.repeat( unique_row[np.newaxis, :], repeats=len(indices_of_row), axis=0 @@ -50,7 +48,6 @@ def test_groupby_indices(seed): @pytest.fixture() def linear_problem(request): - # Seed the problem using indirect parametrization: # https://docs.pytest.org/en/latest/example/parametrize.html#indirect-parametrization rng = np.random.default_rng(request.param) @@ -86,7 +83,6 @@ class TestAdaptiveESMDA: def test_that_adaptive_localization_with_cutoff_1_equals_ensemble_prior( self, linear_problem ): - # Create a problem with g(x) = A @ x X, g, observations, covariance, _ = linear_problem @@ -98,7 +94,6 @@ def test_that_adaptive_localization_with_cutoff_1_equals_ensemble_prior( X_i = np.copy(X) alpha = normalize_alpha(np.ones(5)) for alpha_i in alpha: - # Run forward model Y_i = g(X_i) @@ -127,7 +122,6 @@ def test_that_adaptive_localization_with_cutoff_1_equals_ensemble_prior( def test_that_adaptive_localization_with_cutoff_0_equals_standard_ESMDA_update( self, linear_problem ): - # Create a problem with g(x) = A @ x X, g, observations, covariance, rng = linear_problem @@ -142,7 +136,6 @@ def test_that_adaptive_localization_with_cutoff_0_equals_standard_ESMDA_update( X_i = np.copy(X) for i, alpha_i in enumerate(alpha, 1): - # Run forward model Y_i = g(X_i) @@ -209,7 +202,6 @@ def test_that_posterior_generalized_variance_increases_in_cutoff( X_i = np.copy(X) for i, alpha_i in enumerate(alpha, 1): - # Run forward model Y_i = g(X_i) @@ -566,7 +558,6 @@ def multiply(self, X, K): if __name__ == "__main__": - pytest.main( args=[ __file__,