
Commit

Use in-place operations for cross-correlations
dafeda committed Jan 11, 2024
1 parent 5e96924 commit 14def30
Showing 4 changed files with 34 additions and 30 deletions.
2 changes: 2 additions & 0 deletions docs/source/Adaptive.py
@@ -108,6 +108,7 @@
 # [Kullback–Leibler divergence](https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence),
 # but we do not pursue this here.
 
+
 # %%
 def get_prior(num_parameters, num_ensemble, prior_std):
     """Sample prior from N(0, prior_std)."""
@@ -289,6 +290,7 @@ def g(X):
 # - The draw of prior realizations $X \sim N(0, \sigma)$.
 # - The noise (given by the `covariance` argument) that ESMDA adds to the observations.
 
+
 # %%
 def corr_true(array):
     return sp.stats.pearsonr(x_true, array).statistic
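corr_true computes the Pearson correlation between the true parameter vector x_true and another vector of the same length. A hedged usage sketch; the shapes of x_true and the ensemble matrix below are illustrative assumptions:

import numpy as np
import scipy.stats
import scipy as sp

x_true = np.array([1.0, -0.5, 2.0, 0.3])                 # assumed true parameters
X_post = np.random.default_rng(0).normal(size=(4, 100))  # assumed posterior ensemble

def corr_true(array):
    return sp.stats.pearsonr(x_true, array).statistic

# Correlation between the truth and each ensemble member (column)
member_corrs = np.apply_along_axis(corr_true, axis=0, arr=X_post)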
1 change: 0 additions & 1 deletion docs/source/Polynomial.py
@@ -159,7 +159,6 @@ def poly(a, b, c, x):
 )
 n_ies_iter = 7
 for i in range(n_ies_iter):
-
     step_length = steplength_exponential(i + 1)
     X_IES_ert = smoother_ies.sies_iteration(Y_IES_ert, step_length=step_length)
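steplength_exponential itself is defined elsewhere in the package; the sketch below only shows the general shape of such a geometrically decaying schedule, with illustrative constants that are assumptions rather than the library's defaults:

def steplength_exponential_sketch(iteration, min_step=0.3, max_step=0.6, halflife=1.5):
    """Decay from max_step toward min_step as the IES iteration number grows."""
    decay = 2.0 ** (-(iteration - 1) / halflife)
    return min_step + (max_step - min_step) * decay

steps = [steplength_exponential_sketch(i + 1) for i in range(7)]  # decreasing step lengths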

50 changes: 31 additions & 19 deletions src/iterative_ensemble_smoother/experimental.py
@@ -139,34 +139,46 @@ def compute_cross_covariance_multiplier(

         return sp.linalg.solve(C_DD, D - Y, **solver_kwargs)  # type: ignore
 
-    def _correlation_matrix(
+    def _cov_to_corr_inplace(
         self,
         cov_XY: npt.NDArray[np.double],
         X: npt.NDArray[np.double],
         Y: npt.NDArray[np.double],
-    ) -> npt.NDArray[np.double]:
-        """Compute a correlation matrix given a covariance matrix."""
+    ) -> None:
+        """Convert a covariance matrix to a correlation matrix in-place."""
         assert cov_XY.shape == (X.shape[0], Y.shape[0])
 
         stds_Y = np.std(Y, axis=1, ddof=1)
         stds_X = np.std(X, axis=1, ddof=1)
 
-        # Compute the correlation matrix from the covariance matrix
-        corr_XY: npt.NDArray[np.double] = (cov_XY / stds_X[:, np.newaxis]) / stds_Y[
-            np.newaxis, :
-        ]
+        # Divide each element of cov_XY by the corresponding standard deviations
+        cov_XY /= stds_X[:, np.newaxis]
+        cov_XY /= stds_Y[np.newaxis, :]
 
-        # Perform checks. There appears to be occasional numerical issues in
-        # the equation. With 2 ensemble members, we get e.g. a max value of
-        # 1.0000000000016778. We allow some leeway and clip the results.
+        # Perform checks and clip values to [-1, 1]
         eps = 1e-8
-        if not ((corr_XY.max() <= 1 + eps) and (corr_XY.min() >= -1 - eps)):
+        if not ((cov_XY.max() <= 1 + eps) and (cov_XY.min() >= -1 - eps)):
             msg = "Cross-correlation matrix has entries not in [-1, 1]."
-            msg += f"The min and max values are: {corr_XY.min()} and {corr_XY.max()}"
+            msg += f"The min and max values are: {cov_XY.min()} and {cov_XY.max()}"
             warnings.warn(msg)
 
-        corr_XY = np.clip(corr_XY, a_min=-1, a_max=1)
-        return corr_XY
+        np.clip(cov_XY, a_min=-1, a_max=1, out=cov_XY)
+
+    def _corr_to_cov_inplace(
+        self,
+        corr_XY: npt.NDArray[np.double],
+        X: npt.NDArray[np.double],
+        Y: npt.NDArray[np.double],
+    ) -> None:
+        """Convert a correlation matrix to a covariance matrix in-place."""
+        assert corr_XY.shape == (X.shape[0], Y.shape[0])
+
+        stds_Y = np.std(Y, axis=1, ddof=1)
+        stds_X = np.std(X, axis=1, ddof=1)
+
+        # Multiply each element of corr_XY by the corresponding standard deviations
+        corr_XY *= stds_X[:, np.newaxis]
+        corr_XY *= stds_Y[np.newaxis, :]
 
     def assimilate(
         self,
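The point of the change: the old _correlation_matrix allocated a second array the size of cov_XY, while the two new helpers rescale cov_XY in place and then undo the scaling, so only one (num_parameters, num_observations) buffer exists at a time. A standalone sketch of the same round trip, with made-up shapes and a plain (n - 1)-normalized cross-covariance standing in for empirical_cross_covariance:

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(1_000, 50))  # parameters x ensemble members (assumed shapes)
Y = rng.normal(size=(200, 50))    # responses  x ensemble members (assumed shapes)

X_c = X - X.mean(axis=1, keepdims=True)
Y_c = Y - Y.mean(axis=1, keepdims=True)
cov_XY = X_c @ Y_c.T / (X.shape[1] - 1)  # cross-covariance, shape (1000, 200)

stds_X = np.std(X, axis=1, ddof=1)
stds_Y = np.std(Y, axis=1, ddof=1)

# Covariance -> correlation, reusing the existing buffer
cov_XY /= stds_X[:, np.newaxis]
cov_XY /= stds_Y[np.newaxis, :]
np.clip(cov_XY, -1.0, 1.0, out=cov_XY)  # guard against tiny numerical overshoots

# ... threshold the correlations here ...

# Correlation -> covariance, restoring the original values (up to the clipping)
cov_XY *= stds_X[:, np.newaxis]
cov_XY *= stds_Y[np.newaxis, :]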
@@ -261,13 +273,14 @@ def correlation_threshold(ensemble_size: int) -> float:
         # X by parameter group (rows), the storage requirement decreases
         cov_XY = empirical_cross_covariance(X, Y)
         assert cov_XY.shape == (X.shape[0], Y.shape[0])
-        corr_XY = self._correlation_matrix(cov_XY, X, Y)
-        assert corr_XY.shape == cov_XY.shape
+        self._cov_to_corr_inplace(cov_XY, X, Y)
 
         # Determine which elements in the cross covariance matrix that will
         # be set to zero
         threshold = correlation_threshold(X.shape[1])
-        significant_corr_XY = np.abs(corr_XY) > threshold
+        significant_corr_XY = np.abs(cov_XY) > threshold
+
+        self._corr_to_cov_inplace(cov_XY, X, Y)
 
         # Pre-compute the covariance cov(Y, Y) here, and index on it later
         if cov_YY is None:
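significant_corr_XY is a boolean mask over (parameter, response) pairs: only entries whose correlation magnitude exceeds the cutoff take part in the update, which is the adaptive-localization screening step. A small sketch, using a 3/sqrt(ensemble_size) cutoff purely as an illustrative assumption (the package's actual correlation_threshold rule may differ):

import numpy as np

def correlation_threshold_sketch(ensemble_size):
    # Spurious sample correlations shrink roughly like 1 / sqrt(ensemble_size)
    return min(1.0, 3.0 / np.sqrt(ensemble_size))

corr_XY = np.random.default_rng(1).uniform(-1, 1, size=(10, 5))  # stand-in correlations
threshold = correlation_threshold_sketch(ensemble_size=50)
significant_corr_XY = np.abs(corr_XY) > threshold  # True where the pair is kept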
@@ -278,8 +291,7 @@

         # TODO: memory could be saved by overwriting the input X
         X_out: npt.NDArray[np.double] = np.copy(X)
-        for (unique_row, indices_of_row) in groupby_indices(significant_corr_XY):
-
+        for unique_row, indices_of_row in groupby_indices(significant_corr_XY):
             if verbose:
                 print(
                     f"  Assimilating {len(indices_of_row)} parameters"
11 changes: 1 addition & 10 deletions tests/test_experimental.py
@@ -19,7 +19,6 @@

 @pytest.mark.parametrize("seed", range(25))
 def test_groupby_indices(seed):
-
     rng = np.random.default_rng(seed)
     rows = rng.integers(10, 100)
     columns = rng.integers(2, 9)
@@ -39,8 +38,7 @@ def test_groupby_indices(seed):
     assert intersection_idx == set()
 
     # Verify each entry in the groups
-    for (unique_row, indices_of_row) in groups:
-
+    for unique_row, indices_of_row in groups:
         # Repeat this unique row the number of times it occurs in X
         repeated = np.repeat(
             unique_row[np.newaxis, :], repeats=len(indices_of_row), axis=0
@@ -50,7 +48,6 @@ def test_groupby_indices(seed):

 @pytest.fixture()
 def linear_problem(request):
-
     # Seed the problem using indirect parametrization:
     # https://docs.pytest.org/en/latest/example/parametrize.html#indirect-parametrization
     rng = np.random.default_rng(request.param)
@@ -86,7 +83,6 @@ class TestAdaptiveESMDA:
     def test_that_adaptive_localization_with_cutoff_1_equals_ensemble_prior(
         self, linear_problem
     ):
-
         # Create a problem with g(x) = A @ x
         X, g, observations, covariance, _ = linear_problem

@@ -98,7 +94,6 @@ def test_that_adaptive_localization_with_cutoff_1_equals_ensemble_prior(
         X_i = np.copy(X)
         alpha = normalize_alpha(np.ones(5))
         for alpha_i in alpha:
-
             # Run forward model
             Y_i = g(X_i)
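ESMDA inflation factors must satisfy sum_i 1/alpha_i = 1, so that the sequence of tempered updates carries the same total weight as a single ES update; normalize_alpha rescales a raw sequence to meet that constraint. A minimal sketch of such a rescaling (the package's function may differ in details):

import numpy as np

def normalize_alpha_sketch(alpha):
    """Scale alpha so that sum(1 / alpha) == 1."""
    return alpha * np.sum(1.0 / alpha)

alpha = normalize_alpha_sketch(np.ones(5))   # -> array([5., 5., 5., 5., 5.])
assert np.isclose(np.sum(1.0 / alpha), 1.0)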

@@ -127,7 +122,6 @@ def test_that_adaptive_localization_with_cutoff_1_equals_ensemble_prior(
     def test_that_adaptive_localization_with_cutoff_0_equals_standard_ESMDA_update(
         self, linear_problem
     ):
-
         # Create a problem with g(x) = A @ x
         X, g, observations, covariance, rng = linear_problem

@@ -142,7 +136,6 @@ def test_that_adaptive_localization_with_cutoff_0_equals_standard_ESMDA_update(

         X_i = np.copy(X)
         for i, alpha_i in enumerate(alpha, 1):
-
             # Run forward model
             Y_i = g(X_i)

@@ -209,7 +202,6 @@ def test_that_posterior_generalized_variance_increases_in_cutoff(

         X_i = np.copy(X)
         for i, alpha_i in enumerate(alpha, 1):
-
             # Run forward model
             Y_i = g(X_i)

@@ -566,7 +558,6 @@ def multiply(self, X, K):


 if __name__ == "__main__":
-
     pytest.main(
         args=[
             __file__,
