Remove grouping trick
It seems to be faster only when the Gaussian field is very smooth,
which is not realistic for real fields.
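
A hypothetical illustration (sizes made up) of the rationale: grouping the rows of the boolean significance mask only saves work when many parameters share an identical pattern, which a very smooth field produces and a rough field does not.

```python
import numpy as np

rng = np.random.default_rng(42)
n_params, n_responses = 1000, 200

# Smooth field: blocks of neighbouring parameters share the same
# significance pattern, so 1000 rows collapse into ~10 groups.
patterns = rng.integers(0, 2, size=(10, n_responses), dtype=bool)
smooth_mask = np.repeat(patterns, n_params // 10, axis=0)

# Rough field: essentially every row is unique, so grouping degenerates
# into a per-row loop plus the bookkeeping overhead of forming groups.
rough_mask = rng.integers(0, 2, size=(n_params, n_responses), dtype=bool)

print(len(np.unique(smooth_mask, axis=0)))  # typically 10 groups
print(len(np.unique(rough_mask, axis=0)))   # typically 1000 groups
```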
dafeda committed Jan 3, 2024
1 parent 5e96924 commit 9d672d3
Showing 2 changed files with 7 additions and 49 deletions.
23 changes: 7 additions & 16 deletions src/iterative_ensemble_smoother/experimental.py
@@ -276,30 +276,21 @@ def correlation_threshold(ensemble_size: int) -> float:
         assert cov_YY.ndim == 2, "'cov_YY' must be a 2D array"
         assert cov_YY.shape == (Y.shape[0], Y.shape[0])
 
+        # Identify rows with at least one significant correlation.
+        significant_rows = np.any(significant_corr_XY, axis=1)
+
         # TODO: memory could be saved by overwriting the input X
         X_out: npt.NDArray[np.double] = np.copy(X)
-        for (unique_row, indices_of_row) in groupby_indices(significant_corr_XY):
-
-            if verbose:
-                print(
-                    f"  Assimilating {len(indices_of_row)} parameters"
-                    + " with identical correlation thresholds to responses."
-                )
-                print(
-                    "  The parameters are significant wrt "
-                    + f"{np.sum(unique_row)} / {len(unique_row)} responses."
-                )
-
-            # These parameters are not significantly correlated to any responses
-            if np.all(~unique_row):
-                continue
+        # Loop only over rows with significant correlations
+        for indices_of_row in np.where(significant_rows)[0]:
+            unique_row = significant_corr_XY[indices_of_row]
 
             # Get the parameters (X) that have identical significant responses (Y)
             X_subset = X[indices_of_row, :]
             Y_subset = Y[unique_row, :]
 
             # Compute the masked arrays for these variables
-            cov_XY_mask = np.ix_(indices_of_row, unique_row)
+            cov_XY_mask = np.ix_([indices_of_row], unique_row)
            cov_XY_subset = cov_XY[cov_XY_mask]
 
             cov_YY_mask = np.ix_(unique_row, unique_row)
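
The subtle change inside the loop body is the `np.ix_` call: `indices_of_row` is now a single integer produced by `np.where(...)[0]` rather than a list of grouped row indices, so it must be wrapped in a list to keep the covariance subset two-dimensional. A minimal sketch with made-up values:

```python
import numpy as np

rng = np.random.default_rng(0)
cov_XY = rng.normal(size=(4, 3))
significant_corr_XY = np.array(
    [
        [True, False, True],
        [False, False, False],  # no significant responses: never visited
        [True, True, False],
        [False, False, False],  # never visited
    ]
)

# Rows with at least one significant correlation
significant_rows = np.any(significant_corr_XY, axis=1)

for indices_of_row in np.where(significant_rows)[0]:
    unique_row = significant_corr_XY[indices_of_row]

    # indices_of_row is a scalar here, so it is wrapped in a list;
    # np.ix_ then yields an open mesh that keeps the subset 2D.
    cov_XY_mask = np.ix_([indices_of_row], unique_row)
    cov_XY_subset = cov_XY[cov_XY_mask]
    assert cov_XY_subset.shape == (1, int(unique_row.sum()))
```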
33 changes: 0 additions & 33 deletions tests/test_experimental.py
@@ -1,4 +1,3 @@
-import functools
 import time
 from copy import deepcopy
 
@@ -13,41 +12,9 @@
 from iterative_ensemble_smoother.experimental import (
     AdaptiveESMDA,
     ensemble_smoother_update_step_row_scaling,
-    groupby_indices,
 )
 
 
-@pytest.mark.parametrize("seed", range(25))
-def test_groupby_indices(seed):
-
-    rng = np.random.default_rng(seed)
-    rows = rng.integers(10, 100)
-    columns = rng.integers(2, 9)
-
-    # Create data matrix
-    X = rng.integers(0, 10, size=(rows, columns))
-
-    groups = list(groupby_indices(X))
-    indices = [set(idx) for (_, idx) in groups]
-
-    # Verify that every row is included
-    union_idx = functools.reduce(set.union, indices)
-    assert union_idx == set(range(rows))
-
-    # Verify that no duplicate rows occur
-    intersection_idx = functools.reduce(set.intersection, indices)
-    assert intersection_idx == set()
-
-    # Verify each entry in the groups
-    for (unique_row, indices_of_row) in groups:
-
-        # Repeat this unique row the number of times it occurs in X
-        repeated = np.repeat(
-            unique_row[np.newaxis, :], repeats=len(indices_of_row), axis=0
-        )
-        assert np.allclose(X[indices_of_row, :], repeated)
-
-
 @pytest.fixture()
 def linear_problem(request):
 
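
For reference, the deleted test pinned down the contract of the removed `groupby_indices` helper: it partitions the row indices of a matrix into disjoint groups of identical rows. A minimal sketch consistent with that contract (not necessarily the removed implementation):

```python
from collections import defaultdict

import numpy as np

def groupby_indices(X):
    """Yield (unique_row, indices_of_row) for each group of identical rows in X."""
    groups = defaultdict(list)
    for i, row in enumerate(X):
        groups[tuple(row)].append(i)
    for row_key, indices_of_row in groups.items():
        yield np.array(row_key, dtype=X.dtype), indices_of_row
```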
