Remove grouping trick
It seems to be faster only when the Gaussian field is very smooth,
which is not realistic for real fields.
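
A hypothetical illustration (sizes made up) of the rationale: grouping the rows of the boolean significance mask only saves work when many parameters share an identical pattern, which a very smooth field produces and a rough field does not.

```python
import numpy as np

rng = np.random.default_rng(42)
n_params, n_responses = 1000, 200

# Smooth field: blocks of neighbouring parameters share the same
# significance pattern, so 1000 rows collapse into ~10 groups.
patterns = rng.integers(0, 2, size=(10, n_responses), dtype=bool)
smooth_mask = np.repeat(patterns, n_params // 10, axis=0)

# Rough field: essentially every row is unique, so grouping degenerates
# into a per-row loop plus the bookkeeping overhead of forming groups.
rough_mask = rng.integers(0, 2, size=(n_params, n_responses), dtype=bool)

print(len(np.unique(smooth_mask, axis=0)))  # typically 10 groups
print(len(np.unique(rough_mask, axis=0)))   # typically 1000 groups
```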
dafeda committed Jan 3, 2024
1 parent 5e96924 commit 9d672d3
Showing 2 changed files with 7 additions and 49 deletions.
23 changes: 7 additions & 16 deletions src/iterative_ensemble_smoother/experimental.py
@@ -276,30 +276,21 @@ def correlation_threshold(ensemble_size: int) -> float:
         assert cov_YY.ndim == 2, "'cov_YY' must be a 2D array"
         assert cov_YY.shape == (Y.shape[0], Y.shape[0])
 
+        # Identify rows with at least one significant correlation.
+        significant_rows = np.any(significant_corr_XY, axis=1)
+
         # TODO: memory could be saved by overwriting the input X
         X_out: npt.NDArray[np.double] = np.copy(X)
-        for (unique_row, indices_of_row) in groupby_indices(significant_corr_XY):
-
-            if verbose:
-                print(
-                    f"  Assimilating {len(indices_of_row)} parameters"
-                    + " with identical correlation thresholds to responses."
-                )
-                print(
-                    "  The parameters are significant wrt "
-                    + f"{np.sum(unique_row)} / {len(unique_row)} responses."
-                )
-
-            # These parameters are not significantly correlated to any responses
-            if np.all(~unique_row):
-                continue
+        # Loop only over rows with significant correlations
+        for indices_of_row in np.where(significant_rows)[0]:
+            unique_row = significant_corr_XY[indices_of_row]
 
             # Get the parameters (X) that have identical significant responses (Y)
             X_subset = X[indices_of_row, :]
             Y_subset = Y[unique_row, :]
 
             # Compute the masked arrays for these variables
-            cov_XY_mask = np.ix_(indices_of_row, unique_row)
+            cov_XY_mask = np.ix_([indices_of_row], unique_row)
            cov_XY_subset = cov_XY[cov_XY_mask]
 
             cov_YY_mask = np.ix_(unique_row, unique_row)
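
The subtle change inside the loop body is the `np.ix_` call: `indices_of_row` is now a single integer produced by `np.where(...)[0]` rather than a list of grouped row indices, so it must be wrapped in a list to keep the covariance subset two-dimensional. A minimal sketch with made-up values:

```python
import numpy as np

rng = np.random.default_rng(0)
cov_XY = rng.normal(size=(4, 3))
significant_corr_XY = np.array(
    [
        [True, False, True],
        [False, False, False],  # no significant responses: never visited
        [True, True, False],
        [False, False, False],  # never visited
    ]
)

# Rows with at least one significant correlation
significant_rows = np.any(significant_corr_XY, axis=1)

for indices_of_row in np.where(significant_rows)[0]:
    unique_row = significant_corr_XY[indices_of_row]

    # indices_of_row is a scalar here, so it is wrapped in a list;
    # np.ix_ then yields an open mesh that keeps the subset 2D.
    cov_XY_mask = np.ix_([indices_of_row], unique_row)
    cov_XY_subset = cov_XY[cov_XY_mask]
    assert cov_XY_subset.shape == (1, int(unique_row.sum()))
```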
33 changes: 0 additions & 33 deletions tests/test_experimental.py
@@ -1,4 +1,3 @@
-import functools
 import time
 from copy import deepcopy
 
@@ -13,41 +12,9 @@
 from iterative_ensemble_smoother.experimental import (
     AdaptiveESMDA,
     ensemble_smoother_update_step_row_scaling,
-    groupby_indices,
 )
 
 
-@pytest.mark.parametrize("seed", range(25))
-def test_groupby_indices(seed):
-
-    rng = np.random.default_rng(seed)
-    rows = rng.integers(10, 100)
-    columns = rng.integers(2, 9)
-
-    # Create data matrix
-    X = rng.integers(0, 10, size=(rows, columns))
-
-    groups = list(groupby_indices(X))
-    indices = [set(idx) for (_, idx) in groups]
-
-    # Verify that every row is included
-    union_idx = functools.reduce(set.union, indices)
-    assert union_idx == set(range(rows))
-
-    # Verify that no duplicate rows occur
-    intersection_idx = functools.reduce(set.intersection, indices)
-    assert intersection_idx == set()
-
-    # Verify each entry in the groups
-    for (unique_row, indices_of_row) in groups:
-
-        # Repeat this unique row the number of times it occurs in X
-        repeated = np.repeat(
-            unique_row[np.newaxis, :], repeats=len(indices_of_row), axis=0
-        )
-        assert np.allclose(X[indices_of_row, :], repeated)
-
-
 @pytest.fixture()
 def linear_problem(request):
 
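
For reference, the deleted test pinned down the contract of the removed `groupby_indices` helper: it partitions the row indices of a matrix into disjoint groups of identical rows. A minimal sketch consistent with that contract (not necessarily the removed implementation):

```python
from collections import defaultdict

import numpy as np

def groupby_indices(X):
    """Yield (unique_row, indices_of_row) for each group of identical rows in X."""
    groups = defaultdict(list)
    for i, row in enumerate(X):
        groups[tuple(row)].append(i)
    for row_key, indices_of_row in groups.items():
        yield np.array(row_key, dtype=X.dtype), indices_of_row
```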
