From 31e9b5479f1790e0f67a85b86672576b7cadf634 Mon Sep 17 00:00:00 2001
From: Feda Curic <feda.curic@gmail.com>
Date: Thu, 23 Nov 2023 11:39:25 +0100
Subject: [PATCH] Use T for transition matrix instead of K (#181)

K usually refers to Kalman gain
---
 src/iterative_ensemble_smoother/esmda.py      | 26 ++++++++---------
 .../esmda_inversion.py                        | 28 +++++++++----------
 tests/test_esmda_inversion.py                 |  8 +++---
 3 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/iterative_ensemble_smoother/esmda.py b/src/iterative_ensemble_smoother/esmda.py
index 5357f0a8..70ca4f39 100644
--- a/src/iterative_ensemble_smoother/esmda.py
+++ b/src/iterative_ensemble_smoother/esmda.py
@@ -180,7 +180,7 @@ def assimilate(
             corresponds to a parameter in the model, and each column corresponds
             to an ensemble member (realization).
         Y : np.ndarray
-            2D array of shape (num_parameters, ensemble_size), containing
+            2D array of shape (num_observations, ensemble_size), containing
             responses when evaluating the model at X. In other words, Y = g(X),
             where g is the forward model.
         overwrite : bool
@@ -195,7 +195,7 @@ def assimilate(
         Returns
         -------
         X_posterior : np.ndarray
-            2D array of shape (num_inputs, num_ensemble_members).
+            2D array of shape (num_parameters, num_ensemble_members).
 
         """
         if self.iteration >= self.num_assimilations():
@@ -208,9 +208,9 @@ def assimilate(
             num_ensemble == num_emsemble2
         ), "Number of ensemble members in X and Y must match"
         if not np.issubdtype(X.dtype, np.floating):
-            raise TypeError("Argument `X` must be contain floats")
+            raise TypeError("Argument `X` must contain floats")
         if not np.issubdtype(Y.dtype, np.floating):
-            raise TypeError("Argument `Y` must be contain floats")
+            raise TypeError("Argument `Y` must contain floats")
 
         assert 0 < truncation <= 1.0
 
@@ -254,7 +254,7 @@ def compute_transition_matrix(
         alpha: float,
         truncation: float = 1.0,
     ) -> npt.NDArray[np.double]:
-        """Return a matrix K such that X_posterior = X_prior + X_prior @ K.
+        """Return a matrix T such that X_posterior = X_prior + X_prior @ T.
 
         The purpose of this method is to facilitate row-by-row, or batch-by-batch,
         updates of X. This is useful if X is too large to fit in memory.
@@ -262,7 +262,7 @@ def compute_transition_matrix(
         Parameters
         ----------
         Y : np.ndarray
-            2D array of shape (num_parameters, ensemble_size), containing
+            2D array of shape (num_observations, ensemble_size), containing
             responses when evaluating the model at X. In other words, Y = g(X),
             where g is the forward model.
         alpha : float
@@ -276,19 +276,19 @@ def compute_transition_matrix(
 
         Returns
         -------
-        K : np.ndarray
-            A matrix K such that X_posterior = X_prior + X_prior @ K.
+        T : np.ndarray
+            A matrix T such that X_posterior = X_prior + X_prior @ T.
             It has shape (num_ensemble_members, num_ensemble_members).
         """
 
         # Recall the update equation:
         # X += C_MD @ (C_DD + alpha * C_D)^(-1)  @ (D - Y)
         # X += X @ center(Y).T / (N-1) @ (C_DD + alpha * C_D)^(-1) @ (D - Y)
-        # We form K := center(Y).T / (N-1) @ (C_DD + alpha * C_D)^(-1) @ (D - Y),
+        # We form T := center(Y).T / (N-1) @ (C_DD + alpha * C_D)^(-1) @ (D - Y),
         # so that
-        # X_new = X_old + X_old @ K
+        # X_new = X_old + X_old @ T
         # or
-        # X += X @ K
+        # X += X @ T
 
         D = self.perturb_observations(size=Y.shape)
         inversion_func = self._inversion_methods[self.inversion]
@@ -297,9 +297,9 @@ def compute_transition_matrix(
             C_D=self.C_D,
             D=D,
             Y=Y,
-            X=None,  # We don't need X to compute the factor K
+            X=None,  # We don't need X to compute the factor T
             truncation=truncation,
-            return_K=True,  # Ensures that we don't need X
+            return_T=True,  # Ensures that we don't need X
         )
 
     def perturb_observations(self, *, size: Tuple[int, int]) -> npt.NDArray[np.double]:
diff --git a/src/iterative_ensemble_smoother/esmda_inversion.py b/src/iterative_ensemble_smoother/esmda_inversion.py
index 700efe9e..ca96e2e1 100644
--- a/src/iterative_ensemble_smoother/esmda_inversion.py
+++ b/src/iterative_ensemble_smoother/esmda_inversion.py
@@ -188,16 +188,16 @@ def inversion_exact_cholesky(
     Y: npt.NDArray[np.double],
     X: Optional[npt.NDArray[np.double]],
     truncation: float = 1.0,
-    return_K: bool = False,
+    return_T: bool = False,
 ) -> npt.NDArray[np.double]:
     """Computes an exact inversion using `sp.linalg.solve`, which uses a
     Cholesky factorization in the case of symmetric, positive definite matrices.
 
     The goal is to compute: C_MD @ inv(C_DD + alpha * C_D) @ (D - Y)
 
-    First we solve (C_DD + alpha * C_D) @ K = (D - Y) for K, so that
-    K = inv(C_DD + alpha * C_D) @ (D - Y), then we compute
-    C_MD @ K, but we don't explicitly form C_MD, since it might be more
+    First we solve (C_DD + alpha * C_D) @ T = (D - Y) for T, so that
+    T = inv(C_DD + alpha * C_D) @ (D - Y), then we compute
+    C_MD @ T, but we don't explicitly form C_MD, since it might be more
     efficient to perform the matrix products in another order.
     """
     C_DD = empirical_covariance_upper(Y)  # Only compute upper part
@@ -210,25 +210,25 @@ def inversion_exact_cholesky(
         "lower": False,  # Only use the upper part while solving
     }
 
-    # Compute K := sp.linalg.inv(C_DD + alpha * C_D) @ (D - Y)
+    # Compute T := sp.linalg.inv(C_DD + alpha * C_D) @ (D - Y)
     if C_D.ndim == 2:
         # C_D is a covariance matrix
         C_DD += alpha * C_D  # Save memory by mutating
-        K = sp.linalg.solve(C_DD, D - Y, **solver_kwargs)
+        T = sp.linalg.solve(C_DD, D - Y, **solver_kwargs)
     elif C_D.ndim == 1:
         # C_D is an array, so add it to the diagonal without forming diag(C_D)
         C_DD.flat[:: C_DD.shape[1] + 1] += alpha * C_D
-        K = sp.linalg.solve(C_DD, D - Y, **solver_kwargs)
+        T = sp.linalg.solve(C_DD, D - Y, **solver_kwargs)
 
     # Center matrix
     Y = Y - np.mean(Y, axis=1, keepdims=True)
     _, num_ensemble = Y.shape
 
     # Don't left-multiply the X
-    if return_K:
-        return (Y.T @ K) / (num_ensemble - 1)  # type: ignore
+    if return_T:
+        return (Y.T @ T) / (num_ensemble - 1)  # type: ignore
 
-    return np.linalg.multi_dot([X, Y.T / (num_ensemble - 1), K])  # type: ignore
+    return np.linalg.multi_dot([X, Y.T / (num_ensemble - 1), T])  # type: ignore
 
 
 def inversion_exact_lstsq(
@@ -253,8 +253,8 @@ def inversion_exact_lstsq(
         lhs = C_DD
         lhs.flat[:: lhs.shape[0] + 1] += alpha * C_D
 
-    # K = lhs^-1 @ (D - Y)
-    # lhs @ K = (D - Y)
+    # T = lhs^-1 @ (D - Y)
+    # lhs @ T = (D - Y)
     ans, *_ = sp.linalg.lstsq(
         lhs, D - Y, overwrite_a=True, overwrite_b=True, lapack_driver="gelsy"
     )
@@ -405,7 +405,7 @@ def inversion_subspace(
     Y: npt.NDArray[np.double],
     X: npt.NDArray[np.double],
     truncation: float = 1.0,
-    return_K: bool = False,
+    return_T: bool = False,
 ) -> npt.NDArray[np.double]:
     """See Appendix A.2 in :cite:t:`emerickHistoryMatchingTimelapse2012`.
 
@@ -478,7 +478,7 @@ def inversion_subspace(
     # and finally we multiiply by (D - Y)
     term = U_r_w_inv @ Z
 
-    if return_K:
+    if return_T:
         return np.linalg.multi_dot(  # type: ignore
             [D_delta.T, (term / (1 + T)), term.T, (D - Y)]
         )
diff --git a/tests/test_esmda_inversion.py b/tests/test_esmda_inversion.py
index df534741..9335f2c5 100644
--- a/tests/test_esmda_inversion.py
+++ b/tests/test_esmda_inversion.py
@@ -45,7 +45,7 @@ class TestEsmdaInversion:
     @pytest.mark.parametrize("ensemble_members", [5, 10, 15])
     @pytest.mark.parametrize("num_outputs", [5, 10, 15])
     @pytest.mark.parametrize("num_inputs", [5, 10, 15])
-    def test_that_returning_K_is_equivalent_to_full_computation(
+    def test_that_returning_T_is_equivalent_to_full_computation(
         self, function, ensemble_members, num_outputs, num_inputs
     ):
         # ensemble_members, num_outputs, num_inputs = 5, 10, 15
@@ -64,11 +64,11 @@ def test_that_returning_K_is_equivalent_to_full_computation(
         Y = np.random.randn(num_outputs, ensemble_members)
         X = np.random.randn(num_inputs, ensemble_members)
 
-        # Test both with and without X / return_K
+        # Test both with and without X / return_T
         ans = function(alpha=alpha, C_D=C_D, D=D, Y=Y, X=X)
-        K = function(alpha=alpha, C_D=C_D, D=D, Y=Y, X=None, return_K=True)
+        T = function(alpha=alpha, C_D=C_D, D=D, Y=Y, X=None, return_T=True)
 
-        assert np.allclose(X + ans, X + X @ K)
+        assert np.allclose(X + ans, X + X @ T)
 
     @pytest.mark.parametrize("length", list(range(1, 101, 5)))
     def test_that_the_sum_of_normalize_alpha_is_one(self, length):