From 6b505859a42fc35d2dee911e512c26ed9043e9a8 Mon Sep 17 00:00:00 2001 From: tommyod Date: Fri, 6 Oct 2023 09:01:19 +0200 Subject: [PATCH 1/3] renamed C_D to covariance in ESMDA public API --- src/iterative_ensemble_smoother/esmda.py | 34 ++++++++++---------- tests/test_esmda.py | 40 ++++++++++++++---------- 2 files changed, 41 insertions(+), 33 deletions(-) diff --git a/src/iterative_ensemble_smoother/esmda.py b/src/iterative_ensemble_smoother/esmda.py index 90c46048..c4c68056 100644 --- a/src/iterative_ensemble_smoother/esmda.py +++ b/src/iterative_ensemble_smoother/esmda.py @@ -42,7 +42,7 @@ class ESMDA: Parameters ---------- - C_D : np.ndarray + covariance : np.ndarray Covariance matrix of outputs of shape (num_outputs, num_outputs). If a 1D array is passed, it represents a diagonal covariance matrix. observations : np.ndarray @@ -61,9 +61,9 @@ class ESMDA: Examples -------- - >>> C_D = np.diag([1, 1, 1]) + >>> covariance = np.diag([1, 1, 1]) >>> observations = np.array([1, 2, 3]) - >>> esmda = ESMDA(C_D, observations) + >>> esmda = ESMDA(covariance, observations) """ @@ -76,24 +76,26 @@ class ESMDA: def __init__( self, - C_D: npt.NDArray[np.double], + covariance: npt.NDArray[np.double], observations: npt.NDArray[np.double], alpha: Union[int, npt.NDArray[np.double]] = 5, seed: Union[np.random._generator.Generator, int, None] = None, inversion: str = "exact", ) -> None: # Validate inputs - if not (isinstance(C_D, np.ndarray) and C_D.ndim in (1, 2)): - raise TypeError("Argument `C_D` must be a NumPy array of dimension 1 or 2.") + if not (isinstance(covariance, np.ndarray) and covariance.ndim in (1, 2)): + raise TypeError( + "Argument `covariance` must be a NumPy array of dimension 1 or 2." + ) - if C_D.ndim == 2 and C_D.shape[0] != C_D.shape[1]: - raise ValueError("Argument `C_D` must be square if it's 2D.") + if covariance.ndim == 2 and covariance.shape[0] != covariance.shape[1]: + raise ValueError("Argument `covariance` must be square if it's 2D.") if not (isinstance(observations, np.ndarray) and observations.ndim == 1): raise TypeError("Argument `observations` must be a 1D NumPy array.") - if not observations.shape[0] == C_D.shape[0]: - raise ValueError("Shapes of `observations` and `C_D` must match.") + if not observations.shape[0] == covariance.shape[0]: + raise ValueError("Shapes of `observations` and `covariance` must match.") if not ( (isinstance(alpha, np.ndarray) and alpha.ndim == 1) @@ -139,14 +141,14 @@ def __init__( # If it's a full matrix, we gain speedup by only computing cholesky once # If it's a diagonal, we gain speedup by never having to compute cholesky - if isinstance(C_D, np.ndarray) and C_D.ndim == 2: - self.C_D_L = sp.linalg.cholesky(C_D, lower=False) - elif isinstance(C_D, np.ndarray) and C_D.ndim == 1: - self.C_D_L = np.sqrt(C_D) + if isinstance(covariance, np.ndarray) and covariance.ndim == 2: + self.C_D_L = sp.linalg.cholesky(covariance, lower=False) + elif isinstance(covariance, np.ndarray) and covariance.ndim == 1: + self.C_D_L = np.sqrt(covariance) else: - raise TypeError("Argument `C_D` must be 1D or 2D array") + raise TypeError("Argument `covariance` must be 1D or 2D array") - self.C_D = C_D + self.C_D = covariance assert isinstance(self.C_D, np.ndarray) and self.C_D.ndim in (1, 2) def num_assimilations(self) -> int: diff --git a/tests/test_esmda.py b/tests/test_esmda.py index d28c3d45..b60c4eff 100644 --- a/tests/test_esmda.py +++ b/tests/test_esmda.py @@ -43,18 +43,24 @@ def test_that_diagonal_covariance_gives_same_answer_as_dense(self, seed, inversi Y_prior = rng.normal(size=(num_outputs, num_ensemble)) # Measurement errors - C_D = np.exp(rng.normal(size=num_outputs)) + covariance = np.exp(rng.normal(size=num_outputs)) # Observations observations = rng.normal(size=num_outputs, loc=1) - esmda = ESMDA(C_D, observations, alpha=alpha, seed=seed, inversion=inversion) + esmda = ESMDA( + covariance, observations, alpha=alpha, seed=seed, inversion=inversion + ) X_posterior1 = np.copy(X_prior) for _ in range(esmda.num_assimilations()): X_posterior1 = esmda.assimilate(X_posterior1, Y_prior) esmda = ESMDA( - np.diag(C_D), observations, alpha=alpha, seed=seed, inversion=inversion + np.diag(covariance), + observations, + alpha=alpha, + seed=seed, + inversion=inversion, ) X_posterior2 = np.copy(X_prior) for _ in range(esmda.num_assimilations()): @@ -88,18 +94,18 @@ def G(X): X_prior = rng.normal(size=(num_inputs, num_ensemble)) # Measurement errors - C_D = np.eye(num_outputs) + covariance = np.eye(num_outputs) # The true inputs and observations, a result of running with X_true = 1 X_true = np.ones(shape=(num_inputs,)) observations = G(X_true) # Prepare ESMDA instance running with lower number of ensemble members - esmda_subset = ESMDA(C_D, observations, alpha=alpha, seed=seed) + esmda_subset = ESMDA(covariance, observations, alpha=alpha, seed=seed) X_i_subset = np.copy(X_prior[:, ensemble_mask]) # Prepare ESMDA instance running with all ensemble members - esmda_masked = ESMDA(C_D, observations, alpha=alpha, seed=seed) + esmda_masked = ESMDA(covariance, observations, alpha=alpha, seed=seed) X_i = np.copy(X_prior) # Run both @@ -133,20 +139,20 @@ def G(X): X_prior = rng.normal(size=(num_inputs, num_ensemble)) # Measurement errors - C_D = np.eye(num_outputs) + covariance = np.eye(num_outputs) # The true inputs and observations, a result of running with N(1, 1) X_true = rng.normal(size=(num_inputs,)) + 1 observations = G(X_true) # Create ESMDA instance from an integer `alpha` and run it - esmda_integer = ESMDA(C_D, observations, alpha=5, seed=seed) + esmda_integer = ESMDA(covariance, observations, alpha=5, seed=seed) X_i_int = np.copy(X_prior) for _ in range(esmda_integer.num_assimilations()): X_i_int = esmda_integer.assimilate(X_i_int, G(X_i_int)) # Create another ESMDA instance from a vector `alpha` and run it - esmda_array = ESMDA(C_D, observations, alpha=np.ones(5), seed=seed) + esmda_array = ESMDA(covariance, observations, alpha=np.ones(5), seed=seed) X_i_array = np.copy(X_prior) for _ in range(esmda_array.num_assimilations()): X_i_array = esmda_array.assimilate(X_i_array, G(X_i_array)) @@ -355,21 +361,21 @@ def setup(self): Y_prior = rng.normal(size=(num_outputs, num_ensemble)) # Measurement errors - C_D = np.exp(rng.normal(size=num_outputs)) + covariance = np.exp(rng.normal(size=num_outputs)) # Observations observations = rng.normal(size=num_outputs, loc=1) - return X_prior, Y_prior, C_D, observations + return X_prior, Y_prior, covariance, observations @pytest.mark.limit_memory("138 MB") def test_ESMDA_memory_usage_subspace_inversion(self, setup): # TODO: Currently this is a regression test. Work to improve memory usage. - X_prior, Y_prior, C_D, observations = setup + X_prior, Y_prior, covariance, observations = setup # Create ESMDA instance from an integer `alpha` and run it - esmda = ESMDA(C_D, observations, alpha=1, seed=1, inversion="subspace") + esmda = ESMDA(covariance, observations, alpha=1, seed=1, inversion="subspace") for _ in range(esmda.num_assimilations()): esmda.assimilate(X_prior, Y_prior) @@ -393,9 +399,9 @@ def test_that_float_dtypes_are_preserved(self, inversion, dtype, diagonal): Y_prior = rng.normal(size=(num_outputs, num_ensemble)) # Measurement errors - C_D = np.exp(rng.normal(size=num_outputs)) + covariance = np.exp(rng.normal(size=num_outputs)) if not diagonal: - C_D = np.diag(C_D) + covariance = np.diag(covariance) # Observations observations = rng.normal(size=num_outputs, loc=1) @@ -403,11 +409,11 @@ def test_that_float_dtypes_are_preserved(self, inversion, dtype, diagonal): # Convert types X_prior = X_prior.astype(dtype) Y_prior = Y_prior.astype(dtype) - C_D = C_D.astype(dtype) + covariance = covariance.astype(dtype) observations = observations.astype(dtype) # Create ESMDA instance from an integer `alpha` and run it - esmda = ESMDA(C_D, observations, alpha=1, seed=1, inversion=inversion) + esmda = ESMDA(covariance, observations, alpha=1, seed=1, inversion=inversion) for _ in range(esmda.num_assimilations()): X_posterior = esmda.assimilate(X_prior, Y_prior) From 06ef964254bc1291cac90bc81b4478f7d04b23e6 Mon Sep 17 00:00:00 2001 From: tommyod Date: Fri, 6 Oct 2023 09:08:15 +0200 Subject: [PATCH 2/3] fix notebook --- docs/source/Oscillator.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/source/Oscillator.py b/docs/source/Oscillator.py index 611770dc..568dba11 100644 --- a/docs/source/Oscillator.py +++ b/docs/source/Oscillator.py @@ -12,6 +12,8 @@ # language: python # name: python3 # --- + +# %% # ruff: noqa: E402 # %% [markdown] # # Estimating parameters of an anharmonic oscillator @@ -54,7 +56,7 @@ # # -#%% +# %% import numpy as np from matplotlib import pyplot as plt from scipy import stats @@ -91,6 +93,7 @@ def plot_result(A, response_x_axis, title=None): # %% [markdown] # ## Setup + # %% def generate_observations(K): """Run the model with true parameters, then generate observations.""" @@ -286,10 +289,9 @@ def iterative_smoother(A): # %% smoother = ies.ESMDA( - # Here C_D is a covariance matrix. If a 1D array is passed, - # it is interpreted as the diagonal of the covariance matrix, - # and NOT as a vector of standard deviations - C_D=observation_errors**2, + # If a 1D array is passed, it is interpreted + # as the diagonal of the covariance matrix. + covariance=observation_errors**2, observations=observation_values, # The inflation factors used in ESMDA # They are scaled so that sum_i alpha_i^-1 = 1 From aa3d86cb4c12edb78101ab9c224c2fce16acfa41 Mon Sep 17 00:00:00 2001 From: tommyod Date: Wed, 18 Oct 2023 06:30:23 +0200 Subject: [PATCH 3/3] updated docstring --- src/iterative_ensemble_smoother/esmda.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/iterative_ensemble_smoother/esmda.py b/src/iterative_ensemble_smoother/esmda.py index c4c68056..8255d5f3 100644 --- a/src/iterative_ensemble_smoother/esmda.py +++ b/src/iterative_ensemble_smoother/esmda.py @@ -42,12 +42,18 @@ class ESMDA: Parameters ---------- - covariance : np.ndarray - Covariance matrix of outputs of shape (num_outputs, num_outputs). - If a 1D array is passed, it represents a diagonal covariance matrix. + covariance : npt.NDArray[np.double] + Either a 1D array of diagonal covariances, or a 2D covariance matrix. + The shape is either (num_observations,) or (num_observations, num_observations). + This is C_D in Emerick (2013), and represents observation or measurement + errors. We observe d from the real world, y from the model g(x), and + assume that d = y + e, where the error e is multivariate normal with + covariance given by `covariance`. observations : np.ndarray - 1D array of shape (num_inputs,) representing real-world observations. + 1D array of shape (num_observations,) representing real-world observations. + This is d_obs in Emerick (2013). alpha : int or 1D np.ndarray, optional + Multiplicative factor for the covariance. If an integer `alpha` is given, an array with length `alpha` and elements `alpha` is constructed. If an 1D array is given, it is normalized so sum_i 1/alpha_i = 1 and used. The default is 5, which @@ -58,6 +64,7 @@ class ESMDA: The default is None. inversion : str, optional Which inversion method to use. The default is "exact". + See the dictionary ESMDA._inversion_methods for more information. Examples -------- @@ -164,14 +171,16 @@ def assimilate( ) -> npt.NDArray[np.double]: """Assimilate data and return an updated ensemble X_posterior. + num_parameters, ensemble_size + Parameters ---------- X : np.ndarray - 2D array of shape (num_inputs, num_ensemble_members). + 2D array of shape (num_parameters, ensemble_size). Y : np.ndarray - 2D array of shape (num_ouputs, num_ensemble_members). + 2D array of shape (num_parameters, ensemble_size). ensemble_mask : np.ndarray - 1D boolean array of length `num_ensemble_members`, describing which + 1D boolean array of length `ensemble_size`, describing which ensemble members are active. Inactive realizations are ignored. Defaults to all active. overwrite : bool