From 5c2fec334d7114ad4dd16632f2191fb6f98d18f5 Mon Sep 17 00:00:00 2001
From: Tommy Odland
Date: Thu, 16 Nov 2023 07:40:51 +0100
Subject: [PATCH 1/4] added linear regression

---
 docs/source/Oscillator.py | 2 +-
 docs/source/conf.py       | 5 ++---
 docs/source/examples.rst  | 1 +
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/source/Oscillator.py b/docs/source/Oscillator.py
index 568dba11..f0f6f337 100644
--- a/docs/source/Oscillator.py
+++ b/docs/source/Oscillator.py
@@ -6,7 +6,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#     jupytext_version: 1.14.5
+#     jupytext_version: 1.15.2
 #   kernelspec:
 #     display_name: Python 3 (ipykernel)
 #     language: python
diff --git a/docs/source/conf.py b/docs/source/conf.py
index eb42c062..30177164 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -20,11 +20,10 @@
 version = re.sub(r"\.dev.*$", r".dev", ies.__version__)
 release = version
 
-# convert the python file to a notebook
+# Convert Python files to notebooks
 check_output(["jupytext", "Polynomial.py", "-o", "Polynomial.ipynb"])
-
-# Do the same for this file
 check_output(["jupytext", "Oscillator.py", "-o", "Oscillator.ipynb"])
+check_output(["jupytext", "LinearRegression.py", "-o", "LinearRegression.ipynb"])
 
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
index d6506b3e..c043aee6 100644
--- a/docs/source/examples.rst
+++ b/docs/source/examples.rst
@@ -7,5 +7,6 @@ Example Usage
 
    Polynomial
    Oscillator
+   LinearRegression
 
 * :ref:`genindex`

From 172898066f0a5a66765ed8bec4657031a5c9cd9f Mon Sep 17 00:00:00 2001
From: Tommy Odland
Date: Thu, 16 Nov 2023 07:48:44 +0100
Subject: [PATCH 2/4] added notebook .py file

---
 docs/source/LinearRegression.py | 193 ++++++++++++++++++++++++++++++++++
 1 file changed, 193 insertions(+)
 create mode 100644 docs/source/LinearRegression.py

diff --git a/docs/source/LinearRegression.py b/docs/source/LinearRegression.py
new file mode 100644
index 00000000..32a4b07d
--- /dev/null
+++ b/docs/source/LinearRegression.py
@@ -0,0 +1,193 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:percent
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#     jupytext_version: 1.15.2
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+# ---
+
+# %%
+# ruff: noqa: E402
+
+# %% [markdown]
+# # Linear regression with ESMDA
+#
+# We solve a linear regression problem using ESMDA.
+# First we define the forward model as $g(x) = Ax$,
+# then we set up a prior ensemble on the linear
+# regression coefficients, so $x \sim \mathcal{N}(0, 1)$.
+#
+# As shown by Emerick and Reynolds (2013), when the set of
+# inflation factors $\alpha_i$ is chosen so that $\sum_i \alpha_i^{-1} = 1$,
+# ESMDA yields the correct posterior mean for the linear-Gaussian case.
+
+# %% [markdown]
+# ## Import packages
+
+# %%
+import numpy as np
+from matplotlib import pyplot as plt
+
+from iterative_ensemble_smoother import ESMDA
+
+# %% [markdown]
+# ## Create problem data
+#
+# Some settings worth experimenting with:
+#
+# - Decreasing `prior_std` (set to 1 below) will pull the posterior solution toward zero.
+# - Increasing `num_ensemble` will increase the quality of the solution.
+# - Increasing `num_observations / num_parameters`
+#   will increase the quality of the solution.
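+#
+# For reference: in this linear-Gaussian setting the posterior is available
+# in closed form. With prior $x \sim \mathcal{N}(0, \sigma^2 I)$ and
+# observations $b = Ax + \epsilon$, $\epsilon \sim \mathcal{N}(0, I)$,
+# the posterior mean is
+#
+# $$\mu = (A^T A + \sigma^{-2} I)^{-1} A^T b,$$
+#
+# which we use to cross-check the ESMDA result further below.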
+
+# %%
+num_parameters = 25
+num_observations = 100
+num_ensemble = 30
+prior_std = 1
+
+# %%
+rng = np.random.default_rng(42)
+
+# Create a problem with g(x) = A @ x
+A = rng.standard_normal(size=(num_observations, num_parameters))
+
+
+def g(X):
+    """Forward model."""
+    return A @ X
+
+
+# Create observations: obs = g(x) + N(0, 1)
+x_true = np.linspace(-1, 1, num=num_parameters)
+observation_noise = rng.standard_normal(size=num_observations)
+observations = g(x_true) + observation_noise
+
+# Initial ensemble X ~ N(0, prior_std)
+X = rng.normal(size=(num_parameters, num_ensemble)) * prior_std
+
+# Covariance matches the noise added to observations above
+covariance = np.ones(num_observations)
+
+# %% [markdown]
+# ## Solve the maximum likelihood problem
+#
+# We can solve $Ax = b$, where $b$ is the observations,
+# for the maximum likelihood estimate.
+# Notice that unlike Ridge regression,
+# solving $Ax = b$ directly does not use any prior information.
+
+# %%
+x_ml, *_ = np.linalg.lstsq(A, observations, rcond=None)
+
+plt.figure(figsize=(8, 3))
+plt.scatter(np.arange(len(x_true)), x_true, label="True parameter values")
+plt.scatter(np.arange(len(x_true)), x_ml, label="ML estimate (no prior)")
+plt.xlabel("Parameter index")
+plt.ylabel("Parameter value")
+plt.grid(True, ls="--", zorder=0, alpha=0.33)
+plt.legend()
+plt.show()
+
+# %% [markdown]
+# ## Solve using ESMDA
+#
+# We create an `ESMDA` instance and solve the Gauss-linear problem.
+
+# %%
+smoother = ESMDA(
+    covariance=covariance,
+    observations=observations,
+    alpha=5,
+    seed=1,
+)
+
+X_i = np.copy(X)
+for i, alpha_i in enumerate(smoother.alpha, 1):
+    print(
+        f"ESMDA iteration {i}/{smoother.num_assimilations()}"
+        + f" with inflation factor alpha_i={alpha_i}"
+    )
+    X_i = smoother.assimilate(X_i, Y=g(X_i))
+
+
+X_posterior = np.copy(X_i)
+
+# %% [markdown]
+# ## Plot and compare solutions
+#
+# Compare the true parameters with both the ML estimate
+# from linear regression and the posterior means obtained using `ESMDA`.
+
+# %%
+plt.figure(figsize=(8, 3))
+plt.scatter(np.arange(len(x_true)), x_true, label="True parameter values")
+plt.scatter(np.arange(len(x_true)), x_ml, label="ML estimate (no prior)")
+plt.scatter(
+    np.arange(len(x_true)), np.mean(X_posterior, axis=1), label="Posterior mean"
+)
+plt.xlabel("Parameter index")
+plt.ylabel("Parameter value")
+plt.grid(True, ls="--", zorder=0, alpha=0.33)
+plt.legend()
+plt.show()
+
+# %% [markdown]
+# We now include the posterior samples as well.
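+#
+# Before plotting, we cross-check the ESMDA posterior mean against the
+# closed-form posterior mean from the introduction. This is only a sketch:
+# it assumes the unit observation noise used above, and with a small
+# ensemble the agreement is approximate.
+
+# %%
+# Closed-form posterior mean of the linear-Gaussian problem (sketch)
+posterior_mean = np.linalg.solve(
+    A.T @ A + np.eye(num_parameters) / prior_std**2, A.T @ observations
+)
+print(np.linalg.norm(np.mean(X_posterior, axis=1) - posterior_mean))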
+
+# %%
+plt.figure(figsize=(8, 3))
+plt.scatter(np.arange(len(x_true)), x_true, label="True parameter values")
+plt.scatter(np.arange(len(x_true)), x_ml, label="ML estimate (no prior)")
+plt.scatter(
+    np.arange(len(x_true)), np.mean(X_posterior, axis=1), label="Posterior mean"
+)
+
+# Loop over every ensemble member and plot it
+for j in range(num_ensemble):
+    # Jitter along the x-axis a little bit
+    x_jitter = np.arange(len(x_true)) + rng.normal(loc=0, scale=0.1, size=len(x_true))
+
+    # Plot this ensemble member
+    plt.scatter(
+        x_jitter,
+        X_posterior[:, j],
+        label=("Posterior values" if j == 0 else None),
+        color="black",
+        alpha=0.2,
+        s=5,
+        zorder=0,
+    )
+plt.xlabel("Parameter index")
+plt.ylabel("Parameter value")
+plt.grid(True, ls="--", zorder=0, alpha=0.33)
+plt.legend()
+plt.show()

From 919627a13b7c2d26494b9de5446ffaee0fdf64bc Mon Sep 17 00:00:00 2001
From: Tommy Odland
Date: Thu, 16 Nov 2023 08:27:23 +0100
Subject: [PATCH 3/4] remove change to Oscillator.py

---
 docs/source/Oscillator.py | 341 ++++++++++++--------------------------
 1 file changed, 103 insertions(+), 238 deletions(-)

diff --git a/docs/source/Oscillator.py b/docs/source/Oscillator.py
index f0f6f337..56ecf1ed 100644
--- a/docs/source/Oscillator.py
+++ b/docs/source/Oscillator.py
@@ -6,317 +6,182 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#     jupytext_version: 1.15.2
+#     jupytext_version: 1.14.4
 #   kernelspec:
 #     display_name: Python 3 (ipykernel)
 #     language: python
 #     name: python3
 # ---
 
-# %%
-# ruff: noqa: E402
 
 # %% [markdown]
-# # Estimating parameters of an anharmonic oscillator
+# # Example: Estimating parameters of an anharmonic oscillator
 #
 # The anharmonic oscillator can be modelled by a non-linear partial differential
-# equation as described in section 6.3.4 of the book [Fundamentals of Algorithms
-# and Data Assimilation](https://www.amazon.com/Data-Assimilation-Methods-Algorithms-Applications/dp/1611974534)
-# by Mark Asch, Marc Bocquet and Maëlle Nodet.
-#
-# -------------
-#
-# The discrete two-parameter model is:
-#
-# $$x_{k+1} - 2 x_{k} + x_{k-1} = \omega^2 x_{k} - \lambda^2 x_{k}^3$$
-#
-# with initial conditions $x_0 = 0$ and $x_1 = 1$.
-#
-# In other words we have the following recurrence relationship:
-#
-# $x_{k+1} = \omega^2 x_{k} - \lambda^2 x_{k}^3 + 2 x_{k} - x_{k-1}$
-#
-#
-# -------------
-# The state vector can be written as:
-#
-# $$
-# \mathbf{u}_k = \begin{bmatrix}
-# x_{k} \\
-# x_{k-1}
-# \end{bmatrix}
-# \quad
-# \mathcal{M}_{k+1} =
-# \begin{bmatrix}
-# 2 + \omega^2 - \lambda^2 x_k^2 & -1 \\
-# 1 & 0
-# \end{bmatrix}
-# $$
-#
-# so that $\mathbf{u}_{k+1} = \mathcal{M}_{k+1}(\mathbf{u}_k)$.
-#
-#
+# equation as described in section 6.4.3 of the book Fundamentals of Algorithms
+# and Data Assimilation by Mark Asch, Marc Bocquet and Maëlle Nodet.
 
 # %%
-import numpy as np
+# Simple plotting of forward-model with a single response and parameters
 from matplotlib import pyplot as plt
-from scipy import stats
-
-import iterative_ensemble_smoother as ies
-
-rng = np.random.default_rng(12345)
-
-plt.rcParams["figure.figsize"] = [8, 3]
-
-
-# %%
-def plot_result(A, response_x_axis, title=None):
-    """Plot the anharmonic oscillator, as well as marginal
-    and joint distributions for the parameters."""
-
-    responses = forward_model(A, response_x_axis)
-
-    fig, axes = plt.subplots(1, 2 + len(A), figsize=(9, 2.25))
-    if title:
-        fig.suptitle(title)
-
-    axes[0].plot(response_x_axis, responses, color="black", alpha=0.1)
-    for ax, param, label in zip(axes[1:], A, ["omega", "lambda"]):
-        ax.hist(param, bins="fd")
-        ax.set_xlabel(label)
-
-    axes[-1].scatter(A[0, :], A[1, :], s=5)
-
-    fig.tight_layout()
+
+def plot_result(A, response_x_axis, trans_func=lambda x: x, priors=[]):
+    responses = forward_model(A, priors, response_x_axis)
+    plt.rcParams["figure.figsize"] = [15, 4]
+    _, axs = plt.subplots(1, 1 + len(A))
+
+    axs[0].plot(response_x_axis, responses)
+    for i, param in enumerate(A):
+        A_trans = np.array([trans_func(v, *priors[i]) for v in param])
+        axs[i + 1].hist(A_trans, bins=10)
     plt.show()
 
 # %% [markdown]
 # ## Setup
 
 # %%
-def generate_observations(K):
-    """Run the model with true parameters, then generate observations."""
-    # Evaluate using true parameter values on a fine grid with K sample points
-    x = simulate_anharmonic(omega=3.5e-2, lmbda=3e-4, K=K)
-
-    # Generate observations every `nobs` steps
-    nobs = 50
-    # Generate observation points [0, 50, 100, 150, ...] when `nobs` = 50
-    observation_x_axis = np.arange(K // nobs) * nobs
-
-    # Generate noisy observations, with value at least 5
-    observation_errors = np.maximum(5, 0.2 * np.abs(x[observation_x_axis]))
-    observation_values = rng.normal(loc=x[observation_x_axis], scale=observation_errors)
-
-    return observation_values, observation_errors, observation_x_axis
-
-
-def simulate_anharmonic(omega, lmbda, K):
-    """Evaluate the anharmonic oscillator."""
+# Oscillator example
+import numpy as np
+from math import sqrt
+from scipy.special import erf
+
+
+def _generate_observations(K):
+    x = _evaluate(omega=3.5e-2, lmbda=3e-4, K=K)
+    rng = np.random.default_rng(12345)
+    nobs = 50
+    obs_points = np.linspace(0, K, nobs, endpoint=False, dtype=int)
+
+    obs_with_std = np.zeros(shape=(len(obs_points), 2))
+
+    for obs_idx, obs_point in enumerate(obs_points):
+        # Set observation error's standard deviation to some
+        # percentage of the amplitude of x with a minimum of, e.g., 1.
+        obs_std = max(1, 0.02 * abs(x[obs_point]))
+        obs_with_std[obs_idx, 0] = x[obs_point] + rng.normal(loc=0.0, scale=obs_std)
+        obs_with_std[obs_idx, 1] = obs_std
+
+    return obs_with_std, obs_points
+
+
+def _evaluate(omega, lmbda, K):
     x = np.zeros(K)
     x[0] = 0
     x[1] = 1
 
     # Looping from 2 because we have initial conditions at k=0 and k=1.
-    for k in range(2, K):
-        x[k] = x[k - 1] * (2 + omega**2 - lmbda**2 * x[k - 1] ** 2) - x[k - 2]
+    for k in range(2, K - 1):
+        M = np.array([[2 + omega**2 - lmbda**2 * x[k] ** 2, -1], [1, 0]])
+        u = np.array([x[k], x[k - 1]])
+        u = M @ u
+        x[k + 1] = u[0]
+        x[k] = u[1]
     return x
 
 
-def forward_model(A, response_x_axis):
-    """Evaluate on each column (ensemble realization)."""
-    return np.vstack(
-        [simulate_anharmonic(*params, K=len(response_x_axis)) for params in A.T]
-    ).T
+def uniform(x, min_x, max_x):
+    y = 0.5 * (1 + erf(x / sqrt(2.0)))
+    return y * (max_x - min_x) + min_x
+
+
+def forward_model(A, prior, response_x_axis):
+    responses = []
+    for [omega, lmbda] in A.T:
+        r = _evaluate(
+            omega=uniform(omega, *prior[0]),
+            lmbda=uniform(lmbda, *prior[1]),
+            K=len(response_x_axis),
+        )
+        responses.append(r)
+    return np.array(responses).T
 
 
-response_x_axis = range(2500)
-observation_values, observation_errors, observation_x_axis = generate_observations(
-    len(response_x_axis)
-)
+response_x_axis = range(2500)
+realizations = 100
 
-# %% [markdown]
-# ## Plot observations
-
-# %%
-fig, ax = plt.subplots(figsize=(8, 3))
-ax.set_title("Anharmonic Oscillator")
-ax.plot(
-    np.arange(2500),
-    simulate_anharmonic(omega=3.5e-2, lmbda=3e-4, K=2500),
-    label="Truth",
-    color="black",
-)
-
-ax.scatter(observation_x_axis, observation_values, label="Observations")
-
-fig.tight_layout()
-plt.legend()
-plt.show()
+observations, observation_x_axis = _generate_observations(len(response_x_axis))
+observation_values = observations[:, 0]
+observation_errors = observations[:, 1]
+
+A = np.random.normal(0, 1, size=(2, realizations))
+
+priors = [(2.5e-2, 4.5e-2), (2.0e-4, 4.0e-4)]
+plot_result(A, response_x_axis, uniform, priors)
 
 # %% [markdown]
-# ## Create and plot prior
-#
-#
-# This section shows a "trick" for working with non-normal priors.
-#
-# Let $g$ be the forward model, so that $y = g(x)$.
-# Assume for instance that $x$ must be a positive parameter.
-# If so, then we can for instance use an exponential prior on $x$.
-# But if we use samples from an exponential prior directly in an ensemble smoother,
-# there is no guarantee that the posterior samples will be positive.
-#
-# The trick is to sample $x \sim \mathcal{N}(0, 1)$,
-# then define a function $f$ that maps from standard normal to the
-# exponential distribution.
-# This function can be constructed by first mapping from standard normal to
-# the interval $[0, 1)$ using the CDF, then mapping to exponential using the
-# quantile function (inverse CDF) of the exponential distribution.
-#
-# In summary:
-#
-# - We send $x \sim \mathcal{N}(0, 1)$ into the ensemble smoother as before
-# - We use the composite function $g(f(x))$ as our forward model, instead of $g(x)$
-# - When we plot the prior and posterior, we plot $f(x)$ instead of $x$ directly
-
-# %%
-# Create Uniform prior distributions
-realizations = 100
-PRIORS = [stats.uniform(loc=0.025, scale=0.02), stats.uniform(loc=0.0002, scale=0.0002)]
-
-
-# %%
-def prior_to_standard_normal(A):
-    """Map prior to U(0, 1), then use inverse of CDF to map to N(0, 1)."""
-    return np.vstack(
-        [stats.norm().ppf(prior.cdf(A_param)) for (A_param, prior) in zip(A, PRIORS)]
-    )
-
-
-def standard_normal_to_prior(A):
-    """Reverse mapping."""
-    return np.vstack(
-        [prior.ppf(stats.norm().cdf(A_param)) for (A_param, prior) in zip(A, PRIORS)]
-    )
-
-
-# verify that mappings are invertible
-A_uniform = np.vstack([prior.rvs(realizations) for prior in PRIORS])
-assert np.allclose(
-    standard_normal_to_prior(prior_to_standard_normal(A_uniform)), A_uniform
-)
-assert np.allclose(
-    prior_to_standard_normal(standard_normal_to_prior(A_uniform)), A_uniform
-)
-
-# %%
-# Create a standard normal prior and plot it in transformed space
-A = rng.standard_normal(size=(2, realizations))
-plot_result(standard_normal_to_prior(A), response_x_axis)
-
-# %% [markdown]
-# ## A single update step
-
-# %%
-plot_result(standard_normal_to_prior(A), response_x_axis)
-
-# The forward model is composed with the transformation from N(0, 1) to uniform priors
-responses_before = forward_model(standard_normal_to_prior(A), response_x_axis)
-Y = responses_before[observation_x_axis]
-
-# Run smoother for one step
-smoother = ies.SIES(
-    parameters=A,
-    covariance=observation_errors**2,
-    observations=observation_values,
-    seed=42,
-)
-
-new_A = smoother.sies_iteration(Y, step_length=1.0)
-
-plot_result(standard_normal_to_prior(new_A), response_x_axis)
+# ## Update step
+
+# %%
+import numpy as np
+import iterative_ensemble_smoother as ies
+
+plot_result(A, response_x_axis, uniform, priors)
+responses_before = forward_model(A, priors, response_x_axis)
+Y = responses_before[observation_x_axis]
+
+smoother = ies.ES()
+smoother.fit(Y, observation_errors, observation_values)
+new_A = smoother.update(A)
+plot_result(new_A, response_x_axis, uniform, priors)
 
 # %% [markdown]
 # ## Iterative smoother
 
 # %%
-from iterative_ensemble_smoother.utils import steplength_exponential
+import numpy as np
+from matplotlib import pyplot as plt
+import iterative_ensemble_smoother as ies
 
 
-def iterative_smoother(A):
+def iterative_smoother():
     A_current = np.copy(A)
     iterations = 4
-    smoother = ies.SIES(
-        parameters=A,
-        covariance=observation_errors**2,
-        observations=observation_values,
-        seed=42,
-    )
-
-    plot_result(
-        standard_normal_to_prior(A_current),
-        response_x_axis,
-        title="Prior",
-    )
-
-    for iteration in range(iterations):
-        responses_before = forward_model(
-            standard_normal_to_prior(A_current), response_x_axis
-        )
-
-        Y = responses_before[observation_x_axis]
-
-        A_current = smoother.sies_iteration(
-            Y, step_length=steplength_exponential(iteration + 1)
-        )
-
-        plot_result(
-            standard_normal_to_prior(A_current),
-            response_x_axis,
-            title=f"SIES iteration {iteration+1}",
-        )
-
-
-iterative_smoother(A)
+    smoother = ies.SIES(realizations)
+
+    for _ in range(iterations):
+        plot_result(A_current, response_x_axis, uniform, priors)
+
+        responses_before = forward_model(A_current, priors, response_x_axis)
+        Y = responses_before[observation_x_axis]
+
+        smoother.fit(Y, observation_errors, observation_values)
+        A_current = smoother.update(A_current)
+    plot_result(A_current, response_x_axis, uniform, priors)
+
+
+iterative_smoother()
 
 # %% [markdown]
-# ## ES-MDA (Ensemble Smoother - Multiple Data Assimilation)
+# ## ES-MDA (Multiple Data Assimilation - Ensemble Smoother)
 
 # %%
-smoother = ies.ESMDA(
-    # If a 1D array is passed, it is interpreted
-    # as the diagonal of the covariance matrix.
-    covariance=observation_errors**2,
-    observations=observation_values,
-    # The inflation factors used in ESMDA
-    # They are scaled so that sum_i alpha_i^-1 = 1
-    alpha=np.array([8, 4, 2, 1]),
-    seed=42,
-)
-
-
-A_current = np.copy(A)
-
-plot_result(standard_normal_to_prior(A_current), response_x_axis, title="Prior")
-
-for iteration in range(smoother.num_assimilations()):
-    # Evaluate the model
-    responses_before = forward_model(
-        standard_normal_to_prior(A_current), response_x_axis
-    )
-    Y = responses_before[observation_x_axis]
-
-    # Assimilate data
-    A_current = smoother.assimilate(A_current, Y)
-
-    # Plot
-    plot_result(
-        standard_normal_to_prior(A_current),
-        response_x_axis,
-        title=f"ESMDA iteration {iteration+1}",
-    )
+import numpy as np
+from matplotlib import pyplot as plt
+import iterative_ensemble_smoother as ies
+
+
+def es_mda():
+    A_current = np.copy(A)
+    weights = [8, 4, 2, 1]
+    length = sum(1.0 / x for x in weights)
+
+    smoother = ies.ES()
+    for weight in weights:
+        plot_result(A_current, response_x_axis, uniform, priors)
+
+        responses_before = forward_model(A_current, priors, response_x_axis)
+        Y = responses_before[observation_x_axis]
+
+        observation_errors_scaled = observation_errors * sqrt(weight * length)
+        smoother.fit(Y, observation_errors_scaled, observation_values)
+        A_current = smoother.update(A_current)
+    plot_result(A_current, response_x_axis, uniform, priors)
+
+
+es_mda()
+
+# %%

From d06a69421fa25b8eec99dbd7af889fd63fb29e8d Mon Sep 17 00:00:00 2001
From: Tommy Odland
Date: Thu, 16 Nov 2023 08:28:09 +0100
Subject: [PATCH 4/4] remove change to Oscillator.py

---
 docs/source/Oscillator.py | 341 ++++++++++++++++++++++++++++----------
 1 file changed, 242 insertions(+), 103 deletions(-)

diff --git a/docs/source/Oscillator.py b/docs/source/Oscillator.py
index 56ecf1ed..568dba11 100644
--- a/docs/source/Oscillator.py
+++ b/docs/source/Oscillator.py
@@ -6,182 +6,321 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#     jupytext_version: 1.14.4
+#     jupytext_version: 1.14.5
 #   kernelspec:
 #     display_name: Python 3 (ipykernel)
 #     language: python
 #     name: python3
 # ---
 
+# %%
+# ruff: noqa: E402
 
 # %% [markdown]
-# # Example: Estimating parameters of an anharmonic oscillator
+# # Estimating parameters of an anharmonic oscillator
 #
 # The anharmonic oscillator can be modelled by a non-linear partial differential
-# equation as described in section 6.4.3 of the book Fundamentals of Algorithms
-# and Data Assimilation by Mark Asch, Marc Bocquet and Maëlle Nodet.
+# equation as described in section 6.3.4 of the book [Fundamentals of Algorithms
+# and Data Assimilation](https://www.amazon.com/Data-Assimilation-Methods-Algorithms-Applications/dp/1611974534)
+# by Mark Asch, Marc Bocquet and Maëlle Nodet.
+#
+# -------------
+#
+# The discrete two-parameter model is:
+#
+# $$x_{k+1} - 2 x_{k} + x_{k-1} = \omega^2 x_{k} - \lambda^2 x_{k}^3$$
+#
+# with initial conditions $x_0 = 0$ and $x_1 = 1$.
+#
+# In other words we have the following recurrence relationship:
+#
+# $x_{k+1} = \omega^2 x_{k} - \lambda^2 x_{k}^3 + 2 x_{k} - x_{k-1}$
+#
+#
+# -------------
+# The state vector can be written as:
+#
+# $$
+# \mathbf{u}_k = \begin{bmatrix}
+# x_{k} \\
+# x_{k-1}
+# \end{bmatrix}
+# \quad
+# \mathcal{M}_{k+1} =
+# \begin{bmatrix}
+# 2 + \omega^2 - \lambda^2 x_k^2 & -1 \\
+# 1 & 0
+# \end{bmatrix}
+# $$
+#
+# so that $\mathbf{u}_{k+1} = \mathcal{M}_{k+1}(\mathbf{u}_k)$.
+#
+#
 
 # %%
-# Simple plotting of forward-model with a single response and parameters
+import numpy as np
 from matplotlib import pyplot as plt
+from scipy import stats
+
+import iterative_ensemble_smoother as ies
+
+rng = np.random.default_rng(12345)
+
+plt.rcParams["figure.figsize"] = [8, 3]
 
-def plot_result(A, response_x_axis, trans_func=lambda x: x, priors=[]):
-    responses = forward_model(A, priors, response_x_axis)
-    plt.rcParams["figure.figsize"] = [15, 4]
-    _, axs = plt.subplots(1, 1 + len(A))
-
-    axs[0].plot(response_x_axis, responses)
-    for i, param in enumerate(A):
-        A_trans = np.array([trans_func(v, *priors[i]) for v in param])
-        axs[i + 1].hist(A_trans, bins=10)
+
+# %%
+def plot_result(A, response_x_axis, title=None):
+    """Plot the anharmonic oscillator, as well as marginal
+    and joint distributions for the parameters."""
+
+    responses = forward_model(A, response_x_axis)
+
+    fig, axes = plt.subplots(1, 2 + len(A), figsize=(9, 2.25))
+    if title:
+        fig.suptitle(title)
+
+    axes[0].plot(response_x_axis, responses, color="black", alpha=0.1)
+    for ax, param, label in zip(axes[1:], A, ["omega", "lambda"]):
+        ax.hist(param, bins="fd")
+        ax.set_xlabel(label)
+
+    axes[-1].scatter(A[0, :], A[1, :], s=5)
+
+    fig.tight_layout()
     plt.show()
 
 # %% [markdown]
 # ## Setup
 
 # %%
-# Oscillator example
-import numpy as np
-from math import sqrt
-from scipy.special import erf
-
-
-def _generate_observations(K):
-    x = _evaluate(omega=3.5e-2, lmbda=3e-4, K=K)
-    rng = np.random.default_rng(12345)
-    nobs = 50
-    obs_points = np.linspace(0, K, nobs, endpoint=False, dtype=int)
-
-    obs_with_std = np.zeros(shape=(len(obs_points), 2))
-
-    for obs_idx, obs_point in enumerate(obs_points):
-        # Set observation error's standard deviation to some
-        # percentage of the amplitude of x with a minimum of, e.g., 1.
-        obs_std = max(1, 0.02 * abs(x[obs_point]))
-        obs_with_std[obs_idx, 0] = x[obs_point] + rng.normal(loc=0.0, scale=obs_std)
-        obs_with_std[obs_idx, 1] = obs_std
-
-    return obs_with_std, obs_points
-
-
-def _evaluate(omega, lmbda, K):
+def generate_observations(K):
+    """Run the model with true parameters, then generate observations."""
+    # Evaluate using true parameter values on a fine grid with K sample points
+    x = simulate_anharmonic(omega=3.5e-2, lmbda=3e-4, K=K)
+
+    # Generate observations every `nobs` steps
+    nobs = 50
+    # Generate observation points [0, 50, 100, 150, ...] when `nobs` = 50
+    observation_x_axis = np.arange(K // nobs) * nobs
+
+    # Generate noisy observations, with value at least 5
+    observation_errors = np.maximum(5, 0.2 * np.abs(x[observation_x_axis]))
+    observation_values = rng.normal(loc=x[observation_x_axis], scale=observation_errors)
+
+    return observation_values, observation_errors, observation_x_axis
+
+
+def simulate_anharmonic(omega, lmbda, K):
+    """Evaluate the anharmonic oscillator."""
     x = np.zeros(K)
     x[0] = 0
     x[1] = 1
 
     # Looping from 2 because we have initial conditions at k=0 and k=1.
-    for k in range(2, K - 1):
-        M = np.array([[2 + omega**2 - lmbda**2 * x[k] ** 2, -1], [1, 0]])
-        u = np.array([x[k], x[k - 1]])
-        u = M @ u
-        x[k + 1] = u[0]
-        x[k] = u[1]
+    for k in range(2, K):
+        x[k] = x[k - 1] * (2 + omega**2 - lmbda**2 * x[k - 1] ** 2) - x[k - 2]
     return x
 
 
-def uniform(x, min_x, max_x):
-    y = 0.5 * (1 + erf(x / sqrt(2.0)))
-    return y * (max_x - min_x) + min_x
-
-
-def forward_model(A, prior, response_x_axis):
-    responses = []
-    for [omega, lmbda] in A.T:
-        r = _evaluate(
-            omega=uniform(omega, *prior[0]),
-            lmbda=uniform(lmbda, *prior[1]),
-            K=len(response_x_axis),
-        )
-        responses.append(r)
-    return np.array(responses).T
+def forward_model(A, response_x_axis):
+    """Evaluate on each column (ensemble realization)."""
+    return np.vstack(
+        [simulate_anharmonic(*params, K=len(response_x_axis)) for params in A.T]
+    ).T
 
 
-response_x_axis = range(2500)
-realizations = 100
-
-observations, observation_x_axis = _generate_observations(len(response_x_axis))
-observation_values = observations[:, 0]
-observation_errors = observations[:, 1]
-
-A = np.random.normal(0, 1, size=(2, realizations))
-
-priors = [(2.5e-2, 4.5e-2), (2.0e-4, 4.0e-4)]
-plot_result(A, response_x_axis, uniform, priors)
+response_x_axis = range(2500)
+observation_values, observation_errors, observation_x_axis = generate_observations(
+    len(response_x_axis)
+)
+
+
+# %% [markdown]
+# ## Plot observations
+
+# %%
+fig, ax = plt.subplots(figsize=(8, 3))
+ax.set_title("Anharmonic Oscillator")
+ax.plot(
+    np.arange(2500),
+    simulate_anharmonic(omega=3.5e-2, lmbda=3e-4, K=2500),
+    label="Truth",
+    color="black",
+)
+
+ax.scatter(observation_x_axis, observation_values, label="Observations")
+
+fig.tight_layout()
+plt.legend()
+plt.show()
 
 # %% [markdown]
-# ## Update step
-
-# %%
-import numpy as np
-import iterative_ensemble_smoother as ies
-
-plot_result(A, response_x_axis, uniform, priors)
-responses_before = forward_model(A, priors, response_x_axis)
-Y = responses_before[observation_x_axis]
-
-smoother = ies.ES()
-smoother.fit(Y, observation_errors, observation_values)
-new_A = smoother.update(A)
-plot_result(new_A, response_x_axis, uniform, priors)
+# ## Create and plot prior
+#
+#
+# This section shows a "trick" for working with non-normal priors.
+#
+# Let $g$ be the forward model, so that $y = g(x)$.
+# Assume for instance that $x$ must be a positive parameter.
+# If so, then we can for instance use an exponential prior on $x$.
+# But if we use samples from an exponential prior directly in an ensemble smoother,
+# there is no guarantee that the posterior samples will be positive.
+#
+# The trick is to sample $x \sim \mathcal{N}(0, 1)$,
+# then define a function $f$ that maps from standard normal to the
+# exponential distribution.
+# This function can be constructed by first mapping from standard normal to
+# the interval $[0, 1)$ using the CDF, then mapping to exponential using the
+# quantile function (inverse CDF) of the exponential distribution.
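+#
+# Concretely, $f(x) = F^{-1}(\Phi(x))$, where $\Phi$ is the CDF of the standard
+# normal and $F^{-1}$ is the quantile function of the target prior. The code
+# below applies the same construction with uniform priors via `scipy.stats`.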
+#
+# In summary:
+#
+# - We send $x \sim \mathcal{N}(0, 1)$ into the ensemble smoother as before
+# - We use the composite function $g(f(x))$ as our forward model, instead of $g(x)$
+# - When we plot the prior and posterior, we plot $f(x)$ instead of $x$ directly
+
+# %%
+# Create Uniform prior distributions
+realizations = 100
+PRIORS = [stats.uniform(loc=0.025, scale=0.02), stats.uniform(loc=0.0002, scale=0.0002)]
+
+
+# %%
+def prior_to_standard_normal(A):
+    """Map prior to U(0, 1), then use inverse of CDF to map to N(0, 1)."""
+    return np.vstack(
+        [stats.norm().ppf(prior.cdf(A_param)) for (A_param, prior) in zip(A, PRIORS)]
+    )
+
+
+def standard_normal_to_prior(A):
+    """Reverse mapping."""
+    return np.vstack(
+        [prior.ppf(stats.norm().cdf(A_param)) for (A_param, prior) in zip(A, PRIORS)]
+    )
+
+
+# verify that mappings are invertible
+A_uniform = np.vstack([prior.rvs(realizations) for prior in PRIORS])
+assert np.allclose(
+    standard_normal_to_prior(prior_to_standard_normal(A_uniform)), A_uniform
+)
+assert np.allclose(
+    prior_to_standard_normal(standard_normal_to_prior(A_uniform)), A_uniform
+)
+
+# %%
+# Create a standard normal prior and plot it in transformed space
+A = rng.standard_normal(size=(2, realizations))
+plot_result(standard_normal_to_prior(A), response_x_axis)
+
+# %% [markdown]
+# ## A single update step
+
+# %%
+plot_result(standard_normal_to_prior(A), response_x_axis)
+
+# The forward model is composed with the transformation from N(0, 1) to uniform priors
+responses_before = forward_model(standard_normal_to_prior(A), response_x_axis)
+Y = responses_before[observation_x_axis]
+
+# Run smoother for one step
+smoother = ies.SIES(
+    parameters=A,
+    covariance=observation_errors**2,
+    observations=observation_values,
+    seed=42,
+)
+
+new_A = smoother.sies_iteration(Y, step_length=1.0)
+
+plot_result(standard_normal_to_prior(new_A), response_x_axis)
 
 # %% [markdown]
 # ## Iterative smoother
 
 # %%
-import numpy as np
-from matplotlib import pyplot as plt
-import iterative_ensemble_smoother as ies
+from iterative_ensemble_smoother.utils import steplength_exponential
 
 
-def iterative_smoother():
+def iterative_smoother(A):
     A_current = np.copy(A)
     iterations = 4
-    smoother = ies.SIES(realizations)
-
-    for _ in range(iterations):
-        plot_result(A_current, response_x_axis, uniform, priors)
-
-        responses_before = forward_model(A_current, priors, response_x_axis)
-        Y = responses_before[observation_x_axis]
-
-        smoother.fit(Y, observation_errors, observation_values)
-        A_current = smoother.update(A_current)
-    plot_result(A_current, response_x_axis, uniform, priors)
-
-
-iterative_smoother()
+    smoother = ies.SIES(
+        parameters=A,
+        covariance=observation_errors**2,
+        observations=observation_values,
+        seed=42,
+    )
+
+    plot_result(
+        standard_normal_to_prior(A_current),
+        response_x_axis,
+        title="Prior",
+    )
+
+    for iteration in range(iterations):
+        responses_before = forward_model(
+            standard_normal_to_prior(A_current), response_x_axis
+        )
+
+        Y = responses_before[observation_x_axis]
+
+        A_current = smoother.sies_iteration(
+            Y, step_length=steplength_exponential(iteration + 1)
+        )
+
+        plot_result(
+            standard_normal_to_prior(A_current),
+            response_x_axis,
+            title=f"SIES iteration {iteration+1}",
+        )
+
+
+iterative_smoother(A)
 
 # %% [markdown]
-# ## ES-MDA (Multiple Data Assimilation - Ensemble Smoother)
+# ## ES-MDA (Ensemble Smoother - Multiple Data Assimilation)
 
 # %%
-import numpy as np
-from matplotlib import pyplot as plt
-import iterative_ensemble_smoother as ies
-
-
-def es_mda():
-    A_current = np.copy(A)
-    weights = [8, 4, 2, 1]
-    length = sum(1.0 / x for x in weights)
-
-    smoother = ies.ES()
-    for weight in weights:
-        plot_result(A_current, response_x_axis, uniform, priors)
-
-        responses_before = forward_model(A_current, priors, response_x_axis)
-        Y = responses_before[observation_x_axis]
-
-        observation_errors_scaled = observation_errors * sqrt(weight * length)
-        smoother.fit(Y, observation_errors_scaled, observation_values)
-        A_current = smoother.update(A_current)
-    plot_result(A_current, response_x_axis, uniform, priors)
-
-
-es_mda()
-
-# %%
+smoother = ies.ESMDA(
+    # If a 1D array is passed, it is interpreted
+    # as the diagonal of the covariance matrix.
+    covariance=observation_errors**2,
+    observations=observation_values,
+    # The inflation factors used in ESMDA
+    # They are scaled so that sum_i alpha_i^-1 = 1
+    alpha=np.array([8, 4, 2, 1]),
+    seed=42,
+)
+
+
+A_current = np.copy(A)
+
+plot_result(standard_normal_to_prior(A_current), response_x_axis, title="Prior")
+
+for iteration in range(smoother.num_assimilations()):
+    # Evaluate the model
+    responses_before = forward_model(
+        standard_normal_to_prior(A_current), response_x_axis
+    )
+    Y = responses_before[observation_x_axis]
+
+    # Assimilate data
+    A_current = smoother.assimilate(A_current, Y)
+
+    # Plot
+    plot_result(
+        standard_normal_to_prior(A_current),
+        response_x_axis,
+        title=f"ESMDA iteration {iteration+1}",
+    )