Commit

finished initial lasso implementation
tommyod committed Jan 5, 2024
1 parent 01f761c commit c546037
Showing 3 changed files with 46 additions and 154 deletions.
129 changes: 25 additions & 104 deletions docs/source/Lasso.py
@@ -56,7 +56,7 @@
rng = np.random.default_rng(42)

# Dimensionality of the problem
num_parameters = 100
num_parameters = 75
num_observations = 50
num_ensemble = 25
prior_std = 1
@@ -65,13 +65,13 @@
alpha = 1

# Effect size (coefficients in sparse mapping A)
effect_size = 1000
effect_size = 1

# %% [markdown]
# ## Create problem data - sparse tridiagonal matrix $A$

# %%
diagonal = effect_size * np.ones(min(num_parameters, num_observations))
diagonal = effect_size * np.ones(max(num_parameters, num_observations))

# Create a tridiagonal matrix (easiest with scipy)
A = sp.sparse.diags(
@@ -174,71 +174,11 @@ def g(X):
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler


def linear_l1_regression(U, Y):
"""
Performs LASSO regression for each response in Y against predictors in U,
constructing a sparse matrix of regression coefficients.
The function scales features in U using standard scaling before applying
LASSO, then re-scales the coefficients to the original scale of U. This
extracts the effect of each feature in U on each response in Y, ignoring
intercepts and constant terms.
Parameters
----------
U : np.ndarray
2D array of predictors with shape (n, p).
Y : np.ndarray
2D array of responses with shape (n, m).
Returns
-------
H_sparse : scipy.sparse.csc_matrix
Sparse matrix (m, p) with re-scaled LASSO regression coefficients for
each response in Y.
Raises
------
AssertionError
If the number of samples in U and Y do not match, or if the shape of
H_sparse is not (m, p).
"""
# https://github.com/equinor/graphite-maps/blob/main/graphite_maps/linear_regression.py

n, p = U.shape # p: number of features
n_y, m = Y.shape # m: number of y responses

# Assert that the first dimension of U and Y are the same
assert n == n_y, "Number of samples in U and Y must be the same"

scaler_u = StandardScaler()
U_scaled = scaler_u.fit_transform(U)

scaler_y = StandardScaler()
Y_scaled = scaler_y.fit_transform(Y)

# Loop over features
coefs = []
for j in range(m):
y_j = Y_scaled[:, j]

# Learn individual regularization and fit
alphas = np.logspace(-8, 8, num=32)
model_cv = LassoCV(alphas=alphas, fit_intercept=False, max_iter=10_000, cv=5)
model_cv.fit(U_scaled, y_j)

coef_scale = scaler_y.scale_[j] / scaler_u.scale_
coefs.append(model_cv.coef_ * coef_scale)

K = np.vstack(coefs)
assert K.shape == (m, p)

return K
from iterative_ensemble_smoother.localized import linear_l1_regression


Y = g(X)
K = linear_l1_regression(X.T, Y.T)
K = linear_l1_regression(X, Y)
plt.imshow(K)
plt.show()
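
As an aside (not part of the commit), the new call convention can be sanity-checked on synthetic data where the true sparse map is known. The names below (`K_true`, `X_test`, ...) are illustrative only, and the snippet assumes the package is installed from this commit:

```python
# Minimal sketch: linear_l1_regression(X, Y) with X of shape (num_parameters, N)
# and Y of shape (num_observations, N) returns K of shape
# (num_observations, num_parameters) such that K @ X ~ Y.
import numpy as np
from iterative_ensemble_smoother.localized import linear_l1_regression

rng = np.random.default_rng(0)
p, m, N = 10, 5, 200

K_true = np.zeros((m, p))
K_true[np.arange(m), np.arange(m)] = 1.0  # sparse ground truth

X_test = rng.standard_normal((p, N))
Y_test = K_true @ X_test + 0.01 * rng.standard_normal((m, N))

K_est = linear_l1_regression(X_test, Y_test)
assert K_est.shape == (m, p)
print(np.round(K_est, 2))  # close to K_true; spurious entries shrunk towards zero
```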

@@ -262,6 +202,7 @@ def solve_esmda(X, Y, covariance, observations, seed=1):
cov_YY = empirical_cross_covariance(Y, Y)

# Compute the update using the exact ESMDA approach
assert covariance.ndim == 1
K = cov_XY @ sp.linalg.inv(cov_YY + np.diag(covariance))
return X + K @ (D - Y)
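
For orientation (a restatement of the two lines above, not text from the commit): with diagonal observation covariance $C_D = \operatorname{diag}(\text{covariance})$, the exact update computed here is

$$K = \operatorname{cov}(x, y)\,\bigl(\operatorname{cov}(y, y) + C_D\bigr)^{-1}, \qquad X_{\text{posterior}} = X + K\,(D - Y),$$

where $D$ holds the perturbed observations.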

@@ -290,9 +231,10 @@ def solve_projected(X, Y, covariance, observations, seed=1):
cov_XY = empirical_cross_covariance(X, Y)
cov_YY = empirical_cross_covariance(Y, Y)

# Compute the update
# TODO: N-1 vs N here.
K = cov_XY @ sp.linalg.inv(cov_YY + S @ S.T / (N-1))
# covariance ~ S @ S.T / N

# Compute the update. Divide by N since we know the mean is zero
K = cov_XY @ sp.linalg.inv(cov_YY + S @ S.T / N)
return X + K @ (D - Y)
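
The comments "covariance ~ S @ S.T / N" and "Divide by N since we know the mean is zero" can be illustrated numerically (a hypothetical sketch, not part of the commit):

```python
# Sketch: when the noise samples have known mean zero, dividing by N (rather
# than N - 1) is the natural covariance estimator, so S @ S.T / N ~ C_D.
import numpy as np

rng = np.random.default_rng(0)
C_D = np.diag([1.0, 2.0, 3.0])            # diagonal observation covariance
L_chol = np.linalg.cholesky(C_D)

N = 100_000
S = L_chol @ rng.standard_normal((3, N))  # N samples from N(0, C_D)

print(np.round(S @ S.T / N, 2))           # approximately C_D
```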


@@ -310,13 +252,21 @@ def solve_lstsq(X, Y, covariance, observations, seed=1):
N = X.shape[1]
D = smoother.perturb_observations(ensemble_size=N, alpha=1.0)

# Center X and Y
X_center = X - np.mean(X, axis=1, keepdims=True)
Y_center = Y - np.mean(Y, axis=1, keepdims=True)

# Instead of using the covariance matrix, sample from it
# covariance ~ S @ S.T / (N - 1)

N = X.shape[1]
S = sample_mvnormal(C_dd_cholesky=smoother.C_D_L, rng=smoother.rng, size=N)
S = S * np.sqrt((N - 1) / N)  # Scale so that S @ S.T / (N - 1) ~ covariance; divide by N, not N - 1, since the mean is known to be zero

Y_noisy = Y + S
Y_noisy = Y_center + S

# Compute the update by solving K @ Y = X
K, *_ = sp.linalg.lstsq(Y_noisy.T, X.T)
K, *_ = sp.linalg.lstsq(Y_noisy.T, X_center.T)
K = K.T

return X + K @ (D - Y)
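
A sketch of why this least-squares solve targets the usual gain (implied by the code, but not written out in the commit): the least-squares solution of $(Y_c + S)^{\top} K^{\top} = X_c^{\top}$ is

$$K = X_c\,(Y_c + S)^{\top}\,\bigl[(Y_c + S)(Y_c + S)^{\top}\bigr]^{-1},$$

and because the sampled noise $S$ is independent of $Y_c$, the cross terms $Y_c S^{\top}$ and $X_c S^{\top}$ are approximately zero, so

$$(Y_c + S)(Y_c + S)^{\top} \approx Y_c Y_c^{\top} + S S^{\top} \approx (N - 1)\bigl(\operatorname{cov}(y, y) + C_D\bigr),$$

which recovers $K \approx \operatorname{cov}(x, y)\,\bigl(\operatorname{cov}(y, y) + C_D\bigr)^{-1}$.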
@@ -338,7 +288,7 @@ def solve_lasso_direct(X, Y, covariance, observations, seed=1):
D = smoother.perturb_observations(ensemble_size=N, alpha=1.0)

# Approximate forward model with H
H = linear_l1_regression(X.T, Y.T)
H = linear_l1_regression(X, Y)
cov_XX = empirical_cross_covariance(X, X)

# Compute the update
@@ -349,25 +299,16 @@ def solve_lasso(X, Y, covariance, observations, seed=1):
def solve_lasso(X, Y, covariance, observations, seed=1):
"""Solve K @ Y = X using Lasso."""

from iterative_ensemble_smoother.localized import LassoES

# Create a smoother to get D
smoother = ESMDA(
smoother = LassoES(
covariance=covariance,
observations=observations,
alpha=1,
seed=seed,
)
N = X.shape[1]
D = smoother.perturb_observations(ensemble_size=N, alpha=1.0)

# Instead of using the covariance matrix, sample from it
S = sample_mvnormal(C_dd_cholesky=smoother.C_D_L, rng=smoother.rng, size=N)

Y_noisy = Y + S # Seems to work better with Y instead of Y_noisy

# Compute the update
K = linear_l1_regression(Y_noisy.T, X.T)

return X + K @ (D - Y)
return smoother.assimilate(X, Y)
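
For readers skimming the diff, the new end-to-end workflow looks roughly as follows. This is a hypothetical sketch: the synthetic data is made up, and the constructor arguments (`covariance`, `observations`, `seed`) and the `assimilate(X, Y)` call are read off this diff rather than documented API:

```python
# Illustrative use of the new LassoES class introduced in this commit.
import numpy as np
from iterative_ensemble_smoother.localized import LassoES

rng = np.random.default_rng(42)
num_parameters, num_observations, ensemble_size = 8, 4, 100

X = rng.standard_normal((num_parameters, ensemble_size))   # prior ensemble
Y = X[:num_observations, :] + 0.1 * rng.standard_normal((num_observations, ensemble_size))
covariance = np.ones(num_observations)                      # diagonal C_D
observations = rng.standard_normal(num_observations)

smoother = LassoES(covariance=covariance, observations=observations, seed=1)
X_posterior = smoother.assimilate(X, Y)
assert X_posterior.shape == X.shape
```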


def solve_adaptive_ESMDA(X, Y, covariance, observations, seed=1):
@@ -464,23 +405,3 @@ def solve_adaptive_ESMDA(X, Y, covariance, observations, seed=1):
# %%

# %%

# %%

# %%

# %%

# %%

# %%

# %%

# %%

# %%

# %%

# %%
2 changes: 1 addition & 1 deletion lasso.md
@@ -132,4 +132,4 @@ $$\text{cov}(y, y) = H \text{cov}(x, x) H^T.$$
- [Issue 6599 on the Lasso in ERT](https://github.com/equinor/ert/issues/6599)
- [Presentation: Ensemblized linear least squares (LLS)](https://ncda-fs.web.norce.cloud/WS2023/raanes.pdf)
- [Paper: Ensemble Smoother with Multiple Data Assimilation](http://dx.doi.org/10.1016/j.cageo.2012.03.011)
- [Paper: Ensemble transport smoothing. Part I: Unified framework](https://arxiv.org/pdf/2210.17000.pdf)
- [Paper: Ensemble transport smoothing. Part I: Unified framework](https://arxiv.org/pdf/2210.17000.pdf)
69 changes: 20 additions & 49 deletions src/iterative_ensemble_smoother/localized.py
@@ -1,28 +1,13 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 5 09:27:34 2024
@author: TODL
"""

from iterative_ensemble_smoother.esmda import BaseESMDA
import numbers
from abc import ABC
from typing import Union

import numpy as np
import numpy.typing as npt
import scipy as sp # type: ignore

from iterative_ensemble_smoother.esmda_inversion import (
inversion_exact_cholesky,
inversion_subspace,
normalize_alpha,
)
from iterative_ensemble_smoother.utils import sample_mvnormal


import numpy as np
from sklearn.linear_model import LassoCV
from sklearn.linear_model import MultiTaskLassoCV

@@ -60,6 +45,8 @@ def linear_l1_regression(X, Y):
"""
# https://github.com/equinor/graphite-maps/blob/main/graphite_maps/linear_regression.py
# An option is to use MultiTaskLasso too, but not exactly the same objective
# https://scikit-learn.org/stable/modules/linear_model.html#multi-task-lasso

# The goal is to solve K @ X = Y for K, or equivalently X.T @ K.T = Y.T
num_parameters, ensemble_size1 = X.shape
@@ -76,17 +63,16 @@
# Loop over features
coefs = []
cv = min(5, ensemble_size1)
alphas = np.logspace(-8, 8, num=17)
for j in range(num_observations):
y_j = Y[j, :]
alphas = np.logspace(-6, 6, num=17)

# The matrix K in K @ X = Y is built up row by row
for j, y_j in enumerate(Y): # Loop over rows in Y

# Learn individual regularization and fit
model_cv = LassoCV(alphas=alphas, fit_intercept=False, max_iter=10_000, cv=cv)
model_cv.fit(X.T, y_j) # model_cv.alpha_

# Alphas for the next iteration
# alpha = model_cv.alpha_ * 0.5 * alphas[len(alphas)//2] * 0.5
# alphas = np.logspace(-2, 2, num=5) * model_cv.alpha_
# TODO: Consider searching smarter for alphas, e.g. momentum or
# searching within 1-2 orders of magnitude of previous alpha?

# Scale back the coefficients
coef_scale = stds_Y[j] / stds_X
@@ -96,23 +82,6 @@
assert K.shape == (num_observations, num_parameters)
return K
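
The "Scale back the coefficients" step above has a one-line justification (paraphrased, not stated in the commit): the LASSO is fit on standardized data, so it learns $\tilde{K}$ with $\tilde{y}_j \approx \sum_i \tilde{K}_{ji}\,\tilde{x}_i$, where $\tilde{y}_j = y_j/\sigma_{y_j}$ and $\tilde{x}_i = x_i/\sigma_{x_i}$. Substituting back to the original scale gives $y_j \approx \sum_i K_{ji}\,x_i$ with

$$K_{ji} = \tilde{K}_{ji}\,\frac{\sigma_{y_j}}{\sigma_{x_i}},$$

which is exactly the row-wise rescaling `coef_scale = stds_Y[j] / stds_X`.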

# =======================================================

# Alternative computation using MultiTaskLasso

# Create a lasso model
lasso = MultiTaskLassoCV(fit_intercept=False, cv=cv)
lasso.fit(X.T, Y.T)

# Get the matrix K
K = lasso.coef_
assert K.shape == (num_observations, num_parameters)

K = K / stds_X[np.newaxis, :]
K = K * stds_Y[:, np.newaxis]

return K


class LassoES(BaseESMDA):
"""
@@ -156,6 +125,7 @@ def assimilate(
*,
alpha: float = 1.0,
) -> npt.NDArray[np.double]:

# Verify shapes
_, num_ensemble = X.shape
num_outputs, num_emsemble2 = Y.shape
@@ -171,22 +141,23 @@
X_center = X - np.mean(X, axis=1, keepdims=True)
Y_center = Y - np.mean(Y, axis=1, keepdims=True)

# We have R @ R.T = C_D
# We have R @ R.T / N ~ C_D
R = np.sqrt(alpha) * sample_mvnormal(
C_dd_cholesky=self.C_D_L, rng=self.rng, size=num_ensemble
)

# L = sqrt((N - 1) / N)
L = np.sqrt((num_ensemble - 1)/ num_ensemble) * R
# S = R * sqrt((N - 1) / N), so that S @ S.T / (N-1) = covariance
S = np.sqrt((num_ensemble - 1) / num_ensemble) * R

# Compute regularized Kalman gain matrix matrix by solving
# (Y + L) K = X
# The definition of K is
# Compute regularized Kalman gain matrix by solving:
# (Y + S) K = X
# The definition of K is:
# (cov(Y, Y) + covariance) K = cov(X, Y), which is approximately
# (Y @ Y.T + L @ L.T) K = X @ Y.T, [X and Y are centered] which is approximately
# (Y + L) @ (Y + L).T @ K = X @ Y.T, which is approximately
# (Y + L) @ K = X
K = linear_l1_regression(X=Y_center + L, Y=X_center)
# (Y @ Y.T + S @ S.T) K = X @ Y.T, [X and Y are centered] which is approximately
# (Y + S) @ (Y + S).T @ K = X @ Y.T, which is approximately
# (Y + S) @ K = X
# TODO: Write this out properly.
K = linear_l1_regression(X=(Y_center + S), Y=X_center)

D = self.perturb_observations(ensemble_size=num_ensemble, alpha=alpha)
return X + K @ (D - Y)
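
Not part of the commit, but a quick numerical check of the approximation that the comment block (and its TODO) relies on, namely that the noise added to the centered responses carries the regularization term:

```python
# Sketch: with S scaled so that S @ S.T / (N - 1) ~ C_D, and the cross terms
# Y_c @ S.T vanishing in expectation, we have
# (Y_c + S) @ (Y_c + S).T / (N - 1)  ~  cov(Y, Y) + C_D.
import numpy as np

rng = np.random.default_rng(0)
m, N = 3, 50_000
C_D = np.diag([0.5, 1.0, 2.0])

Y = rng.standard_normal((m, N))                 # stand-in for model responses
Y_c = Y - Y.mean(axis=1, keepdims=True)

R = np.linalg.cholesky(C_D) @ rng.standard_normal((m, N))
S = np.sqrt((N - 1) / N) * R                    # same scaling as in assimilate()

lhs = (Y_c + S) @ (Y_c + S).T / (N - 1)
rhs = Y_c @ Y_c.T / (N - 1) + C_D
print(np.round(lhs - rhs, 2))                   # entries close to zero
```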
