Commit

finished initial lasso implementation
tommyod committed Jan 5, 2024
1 parent 01f761c commit c546037
Showing 3 changed files with 46 additions and 154 deletions.
129 changes: 25 additions & 104 deletions docs/source/Lasso.py
@@ -56,7 +56,7 @@
rng = np.random.default_rng(42)

# Dimensionality of the problem
num_parameters = 100
num_parameters = 75
num_observations = 50
num_ensemble = 25
prior_std = 1
@@ -65,13 +65,13 @@
alpha = 1

# Effect size (coefficients in sparse mapping A)
effect_size = 1000
effect_size = 1

# %% [markdown]
# ## Create problem data - sparse tridiagonal matrix $A$

# %%
diagonal = effect_size * np.ones(min(num_parameters, num_observations))
diagonal = effect_size * np.ones(max(num_parameters, num_observations))

# Create a tridiagonal matrix (easiest with scipy)
A = sp.sparse.diags(
@@ -174,71 +174,11 @@ def g(X):
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler


def linear_l1_regression(U, Y):
"""
Performs LASSO regression for each response in Y against predictors in U,
constructing a sparse matrix of regression coefficients.
The function scales features in U using standard scaling before applying
LASSO, then re-scales the coefficients to the original scale of U. This
extracts the effect of each feature in U on each response in Y, ignoring
intercepts and constant terms.
Parameters
----------
U : np.ndarray
2D array of predictors with shape (n, p).
Y : np.ndarray
2D array of responses with shape (n, m).
Returns
-------
H_sparse : scipy.sparse.csc_matrix
Sparse matrix (m, p) with re-scaled LASSO regression coefficients for
each response in Y.
Raises
------
AssertionError
If the number of samples in U and Y do not match, or if the shape of
H_sparse is not (m, p).
"""
# https://github.com/equinor/graphite-maps/blob/main/graphite_maps/linear_regression.py

n, p = U.shape # p: number of features
n_y, m = Y.shape # m: number of y responses

# Assert that the first dimension of U and Y are the same
assert n == n_y, "Number of samples in U and Y must be the same"

scaler_u = StandardScaler()
U_scaled = scaler_u.fit_transform(U)

scaler_y = StandardScaler()
Y_scaled = scaler_y.fit_transform(Y)

# Loop over features
coefs = []
for j in range(m):
y_j = Y_scaled[:, j]

# Learn individual regularization and fit
alphas = np.logspace(-8, 8, num=32)
model_cv = LassoCV(alphas=alphas, fit_intercept=False, max_iter=10_000, cv=5)
model_cv.fit(U_scaled, y_j)

coef_scale = scaler_y.scale_[j] / scaler_u.scale_
coefs.append(model_cv.coef_ * coef_scale)

K = np.vstack(coefs)
assert K.shape == (m, p)

return K
from iterative_ensemble_smoother.localized import linear_l1_regression


Y = g(X)
K = linear_l1_regression(X.T, Y.T)
K = linear_l1_regression(X, Y)
plt.imshow(K)
plt.show()
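
As an aside (not part of the commit), the new call convention can be sanity-checked on synthetic data where the true sparse map is known. The names below (`K_true`, `X_test`, ...) are illustrative only, and the snippet assumes the package is installed from this commit:

```python
# Minimal sketch: linear_l1_regression(X, Y) with X of shape (num_parameters, N)
# and Y of shape (num_observations, N) returns K of shape
# (num_observations, num_parameters) such that K @ X ~ Y.
import numpy as np
from iterative_ensemble_smoother.localized import linear_l1_regression

rng = np.random.default_rng(0)
p, m, N = 10, 5, 200

K_true = np.zeros((m, p))
K_true[np.arange(m), np.arange(m)] = 1.0  # sparse ground truth

X_test = rng.standard_normal((p, N))
Y_test = K_true @ X_test + 0.01 * rng.standard_normal((m, N))

K_est = linear_l1_regression(X_test, Y_test)
assert K_est.shape == (m, p)
print(np.round(K_est, 2))  # close to K_true; spurious entries shrunk towards zero
```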

@@ -262,6 +202,7 @@ def solve_esmda(X, Y, covariance, observations, seed=1):
cov_YY = empirical_cross_covariance(Y, Y)

# Compute the update using the exact ESMDA approach
assert covariance.ndim == 1
K = cov_XY @ sp.linalg.inv(cov_YY + np.diag(covariance))
return X + K @ (D - Y)
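
For orientation (a restatement of the two lines above, not text from the commit): with diagonal observation covariance $C_D = \operatorname{diag}(\text{covariance})$, the exact update computed here is

$$K = \operatorname{cov}(x, y)\,\bigl(\operatorname{cov}(y, y) + C_D\bigr)^{-1}, \qquad X_{\text{posterior}} = X + K\,(D - Y),$$

where $D$ holds the perturbed observations.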

@@ -290,9 +231,10 @@ def solve_projected(X, Y, covariance, observations, seed=1):
cov_XY = empirical_cross_covariance(X, Y)
cov_YY = empirical_cross_covariance(Y, Y)

# Compute the update
# TODO: N-1 vs N here.
K = cov_XY @ sp.linalg.inv(cov_YY + S @ S.T / (N-1))
# covariance ~ S @ S.T / N

# Compute the update. Divide by N since we know the mean is zero
K = cov_XY @ sp.linalg.inv(cov_YY + S @ S.T / N)
return X + K @ (D - Y)
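
The comments "covariance ~ S @ S.T / N" and "Divide by N since we know the mean is zero" can be illustrated numerically (a hypothetical sketch, not part of the commit):

```python
# Sketch: when the noise samples have known mean zero, dividing by N (rather
# than N - 1) is the natural covariance estimator, so S @ S.T / N ~ C_D.
import numpy as np

rng = np.random.default_rng(0)
C_D = np.diag([1.0, 2.0, 3.0])            # diagonal observation covariance
L_chol = np.linalg.cholesky(C_D)

N = 100_000
S = L_chol @ rng.standard_normal((3, N))  # N samples from N(0, C_D)

print(np.round(S @ S.T / N, 2))           # approximately C_D
```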


@@ -310,13 +252,21 @@ def solve_lstsq(X, Y, covariance, observations, seed=1):
N = X.shape[1]
D = smoother.perturb_observations(ensemble_size=N, alpha=1.0)

# Center X and Y
X_center = X - np.mean(X, axis=1, keepdims=True)
Y_center = Y - np.mean(Y, axis=1, keepdims=True)

# Instead of using the covariance matrix, sample from it
# covariance ~ S @ S.T / (N - 1)

N = X.shape[1]
S = sample_mvnormal(C_dd_cholesky=smoother.C_D_L, rng=smoother.rng, size=N)
S = S * np.sqrt((N - 1) / N)  # Scale so that S @ S.T / (N - 1) ~ covariance; divide by N, not N - 1, since the mean is known to be zero

Y_noisy = Y + S
Y_noisy = Y_center + S

# Compute the update by solving K @ Y = X
K, *_ = sp.linalg.lstsq(Y_noisy.T, X.T)
K, *_ = sp.linalg.lstsq(Y_noisy.T, X_center.T)
K = K.T

return X + K @ (D - Y)
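
A sketch of why this least-squares solve targets the usual gain (implied by the code, but not written out in the commit): the least-squares solution of $(Y_c + S)^{\top} K^{\top} = X_c^{\top}$ is

$$K = X_c\,(Y_c + S)^{\top}\,\bigl[(Y_c + S)(Y_c + S)^{\top}\bigr]^{-1},$$

and because the sampled noise $S$ is independent of $Y_c$, the cross terms $Y_c S^{\top}$ and $X_c S^{\top}$ are approximately zero, so

$$(Y_c + S)(Y_c + S)^{\top} \approx Y_c Y_c^{\top} + S S^{\top} \approx (N - 1)\bigl(\operatorname{cov}(y, y) + C_D\bigr),$$

which recovers $K \approx \operatorname{cov}(x, y)\,\bigl(\operatorname{cov}(y, y) + C_D\bigr)^{-1}$.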
@@ -338,7 +288,7 @@ def solve_lasso_direct(X, Y, covariance, observations, seed=1):
D = smoother.perturb_observations(ensemble_size=N, alpha=1.0)

# Approximate forward model with H
H = linear_l1_regression(X.T, Y.T)
H = linear_l1_regression(X, Y)
cov_XX = empirical_cross_covariance(X, X)

# Compute the update
@@ -349,25 +299,16 @@ def solve_lasso(X, Y, covariance, observations, seed=1):
def solve_lasso(X, Y, covariance, observations, seed=1):
"""Solve K @ Y = X using Lasso."""

from iterative_ensemble_smoother.localized import LassoES

# Create a smoother to get D
smoother = ESMDA(
smoother = LassoES(
covariance=covariance,
observations=observations,
alpha=1,
seed=seed,
)
N = X.shape[1]
D = smoother.perturb_observations(ensemble_size=N, alpha=1.0)

# Instead of using the covariance matrix, sample from it
S = sample_mvnormal(C_dd_cholesky=smoother.C_D_L, rng=smoother.rng, size=N)

Y_noisy = Y + S # Seems to work better with Y instead of Y_noisy

# Compute the update
K = linear_l1_regression(Y_noisy.T, X.T)

return X + K @ (D - Y)
return smoother.assimilate(X, Y)
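
For readers skimming the diff, the new end-to-end workflow looks roughly as follows. This is a hypothetical sketch: the synthetic data is made up, and the constructor arguments (`covariance`, `observations`, `seed`) and the `assimilate(X, Y)` call are read off this diff rather than documented API:

```python
# Illustrative use of the new LassoES class introduced in this commit.
import numpy as np
from iterative_ensemble_smoother.localized import LassoES

rng = np.random.default_rng(42)
num_parameters, num_observations, ensemble_size = 8, 4, 100

X = rng.standard_normal((num_parameters, ensemble_size))   # prior ensemble
Y = X[:num_observations, :] + 0.1 * rng.standard_normal((num_observations, ensemble_size))
covariance = np.ones(num_observations)                      # diagonal C_D
observations = rng.standard_normal(num_observations)

smoother = LassoES(covariance=covariance, observations=observations, seed=1)
X_posterior = smoother.assimilate(X, Y)
assert X_posterior.shape == X.shape
```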


def solve_adaptive_ESMDA(X, Y, covariance, observations, seed=1):
@@ -464,23 +405,3 @@ def solve_adaptive_ESMDA(X, Y, covariance, observations, seed=1):
# %%

# %%

# %%

# %%

# %%

# %%

# %%

# %%

# %%

# %%

# %%

# %%
2 changes: 1 addition & 1 deletion lasso.md
@@ -132,4 +132,4 @@ $$\text{cov}(y, y) = H \text{cov}(x, x) H^T.$$
- [Issue 6599 on the Lasso in ERT](https://github.com/equinor/ert/issues/6599)
- [Presentation: Ensemblized linear least squares (LLS)](https://ncda-fs.web.norce.cloud/WS2023/raanes.pdf)
- [Paper: Ensemble Smoother with Multiple Data Assimilation](http://dx.doi.org/10.1016/j.cageo.2012.03.011)
- [Paper: Ensemble transport smoothing. Part I: Unified framework](https://arxiv.org/pdf/2210.17000.pdf)
- [Paper: Ensemble transport smoothing. Part I: Unified framework](https://arxiv.org/pdf/2210.17000.pdf)
69 changes: 20 additions & 49 deletions src/iterative_ensemble_smoother/localized.py
@@ -1,28 +1,13 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 5 09:27:34 2024
@author: TODL
"""

from iterative_ensemble_smoother.esmda import BaseESMDA
import numbers
from abc import ABC
from typing import Union

import numpy as np
import numpy.typing as npt
import scipy as sp # type: ignore

from iterative_ensemble_smoother.esmda_inversion import (
inversion_exact_cholesky,
inversion_subspace,
normalize_alpha,
)
from iterative_ensemble_smoother.utils import sample_mvnormal


import numpy as np
from sklearn.linear_model import LassoCV
from sklearn.linear_model import MultiTaskLassoCV

@@ -60,6 +45,8 @@ def linear_l1_regression(X, Y):
"""
# https://github.com/equinor/graphite-maps/blob/main/graphite_maps/linear_regression.py
# An option is to use MultiTaskLasso too, but not exactly the same objective
# https://scikit-learn.org/stable/modules/linear_model.html#multi-task-lasso

# The goal is to solve K @ X = Y for K, or equivalently X.T @ K.T = Y.T
num_parameters, ensemble_size1 = X.shape
@@ -76,17 +63,16 @@
# Loop over features
coefs = []
cv = min(5, ensemble_size1)
alphas = np.logspace(-8, 8, num=17)
for j in range(num_observations):
y_j = Y[j, :]
alphas = np.logspace(-6, 6, num=17)

# The matrix K in K @ X = Y is built up row by row
for j, y_j in enumerate(Y): # Loop over rows in Y

# Learn individual regularization and fit
model_cv = LassoCV(alphas=alphas, fit_intercept=False, max_iter=10_000, cv=cv)
model_cv.fit(X.T, y_j) # model_cv.alpha_

# Alphas for the next iteration
# alpha = model_cv.alpha_ * 0.5 * alphas[len(alphas)//2] * 0.5
# alphas = np.logspace(-2, 2, num=5) * model_cv.alpha_
# TODO: Consider searching smarter for alphas, e.g. momentum or
# searching within 1-2 orders of magnitude of previous alpha?

# Scale back the coefficients
coef_scale = stds_Y[j] / stds_X
@@ -96,23 +82,6 @@
assert K.shape == (num_observations, num_parameters)
return K
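
The "Scale back the coefficients" step above has a one-line justification (paraphrased, not stated in the commit): the LASSO is fit on standardized data, so it learns $\tilde{K}$ with $\tilde{y}_j \approx \sum_i \tilde{K}_{ji}\,\tilde{x}_i$, where $\tilde{y}_j = y_j/\sigma_{y_j}$ and $\tilde{x}_i = x_i/\sigma_{x_i}$. Substituting back to the original scale gives $y_j \approx \sum_i K_{ji}\,x_i$ with

$$K_{ji} = \tilde{K}_{ji}\,\frac{\sigma_{y_j}}{\sigma_{x_i}},$$

which is exactly the row-wise rescaling `coef_scale = stds_Y[j] / stds_X`.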

# =======================================================

# Alternative computation using MultiTaskLasso

# Create a lasso model
lasso = MultiTaskLassoCV(fit_intercept=False, cv=cv)
lasso.fit(X.T, Y.T)

# Get the matrix K
K = lasso.coef_
assert K.shape == (num_observations, num_parameters)

K = K / stds_X[np.newaxis, :]
K = K * stds_Y[:, np.newaxis]

return K


class LassoES(BaseESMDA):
"""
@@ -156,6 +125,7 @@ def assimilate(
*,
alpha: float = 1.0,
) -> npt.NDArray[np.double]:

# Verify shapes
_, num_ensemble = X.shape
num_outputs, num_emsemble2 = Y.shape
@@ -171,22 +141,23 @@
X_center = X - np.mean(X, axis=1, keepdims=True)
Y_center = Y - np.mean(Y, axis=1, keepdims=True)

# We have R @ R.T = C_D
# We have R @ R.T / N ~ C_D
R = np.sqrt(alpha) * sample_mvnormal(
C_dd_cholesky=self.C_D_L, rng=self.rng, size=num_ensemble
)

# L = sqrt((N - 1) / N)
L = np.sqrt((num_ensemble - 1)/ num_ensemble) * R
# S = R * sqrt((N - 1) / N), so that S @ S.T / (N-1) = covariance
S = np.sqrt((num_ensemble - 1) / num_ensemble) * R

# Compute regularized Kalman gain matrix matrix by solving
# (Y + L) K = X
# The definition of K is
# Compute regularized Kalman gain matrix by solving:
# (Y + S) K = X
# The definition of K is:
# (cov(Y, Y) + covariance) K = cov(X, Y), which is approximately
# (Y @ Y.T + L @ L.T) K = X @ Y.T, [X and Y are centered] which is approximately
# (Y + L) @ (Y + L).T @ K = X @ Y.T, which is approximately
# (Y + L) @ K = X
K = linear_l1_regression(X=Y_center + L, Y=X_center)
# (Y @ Y.T + S @ S.T) K = X @ Y.T, [X and Y are centered] which is approximately
# (Y + S) @ (Y + S).T @ K = X @ Y.T, which is approximately
# (Y + S) @ K = X
# TODO: Write this out properly.
K = linear_l1_regression(X=(Y_center + S), Y=X_center)

D = self.perturb_observations(ensemble_size=num_ensemble, alpha=alpha)
return X + K @ (D - Y)
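
Not part of the commit, but a quick numerical check of the approximation that the comment block (and its TODO) relies on, namely that the noise added to the centered responses carries the regularization term:

```python
# Sketch: with S scaled so that S @ S.T / (N - 1) ~ C_D, and the cross terms
# Y_c @ S.T vanishing in expectation, we have
# (Y_c + S) @ (Y_c + S).T / (N - 1)  ~  cov(Y, Y) + C_D.
import numpy as np

rng = np.random.default_rng(0)
m, N = 3, 50_000
C_D = np.diag([0.5, 1.0, 2.0])

Y = rng.standard_normal((m, N))                 # stand-in for model responses
Y_c = Y - Y.mean(axis=1, keepdims=True)

R = np.linalg.cholesky(C_D) @ rng.standard_normal((m, N))
S = np.sqrt((N - 1) / N) * R                    # same scaling as in assimilate()

lhs = (Y_c + S) @ (Y_c + S).T / (N - 1)
rhs = Y_c @ Y_c.T / (N - 1) + C_D
print(np.round(lhs - rhs, 2))                   # entries close to zero
```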
