Commit a31e50a

add marginallinearregressor
1 parent d92ec25 commit a31e50a

File tree

1 file changed: +110 −34 lines changed


imodels/algebraic/marginal_shrinkage_linear_model.py

Lines changed: 110 additions & 34 deletions
@@ -2,11 +2,10 @@
 import numpy as np
 import pandas as pd
 from sklearn.base import BaseEstimator
-from sklearn.linear_model import LinearRegression, RidgeCV, Ridge, ElasticNetCV
+from sklearn.linear_model import LinearRegression, RidgeCV, Ridge, ElasticNet, ElasticNetCV
 from sklearn.tree import DecisionTreeRegressor
 from sklearn.utils.multiclass import check_classification_targets
-from sklearn.utils.validation import check_X_y
-from sklearn.utils.validation import _check_sample_weight
+from sklearn.utils.validation import check_X_y, check_array, _check_sample_weight
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score, roc_auc_score
 from tqdm import tqdm
@@ -91,7 +90,8 @@ def fit(self, X, y, sample_weight=None):
         self.coef_marginal_ = self._fit_marginal(X, y, sample_weight)
 
         # fit main
-        self.est_main_ = self._fit_main(X, y, sample_weight, self.coef_marginal_)
+        self.est_main_ = self._fit_main(
+            X, y, sample_weight, self.coef_marginal_)
 
         return self
 
@@ -110,7 +110,8 @@ def _fit_marginal(self, X, y, sample_weight):
         else:
             coef_marginal_ = []
             for i in range(X.shape[1]):
-                est_marginal.fit(X[:, i].reshape(-1, 1), y, sample_weight=sample_weight)
+                est_marginal.fit(X[:, i].reshape(-1, 1), y,
+                                 sample_weight=sample_weight)
                 coef_marginal_.append(deepcopy(est_marginal.coef_))
             coef_marginal_ = np.vstack(coef_marginal_).squeeze()
 
@@ -200,12 +201,82 @@ class MarginalShrinkageLinearModelRegressor(
 # ...
 
 
+class MarginalLinearModel(BaseEstimator):
+    """Linear model that only fits marginal effects of each feature.
+    """
+
+    def __init__(self, alpha=1.0, l1_ratio=0.5, max_iter=1000, random_state=None):
+        '''Arguments are passed to sklearn.linear_model.ElasticNet
+        '''
+        self.alpha = alpha
+        self.l1_ratio = l1_ratio
+        self.max_iter = max_iter
+        self.random_state = random_state
+
+    def fit(self, X, y, sample_weight=None):
+        # checks
+        X, y = check_X_y(X, y, accept_sparse=False, multi_output=False)
+        sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
+        if isinstance(self, ClassifierMixin):
+            check_classification_targets(y)
+            self.classes_, y = np.unique(y, return_inverse=True)
+
+        # fit marginal estimator to each feature
+        coef_marginal_ = []
+        for i in range(X.shape[1]):
+            est_marginal = ElasticNet(alpha=self.alpha, l1_ratio=self.l1_ratio,
+                                      max_iter=self.max_iter, random_state=self.random_state)
+            est_marginal.fit(X[:, i].reshape(-1, 1), y,
+                             sample_weight=sample_weight)
+            coef_marginal_.append(deepcopy(est_marginal.coef_))
+        coef_marginal_ = np.vstack(coef_marginal_).squeeze()
+
+        self.coef_ = coef_marginal_ / X.shape[1]
+        self.alpha_ = self.alpha
+
+        return self
+
+    def predict_proba(self, X):
+        X = check_array(X, accept_sparse=False, dtype=None)
+        return X @ self.coef_
+
+    def predict(self, X):
+        probs = self.predict_proba(X)
+        if isinstance(self, ClassifierMixin):
+            return np.argmax(probs, axis=1)
+        else:
+            return probs
+
+
+class MarginalLinearRegressor(MarginalLinearModel, RegressorMixin):
+    ...
+
+
+class MarginalLinearClassifier(MarginalLinearModel, ClassifierMixin):
+    ...
+
+
+# if __name__ == '__main__':
+#     X, y = imodels.get_clean_dataset('heart')
+#     X_train, X_test, y_train, y_test = train_test_split(
+#         X, y, random_state=42, test_size=0.2)
+#     m = MarginalLinearModelRegressor()
+
+#     m.fit(X_train, y_train)
+#     print(m.coef_)
+#     print(m.predict(X_test))
+#     print(m.score(X_test, y_test))
+
 if __name__ == "__main__":
     # X, y, feature_names = imodels.get_clean_dataset("heart")
     X, y, feature_names = imodels.get_clean_dataset(
         **imodels.util.data_util.DSET_KWARGS["california_housing"]
     )
 
+    # scale the data
+    X = StandardScaler().fit_transform(X)
+    y = StandardScaler().fit_transform(y.reshape(-1, 1)).squeeze()
+
     print("shapes", X.shape, y.shape, "nunique", np.unique(y).size)
     X_train, X_test, y_train, y_test = train_test_split(
         X, y, random_state=42, test_size=0.2
@@ -220,28 +291,30 @@ class MarginalShrinkageLinearModelRegressor(
     )
     results = defaultdict(list)
     for m in [
-        MarginalShrinkageLinearModelRegressor(**kwargs),
-        MarginalShrinkageLinearModelRegressor(est_marginal_name=None, **kwargs),
-        MarginalShrinkageLinearModelRegressor(
-            est_main_name=None,
-            **kwargs,
-        ),
-        MarginalShrinkageLinearModelRegressor(
-            est_marginal_name="ridge",
-            est_main_name="ridge",
-            marginal_sign_constraint=True,
-            **kwargs,
-        ),
-        MarginalShrinkageLinearModelRegressor(
-            est_marginal_name=None, est_main_name="lasso", **kwargs
-        ),
-        MarginalShrinkageLinearModelRegressor(
-            est_marginal_name="ridge",
-            est_main_name="lasso",
-            marginal_sign_constraint=True,
-            **kwargs,
-        ),
-        # RidgeCV(alphas=alphas, fit_intercept=False),
+        # MarginalShrinkageLinearModelRegressor(**kwargs),
+        # MarginalShrinkageLinearModelRegressor(
+        #     est_marginal_name=None, **kwargs),
+        # MarginalShrinkageLinearModelRegressor(
+        #     est_main_name=None,
+        #     **kwargs,
+        # ),
+        # MarginalShrinkageLinearModelRegressor(
+        #     est_marginal_name="ridge",
+        #     est_main_name="ridge",
+        #     marginal_sign_constraint=True,
+        #     **kwargs,
+        # ),
+        # MarginalShrinkageLinearModelRegressor(
+        #     est_marginal_name=None, est_main_name="lasso", **kwargs
+        # ),
+        # MarginalShrinkageLinearModelRegressor(
+        #     est_marginal_name="ridge",
+        #     est_main_name="lasso",
+        #     marginal_sign_constraint=True,
+        #     **kwargs,
+        # ),
+        MarginalLinearRegressor(alpha=1.0),
+        RidgeCV(alphas=alphas, fit_intercept=False),
     ]:
         results["model_name"].append(str(m))
         m.fit(X_train, y_train)
@@ -254,11 +327,14 @@ class MarginalShrinkageLinearModelRegressor(
             results["test_roc"].append(
                 roc_auc_score(y_test, m.predict_proba(X_test)[:, 1])
             )
-            results["acc_train"].append(accuracy_score(y_train, m.predict(X_train)))
-            results["acc_test"].append(accuracy_score(y_test, m.predict(X_test)))
+            results["acc_train"].append(
+                accuracy_score(y_train, m.predict(X_train)))
+            results["acc_test"].append(
+                accuracy_score(y_test, m.predict(X_test)))
         else:
             y_pred = m.predict(X_test)
-            results["train_mse"].append(np.mean((y_train - m.predict(X_train)) ** 2))
+            results["train_mse"].append(
+                np.mean((y_train - m.predict(X_train)) ** 2))
             results["test_mse"].append(np.mean((y_test - y_pred) ** 2))
             results["train_r2"].append(m.score(X_train, y_train))
             results["test_r2"].append(m.score(X_test, y_test))
@@ -271,10 +347,10 @@ class MarginalShrinkageLinearModelRegressor(
         coefs.append(deepcopy(lin.coef_))
         print("alpha best", lin.alpha_)
 
-    diffs = pd.DataFrame({str(i): coefs[i] for i in range(len(coefs))})
-    diffs["diff 0 - 1"] = diffs["0"] - diffs["1"]
-    diffs["diff 1 - 2"] = diffs["1"] - diffs["2"]
-    print(diffs)
+    # diffs = pd.DataFrame({str(i): coefs[i] for i in range(len(coefs))})
+    # diffs["diff 0 - 1"] = diffs["0"] - diffs["1"]
+    # diffs["diff 1 - 2"] = diffs["1"] - diffs["2"]
+    # print(diffs)
 
     # don't round strings
     with pd.option_context(
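
Below is a minimal usage sketch of the MarginalLinearRegressor added in this commit. It assumes the class is importable from the module path shown above (imodels.algebraic.marginal_shrinkage_linear_model); the synthetic data, variable names, and alpha value are illustrative, not taken from the commit.

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# assumed import path, based on the file modified in this commit
from imodels.algebraic.marginal_shrinkage_linear_model import MarginalLinearRegressor

# illustrative synthetic regression data (not from the commit)
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
y = X @ np.array([1.0, -2.0, 0.5, 0.0, 3.0]) + rng.normal(scale=0.1, size=200)

# standardize, mirroring the commit's __main__ block
X = StandardScaler().fit_transform(X)
y = StandardScaler().fit_transform(y.reshape(-1, 1)).squeeze()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2)

# alpha and l1_ratio are forwarded to the per-feature ElasticNet fits
m = MarginalLinearRegressor(alpha=1.0)
m.fit(X_train, y_train)
print(m.coef_)                  # marginal coefficients, divided by the number of features
print(m.score(X_test, y_test))  # R^2 via RegressorMixin.score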
