2
2
import numpy as np
3
3
import pandas as pd
4
4
from sklearn .base import BaseEstimator
5
- from sklearn .linear_model import LinearRegression , RidgeCV , Ridge , ElasticNetCV
5
+ from sklearn .linear_model import LinearRegression , RidgeCV , Ridge , ElasticNet , ElasticNetCV
6
6
from sklearn .tree import DecisionTreeRegressor
7
7
from sklearn .utils .multiclass import check_classification_targets
8
- from sklearn .utils .validation import check_X_y
9
- from sklearn .utils .validation import _check_sample_weight
8
+ from sklearn .utils .validation import check_X_y , check_array , _check_sample_weight
10
9
from sklearn .model_selection import train_test_split
11
10
from sklearn .metrics import accuracy_score , roc_auc_score
12
11
from tqdm import tqdm
@@ -91,7 +90,8 @@ def fit(self, X, y, sample_weight=None):
91
90
self .coef_marginal_ = self ._fit_marginal (X , y , sample_weight )
92
91
93
92
# fit main
94
- self .est_main_ = self ._fit_main (X , y , sample_weight , self .coef_marginal_ )
93
+ self .est_main_ = self ._fit_main (
94
+ X , y , sample_weight , self .coef_marginal_ )
95
95
96
96
return self
97
97
@@ -110,7 +110,8 @@ def _fit_marginal(self, X, y, sample_weight):
110
110
else :
111
111
coef_marginal_ = []
112
112
for i in range (X .shape [1 ]):
113
- est_marginal .fit (X [:, i ].reshape (- 1 , 1 ), y , sample_weight = sample_weight )
113
+ est_marginal .fit (X [:, i ].reshape (- 1 , 1 ), y ,
114
+ sample_weight = sample_weight )
114
115
coef_marginal_ .append (deepcopy (est_marginal .coef_ ))
115
116
coef_marginal_ = np .vstack (coef_marginal_ ).squeeze ()
116
117
@@ -200,12 +201,82 @@ class MarginalShrinkageLinearModelRegressor(
200
201
# ...
201
202
202
203
204
class MarginalLinearModel(BaseEstimator):
    """Linear model that only fits marginal effects of each feature.

    A separate single-feature ElasticNet is fit to every column of ``X``.
    The stored ``coef_`` is the vector of those marginal slopes divided by
    the number of features, so ``X @ coef_`` is the mean of the per-feature
    contributions ``X[:, i] * slope_i``.

    NOTE: only the slopes of the marginal fits are kept; whatever intercept
    each ElasticNet estimates is discarded, so predictions have no offset.

    Attributes set by ``fit``
    -------------------------
    coef_ : np.ndarray, 1-D, one entry per feature
    alpha_ : copy of the ``alpha`` hyperparameter
    classes_ : class labels (only when the instance is a ClassifierMixin)
    """

    def __init__(self, alpha=1.0, l1_ratio=0.5, max_iter=1000, random_state=None):
        """Arguments are passed to sklearn.linear_model.ElasticNet."""
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.max_iter = max_iter
        self.random_state = random_state

    def fit(self, X, y, sample_weight=None):
        """Fit one ElasticNet per feature and store the averaged slopes.

        Parameters
        ----------
        X : dense 2-D array-like
        y : 1-D array-like of targets (class labels for classifiers)
        sample_weight : optional per-sample weights, forwarded to every
            marginal fit

        Returns
        -------
        self
        """
        # checks
        X, y = check_X_y(X, y, accept_sparse=False, multi_output=False)
        sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
        if isinstance(self, ClassifierMixin):
            check_classification_targets(y)
            # Classifiers regress on the integer index of each label.
            self.classes_, y = np.unique(y, return_inverse=True)

        # fit marginal estimator to each feature
        n_features = X.shape[1]
        slopes = []
        for i in range(n_features):
            est_marginal = ElasticNet(
                alpha=self.alpha,
                l1_ratio=self.l1_ratio,
                max_iter=self.max_iter,
                random_state=self.random_state,
            )
            est_marginal.fit(
                X[:, i].reshape(-1, 1), y, sample_weight=sample_weight
            )
            # A fresh estimator is built every iteration, so its coef_ can be
            # kept as-is; ravel() guards against any extra singleton axis.
            slopes.append(np.ravel(est_marginal.coef_))

        # Concatenate instead of vstack(...).squeeze(): squeeze() would
        # collapse a single-feature fit to a 0-d array, which breaks the
        # matrix product in predict_proba.
        self.coef_ = np.concatenate(slopes) / n_features
        self.alpha_ = self.alpha

        return self

    def predict_proba(self, X):
        """Return the raw linear scores ``X @ coef_`` (1-D, one per row).

        Despite the name these are uncalibrated linear outputs, not
        probabilities.
        """
        X = check_array(X, accept_sparse=False, dtype=None)
        return X @ self.coef_

    def predict(self, X):
        """Predict targets for ``X``.

        Regressors return the raw linear scores. Classifiers were fit on
        class indices, so the score is rounded to the nearest valid index
        and mapped back to the original label via ``classes_``.
        """
        scores = self.predict_proba(X)
        if not isinstance(self, ClassifierMixin):
            return scores
        # Scores are always 1-D here, so an argmax over axis=1 cannot work;
        # snap each score to the closest class index instead.
        last_index = len(self.classes_) - 1
        class_idx = np.clip(np.rint(scores), 0, last_index).astype(int)
        return self.classes_[class_idx]
249
+
250
+
251
class MarginalLinearRegressor(RegressorMixin, MarginalLinearModel):
    """Regression variant of MarginalLinearModel.

    The mixin is listed first, as scikit-learn's estimator conventions
    require, so the regressor tags/``score`` take precedence in the MRO.
    """
253
+
254
+
255
class MarginalLinearClassifier(ClassifierMixin, MarginalLinearModel):
    """Classification variant of MarginalLinearModel.

    The mixin is listed first, as scikit-learn's estimator conventions
    require, so the classifier tags/``score`` take precedence in the MRO.
    """
257
+
258
+
259
+ # if __name__ == '__main__':
260
+ # X, y, feature_names = imodels.get_clean_dataset('heart')
261
+ # X_train, X_test, y_train, y_test = train_test_split(
262
+ # X, y, random_state=42, test_size=0.2)
263
+ # m = MarginalLinearRegressor()
264
+
265
+ # m.fit(X_train, y_train)
266
+ # print(m.coef_)
267
+ # print(m.predict(X_test))
268
+ # print(m.score(X_test, y_test))
269
+
203
270
if __name__ == "__main__" :
204
271
# X, y, feature_names = imodels.get_clean_dataset("heart")
205
272
X , y , feature_names = imodels .get_clean_dataset (
206
273
** imodels .util .data_util .DSET_KWARGS ["california_housing" ]
207
274
)
208
275
276
+ # scale the data
277
+ X = StandardScaler ().fit_transform (X )
278
+ y = StandardScaler ().fit_transform (y .reshape (- 1 , 1 )).squeeze ()
279
+
209
280
print ("shapes" , X .shape , y .shape , "nunique" , np .unique (y ).size )
210
281
X_train , X_test , y_train , y_test = train_test_split (
211
282
X , y , random_state = 42 , test_size = 0.2
@@ -220,28 +291,30 @@ class MarginalShrinkageLinearModelRegressor(
220
291
)
221
292
results = defaultdict (list )
222
293
for m in [
223
- MarginalShrinkageLinearModelRegressor (** kwargs ),
224
- MarginalShrinkageLinearModelRegressor (est_marginal_name = None , ** kwargs ),
225
- MarginalShrinkageLinearModelRegressor (
226
- est_main_name = None ,
227
- ** kwargs ,
228
- ),
229
- MarginalShrinkageLinearModelRegressor (
230
- est_marginal_name = "ridge" ,
231
- est_main_name = "ridge" ,
232
- marginal_sign_constraint = True ,
233
- ** kwargs ,
234
- ),
235
- MarginalShrinkageLinearModelRegressor (
236
- est_marginal_name = None , est_main_name = "lasso" , ** kwargs
237
- ),
238
- MarginalShrinkageLinearModelRegressor (
239
- est_marginal_name = "ridge" ,
240
- est_main_name = "lasso" ,
241
- marginal_sign_constraint = True ,
242
- ** kwargs ,
243
- ),
244
- # RidgeCV(alphas=alphas, fit_intercept=False),
294
+ # MarginalShrinkageLinearModelRegressor(**kwargs),
295
+ # MarginalShrinkageLinearModelRegressor(
296
+ # est_marginal_name=None, **kwargs),
297
+ # MarginalShrinkageLinearModelRegressor(
298
+ # est_main_name=None,
299
+ # **kwargs,
300
+ # ),
301
+ # MarginalShrinkageLinearModelRegressor(
302
+ # est_marginal_name="ridge",
303
+ # est_main_name="ridge",
304
+ # marginal_sign_constraint=True,
305
+ # **kwargs,
306
+ # ),
307
+ # MarginalShrinkageLinearModelRegressor(
308
+ # est_marginal_name=None, est_main_name="lasso", **kwargs
309
+ # ),
310
+ # MarginalShrinkageLinearModelRegressor(
311
+ # est_marginal_name="ridge",
312
+ # est_main_name="lasso",
313
+ # marginal_sign_constraint=True,
314
+ # **kwargs,
315
+ # ),
316
+ MarginalLinearRegressor (alpha = 1.0 ),
317
+ RidgeCV (alphas = alphas , fit_intercept = False ),
245
318
]:
246
319
results ["model_name" ].append (str (m ))
247
320
m .fit (X_train , y_train )
@@ -254,11 +327,14 @@ class MarginalShrinkageLinearModelRegressor(
254
327
results ["test_roc" ].append (
255
328
roc_auc_score (y_test , m .predict_proba (X_test )[:, 1 ])
256
329
)
257
- results ["acc_train" ].append (accuracy_score (y_train , m .predict (X_train )))
258
- results ["acc_test" ].append (accuracy_score (y_test , m .predict (X_test )))
330
+ results ["acc_train" ].append (
331
+ accuracy_score (y_train , m .predict (X_train )))
332
+ results ["acc_test" ].append (
333
+ accuracy_score (y_test , m .predict (X_test )))
259
334
else :
260
335
y_pred = m .predict (X_test )
261
- results ["train_mse" ].append (np .mean ((y_train - m .predict (X_train )) ** 2 ))
336
+ results ["train_mse" ].append (
337
+ np .mean ((y_train - m .predict (X_train )) ** 2 ))
262
338
results ["test_mse" ].append (np .mean ((y_test - y_pred ) ** 2 ))
263
339
results ["train_r2" ].append (m .score (X_train , y_train ))
264
340
results ["test_r2" ].append (m .score (X_test , y_test ))
@@ -271,10 +347,10 @@ class MarginalShrinkageLinearModelRegressor(
271
347
coefs .append (deepcopy (lin .coef_ ))
272
348
print ("alpha best" , lin .alpha_ )
273
349
274
- diffs = pd .DataFrame ({str (i ): coefs [i ] for i in range (len (coefs ))})
275
- diffs ["diff 0 - 1" ] = diffs ["0" ] - diffs ["1" ]
276
- diffs ["diff 1 - 2" ] = diffs ["1" ] - diffs ["2" ]
277
- print (diffs )
350
+ # diffs = pd.DataFrame({str(i): coefs[i] for i in range(len(coefs))})
351
+ # diffs["diff 0 - 1"] = diffs["0"] - diffs["1"]
352
+ # diffs["diff 1 - 2"] = diffs["1"] - diffs["2"]
353
+ # print(diffs)
278
354
279
355
# don't round strings
280
356
with pd .option_context (
0 commit comments