-
Notifications
You must be signed in to change notification settings - Fork 0
/
clase_05_14_LASSO_simulado.py
60 lines (45 loc) · 1.55 KB
/
clase_05_14_LASSO_simulado.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""
Universidad Adolfo Ibáñez
Facultad de Ingeniería y Ciencias
TICS 585 - Reconocimiento de Patrones en imágenes
Basado en https://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_importances.html
Modificado por Miguel Carrasco
rev.1.1
"""
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso
# Build a synthetic two-class dataset: 1000 samples with 10 features, of
# which 3 are informative and none are redundant or repeated.
dataset_options = dict(
    n_samples=1000,
    n_features=10,
    n_informative=3,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    random_state=0,
    shuffle=False,
)
X, y = make_classification(**dataset_options)
print(f'Tamaño del dataset:{X.shape}')

# Split the data into train and test subsets (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Generate one label per feature column of X.
feature_names = [f"feature {col}" for col in range(X.shape[1])]
# Two-step pipeline: standardize the features, then fit a Lasso model.
scaling_step = ('scaler', StandardScaler())
lasso_step = ('modelo', Lasso())
pipeline = Pipeline([scaling_step, lasso_step])

# Candidate values for the Lasso `alpha`: from 0.01 up to (but excluding) 1
# in steps of 0.001.
alpha_grid = {'modelo__alpha': np.arange(0.01, 1, 0.001)}

# 5-fold cross-validated grid search over alpha, scored by negative mean
# squared error; verbose=3 is passed through for progress output.
search = GridSearchCV(
    pipeline,
    alpha_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    verbose=3,
)
search.fit(X_train, y_train)
print(search.best_params_)
# Pull the fitted Lasso step out of the best pipeline found by the search and
# use the absolute value of each coefficient as that feature's importance.
best_lasso = search.best_estimator_.named_steps['modelo']
coefficients = best_lasso.coef_
importance = np.abs(coefficients)
print(importance)

# Report the names of the features whose importance is strictly positive.
selected_mask = importance > 0
print(np.array(feature_names)[selected_mask])