LR.py
#!/usr/local/bin/python3
import numpy as np
import pandas as pd
import json
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import make_scorer
from shutil import get_terminal_size
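# Let pandas use the full terminal width and show all columns when printing DataFrames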
pd.set_option('display.width', get_terminal_size()[0])
pd.set_option('display.max_columns', None)
df = pd.read_csv("data.csv")

# Experiment settings
cv = 10          # number of cross-validation folds
test_frac = 0.2  # fraction of the data held out for testing
max_iter = 1000  # iteration cap for the logistic regression solver

X = df.drop(['Target_ml'], axis=1)
y = df['Target_ml']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_frac, random_state=42)

# Fit the scaler on the training data only, then apply the same transform to the test data
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
classifier = LogisticRegression(max_iter=max_iter)
classifier.fit(X_train, y_train)

# cross_val_score clones the estimator and returns one accuracy score per fold
training_score = cross_val_score(classifier, X_train, y_train, cv=cv)
#print("For a %1.1f test fraction (%d train & %d test)" %(test_frac, len(X_train), len(X_test)))
predictions = classifier.predict(X_test)
# For binary labels, confusion_matrix(y_true, y_pred).ravel() returns (TN, FP, FN, TP)
TN, FP, FN, TP = confusion_matrix(y_test, predictions).ravel()
acc = (TP + TN)/(TP + FP + TN + FN)  # fraction correct; reported as a percentage below
print("Unoptimized model")
print("-----------------")
print("Training score = %1.3f" %(training_score.mean()*100))
print("Validation accuracy = %1.3f" %(acc*100))
# Silence FutureWarnings (e.g. from deprecated penalty/solver options) during the grid search
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
param_grid = [
    {'penalty': ['l1', 'l2', 'elasticnet', 'none'],
     'C': np.logspace(-4, 4, 20),
     'solver': ['lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'],
     'max_iter': [100, 1000, 2500, 5000]}
]
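# Caveat: not every penalty/solver pair above is valid (e.g. 'lbfgs' only supports 'l2'
# and 'none', and 'elasticnet' requires the 'saga' solver plus an l1_ratio value).
# GridSearchCV typically records such failed fits with a warning instead of aborting,
# so the search should still complete.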
clf = GridSearchCV(classifier, param_grid=param_grid, cv=cv, verbose=True, n_jobs=-1)
best_clf = clf.fit(X, y)  # cross-validating (cv folds), so the whole dataset is used; note X is unscaled here, unlike the hold-out run above
print(best_clf.best_estimator_)
print(f'Accuracy: {best_clf.score(X, y):.3f}')  # scored on the same data used for fitting, so optimistic
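# A minimal alternative sketch, assuming the same data.csv layout and the X, y frames
# loaded above: wrapping the scaler and the classifier in a Pipeline keeps StandardScaler
# inside each cross-validation split, so the scaler is never fit on held-out data and the
# grid search sees consistently scaled features. The step names ('scaler', 'clf') and the
# reduced grid below are illustrative choices, not taken from the script above.
from sklearn.pipeline import Pipeline

pipe = Pipeline([('scaler', StandardScaler()),
                 ('clf', LogisticRegression(max_iter=max_iter))])
pipe_grid = {'clf__C': np.logspace(-4, 4, 20),
             'clf__penalty': ['l2'],
             'clf__solver': ['lbfgs', 'liblinear']}
pipe_search = GridSearchCV(pipe, param_grid=pipe_grid, cv=cv, n_jobs=-1)
pipe_search.fit(X, y)
print(pipe_search.best_params_)
print("Pipeline mean CV accuracy = %1.3f" % (pipe_search.best_score_ * 100))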