-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path35_cost_sensitive_algorithms.py
24 lines (22 loc) · 1.08 KB
/
35_cost_sensitive_algorithms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# If the class distribution is 0.99 to 0.01 for the majority and
# minority classes, the class_weight argument can be given as a
# dictionary that assigns a small penalty (0.01) to errors on the
# majority class and a large penalty (0.99) to errors on the
# minority class, e.g. {0: 0.01, 1: 0.99}.  Alternatively,
# class_weight='balanced' (used below) derives inverse-frequency
# weights automatically from the training data.
# Example: cost-sensitive logistic regression for imbalanced binary
# classification.  Errors on the rare (minority) class are weighted more
# heavily during training via class_weight, and the model is scored with
# the F-measure, which is sensitive to minority-class performance.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

# Generate a synthetic binary dataset with a ~99:1 class imbalance:
# weights=[0.99] puts 99% of the 10,000 samples in class 0 and flip_y=0
# adds no label noise.  random_state is fixed so the dataset — and hence
# the printed score — is reproducible across runs.
X, y = make_classification(n_samples=10000, n_features=2, n_redundant=0,
                           n_clusters_per_class=1, weights=[0.99], flip_y=0,
                           random_state=1)
# Split into train/test halves; stratify=y preserves the 99:1 class ratio
# in both halves, and random_state makes the split deterministic.
trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.5,
                                                stratify=y, random_state=1)
# class_weight='balanced' weights each class inversely to its frequency
# (n_samples / (n_classes * count(class))), so a minority-class mistake
# costs roughly 99x more than a majority-class mistake here.
model = LogisticRegression(solver='liblinear', class_weight='balanced')
# Fit the cost-weighted model on the training half.
model.fit(trainX, trainy)
# Predict crisp class labels for the held-out half.
yhat = model.predict(testX)
# Report the F-measure (harmonic mean of precision and recall) for the
# positive/minority class.
print('F-Measure: %.3f' % f1_score(testy, yhat))