-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path35_cost_sensitive_algorithms.py
24 lines (22 loc) · 1.08 KB
/
35_cost_sensitive_algorithms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# If the class distribution is 0.99 to 0.01 for the majority and
# minority classes, the class_weight argument can be given as a
# dictionary that assigns a small penalty (0.01) to errors on the
# majority class and a large penalty (0.99) to errors on the
# minority class, e.g. {0: 0.01, 1: 0.99}.  Alternatively,
# class_weight='balanced' (used below) derives inverse-frequency
# weights automatically from the training data.
# Example: cost-sensitive logistic regression for imbalanced binary
# classification.  Errors on the rare (minority) class are weighted more
# heavily during training via class_weight, and the model is scored with
# the F-measure, which is sensitive to minority-class performance.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

# Generate a synthetic binary dataset with a ~99:1 class imbalance:
# weights=[0.99] puts 99% of the 10,000 samples in class 0 and flip_y=0
# adds no label noise.  random_state is fixed so the dataset — and hence
# the printed score — is reproducible across runs.
X, y = make_classification(n_samples=10000, n_features=2, n_redundant=0,
                           n_clusters_per_class=1, weights=[0.99], flip_y=0,
                           random_state=1)
# Split into train/test halves; stratify=y preserves the 99:1 class ratio
# in both halves, and random_state makes the split deterministic.
trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.5,
                                                stratify=y, random_state=1)
# class_weight='balanced' weights each class inversely to its frequency
# (n_samples / (n_classes * count(class))), so a minority-class mistake
# costs roughly 99x more than a majority-class mistake here.
model = LogisticRegression(solver='liblinear', class_weight='balanced')
# Fit the cost-weighted model on the training half.
model.fit(trainX, trainy)
# Predict crisp class labels for the held-out half.
yhat = model.predict(testX)
# Report the F-measure (harmonic mean of precision and recall) for the
# positive/minority class.
print('F-Measure: %.3f' % f1_score(testy, yhat))