Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

benchmark-update #600

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 127 additions & 15 deletions notebooks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,35 @@
from pyod.models.cof import COF
from pyod.models.sod import SOD


from pyod.models.auto_encoder import AutoEncoder
from pyod.models.cd import CD
from pyod.models.copod import COPOD
from pyod.models.dif import DIF
from pyod.models.ecod import ECOD
from pyod.models.gmm import GMM
from pyod.models.kde import KDE
from pyod.models.lmdd import LMDD
from pyod.models.loci import LOCI #19S
from pyod.models.loda import LODA
from pyod.models.qmcd import QMCD
from pyod.models.sampling import Sampling
from pyod.models.sos import SOS

from pyod.models.alad import ALAD #40s
from pyod.models.anogan import AnoGAN #151s
from pyod.models.inne import INNE
from pyod.models.kpca import KPCA
from pyod.models.lscp import LSCP
from pyod.models.lunar import LUNAR
from pyod.models.mad import MAD
from pyod.models.mo_gaal import MO_GAAL
from pyod.models.rgraph import RGraph #271S
from pyod.models.rod import ROD
from pyod.models.so_gaal import SO_GAAL
from pyod.models.sod import SOD
from pyod.models.vae import VAE

from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score
Expand All @@ -61,15 +90,19 @@
'shuttle.mat',
'vertebral.mat',
'vowels.mat',
'wbc.mat']
'wbc.mat'
]

# define the number of iterations
n_ite = 10
n_classifiers = 10
n_ite = 1

# Column layout of the result tables: four dataset descriptors followed by
# one column per detector.
# The detector columns are listed in the same order as the positions assigned
# in `classifiers_indices` (ABOD=0 ... SOD=31), so that a score stored at
# position k is reported under the matching detector name. LSCP and VAE have
# no entry in that index table, so their columns are placed last instead of
# in the middle, where they shifted LUNAR..SOD by one column.
# NOTE(review): assumes results are written by position via
# `classifiers_indices`; confirm against the part of the loop not shown here.
df_columns = [
    'Data', '#Samples', '# Dimensions', 'Outlier Perc',
    'ABOD', 'CBLOF', 'FB', 'HBOS', 'IForest', 'KNN', 'LOF',
    'MCD', 'OCSVM', 'PCA', 'AutoEncoder', 'CD', 'COPOD', 'DIF', 'ECOD',
    'GMM', 'KDE', 'LODA', 'QMCD', 'Sampling', 'SOS', 'ALAD', 'AnoGAN',
    'INNE', 'KPCA', 'LMDD', 'LOCI', 'LUNAR', 'MO_GAAL', 'RGraph',
    'SO_GAAL', 'SOD',
    # detectors without a position in `classifiers_indices`
    'LSCP', 'VAE',
]

n_classifiers = len(df_columns)-4

# initialize the container for saving the results
roc_df = pd.DataFrame(columns=df_columns)
Expand Down Expand Up @@ -106,27 +139,71 @@
# standardizing data for processing
X_train_norm, X_test_norm = standardizer(X_train, X_test)

classifiers = {'Angle-based Outlier Detector (ABOD)': ABOD(
contamination=outliers_fraction),
classifiers = {
'Angle-based Outlier Detector (ABOD)': ABOD(
contamination=outliers_fraction),
'Cluster-based Local Outlier Factor': CBLOF(
n_clusters=10,
contamination=outliers_fraction,
check_estimator=False,
random_state=random_state),
'Feature Bagging': FeatureBagging(contamination=outliers_fraction,
random_state=random_state),
'Feature Bagging': FeatureBagging(
contamination=outliers_fraction,
random_state=random_state),
'Histogram-base Outlier Detection (HBOS)': HBOS(
contamination=outliers_fraction),
'Isolation Forest': IForest(contamination=outliers_fraction,
random_state=random_state),
'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
'Isolation Forest': IForest(
contamination=outliers_fraction,
random_state=random_state),
'K Nearest Neighbors (KNN)': KNN(
contamination=outliers_fraction),
'Local Outlier Factor (LOF)': LOF(
contamination=outliers_fraction),
'Minimum Covariance Determinant (MCD)': MCD(
contamination=outliers_fraction, random_state=random_state),
'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
contamination=outliers_fraction,
random_state=random_state),
'One-class SVM (OCSVM)': OCSVM(
contamination=outliers_fraction),
'Principal Component Analysis (PCA)': PCA(
contamination=outliers_fraction, random_state=random_state),
contamination=outliers_fraction,
random_state=random_state),
'AutoEncoder': AutoEncoder(
contamination=outliers_fraction),
'CD': CD(
contamination=outliers_fraction),
'COPOD': COPOD(
contamination=outliers_fraction),
'DIF': DIF(
contamination=outliers_fraction),
'ECOD': ECOD(
contamination=outliers_fraction),
'GMM': GMM(
contamination=outliers_fraction),
'KDE': KDE(
contamination=outliers_fraction),

'LODA': LODA(
contamination=outliers_fraction),
'QMCD': QMCD(
contamination=outliers_fraction),
'Sampling': Sampling(
contamination=outliers_fraction),
'SOS': SOS(
contamination=outliers_fraction),
# 'ALAD': ALAD(
# contamination=outliers_fraction),
# 'AnoGAN':AnoGAN(
# contamination=outliers_fraction),
'INNE': INNE(contamination=outliers_fraction),
'KPCA': KPCA(contamination=outliers_fraction),
'LMDD': LMDD(contamination=outliers_fraction),
# 'LOCI': LOCI(contamination=outliers_fraction),
'LUNAR': LUNAR(contamination=outliers_fraction),
'MO_GAAL': MO_GAAL(contamination=outliers_fraction),
# 'RGraph': RGraph(contamination=outliers_fraction),
# 'SO_GAAL': SO_GAAL(contamination=outliers_fraction),
'SOD': SOD(contamination=outliers_fraction),

}
classifiers_indices = {
'Angle-based Outlier Detector (ABOD)': 0,
Expand All @@ -139,12 +216,47 @@
'Minimum Covariance Determinant (MCD)': 7,
'One-class SVM (OCSVM)': 8,
'Principal Component Analysis (PCA)': 9,
'AutoEncoder': 10,
'CD': 11,
'COPOD': 12,
'DIF': 13,
'ECOD': 14,
'GMM': 15,
'KDE': 16,
'LODA': 17,
'QMCD': 18,
'Sampling': 19,
'SOS': 20,
'ALAD': 21,
'AnoGAN': 22,
'INNE': 23,
'KPCA': 24,
'LMDD': 25,
'LOCI': 26,
'LUNAR': 27,
'MO_GAAL': 28,
'RGraph': 29,
'SO_GAAL': 30,
'SOD': 31,



}


for clf_name, clf in classifiers.items():
t0 = time()
clf.fit(X_train_norm)
test_scores = clf.decision_function(X_test_norm)

# Replace NaN / +inf / -inf in the detector scores with finite values.
# The clipping bounds come from the finite scores only: np.nanmax and
# np.nanmin skip NaN but still return +/-inf when an infinity is present,
# which would leave the infinities in place.
test_scores = np.asarray(test_scores, dtype=float)
finite_scores = test_scores[np.isfinite(test_scores)]
if finite_scores.size:
    score_max = finite_scores.max()
    score_min = finite_scores.min()
else:
    # No finite score at all: fall back to a constant so the result is finite.
    score_max = score_min = 0.0
test_scores = np.nan_to_num(test_scores,
                            nan=0.0,
                            posinf=score_max,
                            neginf=score_min)
# Handle NaN values in y_test
y_test = np.nan_to_num(y_test, nan=0.0, posinf=0.0, neginf=0.0)

t1 = time()
duration = round(t1 - t0, ndigits=4)

Expand Down Expand Up @@ -177,4 +289,4 @@
# Save the results for each run
time_df.to_csv('time.csv', index=False, float_format='%.3f')
roc_df.to_csv('roc.csv', index=False, float_format='%.3f')
prn_df.to_csv('prc.csv', index=False, float_format='%.3f')
prn_df.to_csv('prc.csv', index=False, float_format='%.3f')
18 changes: 18 additions & 0 deletions notebooks/prc.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA,AutoEncoder,CD,COPOD,DIF,ECOD,GMM,KDE,LODA,QMCD,Sampling,SOS,ALAD,AnoGAN ,INNE,KPCA,LMDD,LOCI,LSCP,LUNAR,MO_GAAL,RGraph,SO_GAAL,SOD,VAE
arrhythmia,452,274,14.6018,0.38076000000000004,0.4585699999999999,0.42641,0.51108,0.49556999999999995,0.44636999999999993,0.4334300000000001,0.39952,0.4614,0.46129,0.41970999999999997,0.0,0.46495,0.47433,0.49446,0.36323000000000005,0.41471,0.43984999999999996,0.049729999999999996,0.45327,0.30998000000000003,0.0,0.0,0.36761000000000005,0.42781,0.47909000000000007,0.0,0.48439,0.31001,0.0,0.0,0.36889,0.0,0.0
cardio,1831,21,9.6122,0.23742999999999997,0.40406000000000003,0.16194,0.44761000000000006,0.49265,0.33227,0.15409,0.43043000000000003,0.50112,0.609,0.37847,0.41884000000000005,0.53687,0.6144000000000001,0.5457700000000001,0.45156999999999997,0.27362000000000003,0.43045,0.50086,0.4677999999999999,0.14051000000000002,0.0,0.0,0.38051,0.31983,0.43345,0.0,0.25598,0.33255999999999997,0.0,0.0,0.30310999999999994,0.0,0.0
glass,214,9,4.2056,0.17023,0.07261999999999999,0.18095,0.0,0.10594999999999999,0.07261999999999999,0.14762,0.025,0.17262,0.07261999999999999,0.15594999999999998,0.12261999999999999,0.07261999999999999,0.24522999999999998,0.20595,0.17262,0.17262,0.025,0.025,0.10594999999999999,0.23928,0.0,0.0,0.20595,0.17262,0.07261999999999999,0.0,0.14762,0.0,0.0,0.0,0.15594999999999998,0.0,0.0
ionosphere,351,33,35.8974,0.84415,0.8134600000000001,0.7093700000000002,0.32951,0.64698,0.86021,0.7063400000000001,0.8806499999999999,0.7000499999999998,0.5728599999999999,0.7146800000000001,0.8447799999999999,0.56287,0.7875099999999999,0.51387,0.85226,0.8648100000000001,0.71319,0.41784999999999994,0.75755,0.6650400000000001,0.0,0.0,0.76081,0.8687000000000001,0.55983,0.0,0.8628600000000001,0.5791,0.0,0.0,0.7865500000000002,0.0,0.0
letter,1600,32,6.25,0.38009,0.22858,0.38184,0.07152,0.08813,0.33117,0.36411,0.19273999999999997,0.15096,0.08746999999999999,0.28865999999999997,0.27525999999999995,0.03683,0.16262,0.08904,0.29725000000000007,0.41737,0.10225999999999999,0.11845999999999998,0.16158999999999998,0.42885,0.0,0.0,0.23424,0.3634,0.08878,0.0,0.42205000000000004,0.10539000000000001,0.0,0.0,0.41620999999999997,0.0,0.0
lympho,148,18,4.0541,0.44833999999999996,0.75167,0.75167,0.8466699999999999,0.8766700000000001,0.75167,0.75167,0.44833999999999996,0.75167,0.75167,0.75167,0.29167,0.79667,0.6883299999999999,0.79667,0.64834,0.6183299999999999,0.43165999999999993,0.17332999999999998,0.80167,0.25833,0.0,0.0,0.41834,0.6183299999999999,0.79834,0.0,0.6683299999999999,0.17332999999999998,0.0,0.0,0.49833999999999995,0.0,0.0
mnist,7603,100,9.2069,0.35550000000000004,0.3953,0.33338,0.11882000000000001,0.30436,0.42042999999999997,0.33429000000000003,0.32133,0.39619,0.38460999999999995,0.37344000000000005,0.1536,0.23819999999999997,0.45736,0.18249,0.39078,0.37636,0.33729000000000003,0.17887,0.39403,0.15399,0.0,0.0,0.39781,0.4051600000000001,0.2644000000000001,0.0,0.35859,0.16896,0.0,0.0,0.33967,0.0,0.0
musk,3062,166,3.1679,0.050749999999999997,1.0,0.20373999999999998,0.9783199999999999,0.9854900000000001,0.2733,0.16954999999999998,0.98378,1.0,0.97994,0.31725,0.42475000000000007,0.34767,0.62775,0.46912000000000004,0.76602,0.12333000000000001,0.32001999999999997,0.69581,0.74071,0.042109999999999995,0.0,0.0,0.99443,0.20093999999999998,0.83667,0.0,0.11652999999999998,0.21476,0.0,0.0,0.16380999999999996,0.0,0.0
optdigits,5216,64,2.8758,0.006019999999999999,0.0,0.023719999999999998,0.21939999999999998,0.028399999999999998,0.0,0.023350000000000003,0.0,0.0,0.0,0.0,0.00471,0.010589999999999999,0.01965,0.0014500000000000001,0.0,0.0,0.0,0.0,0.0,0.04734000000000001,0.0,0.0,0.0,0.0,0.0,0.0,0.02731,0.02054,0.0,0.0,0.007090000000000001,0.0,0.0
pendigits,6870,16,2.2707,0.08124999999999999,0.23104999999999998,0.0658,0.29793000000000003,0.35816000000000003,0.09844,0.06528999999999999,0.08928,0.32866000000000006,0.31865000000000004,0.05608,0.02181,0.27415999999999996,0.32224,0.3545,0.050019999999999995,0.12423,0.39934000000000003,0.17589,0.19923,0.03913,0.0,0.0,0.15775,0.11564,0.20180000000000003,0.0,0.06629,0.14783000000000002,0.0,0.0,0.07569999999999999,0.0,0.0
pima,768,8,34.8958,0.51929,0.48672000000000004,0.44945,0.54238,0.50144,0.54133,0.45552000000000004,0.4981999999999999,0.47035,0.49429,0.47081,0.49493,0.48265,0.38776999999999995,0.44753,0.48932000000000003,0.53364,0.41633,0.52378,0.49695,0.35656000000000004,0.0,0.0,0.5000199999999999,0.5300900000000001,0.42932,0.0,0.51976,0.15489000000000003,0.0,0.0,0.46201,0.0,0.0
satellite,6435,36,31.6395,0.39023,0.5792900000000001,0.39061999999999997,0.56903,0.5571499999999999,0.49944999999999995,0.38929,0.6850200000000001,0.5345500000000001,0.4784400000000001,0.50582,0.42051999999999995,0.4777,0.6402000000000001,0.44645,0.45641,0.5605100000000001,0.48584999999999995,0.62853,0.5153099999999999,0.27453,0.0,0.0,0.61313,0.5763100000000001,0.26108999999999993,0.0,0.44067,0.46269,0.0,0.0,0.45056,0.0,0.0
satimage-2,5803,36,1.2235,0.21304999999999996,0.9375900000000001,0.06375,0.6939,0.8764399999999999,0.38087,0.055510000000000004,0.6481300000000001,0.9355600000000001,0.8040799999999999,0.36232000000000003,0.31453,0.72342,0.75913,0.61295,0.39531,0.32111,0.78276,0.88734,0.82279,0.031380000000000005,0.0,0.0,0.8136300000000001,0.53797,0.01705,0.0,0.25627,0.0,0.0,0.0,0.28467000000000003,0.0,0.0
shuttle,49097,9,7.1511,0.19773000000000002,0.28849,0.04965,0.95511,0.94676,0.21839,0.1425,0.7509000000000002,0.95418,0.95013,0.8938,0.47101,0.9497300000000001,0.58068,0.8712899999999999,0.8626299999999999,0.8650100000000001,0.3405,0.9522099999999998,0.64059,0.07549,0.0,0.0,0.8355899999999998,0.40153999999999995,0.9501800000000001,0.0,0.18308000000000002,0.42557,0.0,0.0,0.19948000000000002,0.0,0.0
vertebral,240,6,12.5,0.060050000000000006,0.03482999999999999,0.05724999999999999,0.0071400000000000005,0.03428,0.02381,0.05058999999999999,0.02857,0.02381,0.02262,0.10905000000000001,0.07939,0.0,0.11738,0.09933,0.01381,0.01381,0.01334,0.0071400000000000005,0.025,0.13516,0.0,0.0,0.05724999999999999,0.0329,0.06504000000000001,0.0,0.0329,0.08663,0.0,0.0,0.07899999999999999,0.0,0.0
vowels,1456,12,3.4341,0.57102,0.41319999999999996,0.34133,0.12974000000000002,0.19754999999999998,0.50929,0.35506000000000004,0.17649,0.27907000000000004,0.13636,0.39759000000000005,0.34219,0.01326,0.19398999999999997,0.16819,0.4132999999999999,0.52183,0.21627000000000002,0.13916,0.24786000000000002,0.24613999999999997,0.0,0.0,0.36521000000000003,0.54957,0.1307,0.0,0.58233,0.0,0.0,0.0,0.40793,0.0,0.0
wbc,378,30,5.5556,0.30604,0.4988800000000001,0.5287900000000001,0.5816600000000001,0.5087900000000001,0.49518000000000006,0.51879,0.45569,0.51249,0.47673000000000004,0.47636,0.41537999999999997,0.65316,0.15875999999999998,0.4163200000000001,0.45569,0.46159999999999995,0.48046,0.5472,0.51037,0.11879999999999999,0.0,0.0,0.4447,0.50507,0.52207,0.0,0.45959000000000005,0.0,0.0,0.0,0.47625,0.0,0.0
Loading
Loading