forked from Xuxl2020/ASD-XGB
-
Notifications
You must be signed in to change notification settings - Fork 0
/
XGB.txt
78 lines (67 loc) · 2.01 KB
/
XGB.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
### XGB
!pip install shap
import pandas as pd
import numpy as np
from numpy import *
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, roc_auc_score,f1_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import keras
import math
from sklearn.model_selection import KFold
from keras.wrappers.scikit_learn import KerasClassifier
from xgboost.sklearn import XGBClassifier
import xgboost as xgb
import shap
from sklearn.utils.class_weight import compute_sample_weight
from keras.utils import to_categorical
import os
import warnings
warnings.filterwarnings("ignore")
### load data
# Read the training table and the separate test table.
# NOTE(review): the '.../' prefix looks like a placeholder for the real data
# directory -- confirm before running.
df = pd.read_csv('.../dat.1944.csv')
tdf = pd.read_csv('.../dat.60.csv')

# Encode the diagnosis strings as the integer class ids 0/1/2.
# The replacement is applied frame-wide, exactly as the three chained
# replace() calls did before.
label_codes = {'ASD': 0, 'DLD': 1, 'GDD': 2}
df = df.replace(label_codes)
tdf = tdf.replace(label_codes)

# Make the label dtype explicit instead of relying on replace() silently
# downcasting an object column to int (deprecated in recent pandas). An
# object-dtype target is rejected by the scikit-learn metrics used later.
# A label outside label_codes makes astype(int) raise ValueError here,
# rather than failing later inside fit/metrics.
df['diag'] = df['diag'].astype(int)
tdf['diag'] = tdf['diag'].astype(int)

# Column layout used below: the first column is dropped, 'diag' is the
# target, and everything from the third column on is a feature.
# NOTE(review): presumably column 0 is a subject id and column 1 is 'diag'
# -- confirm against the CSV header.
col = df.columns[1:]
col1 = df.columns[2:]
dat = df[col]
grp = dat['diag']

x_train = dat[col[1:]]
y_train = dat['diag']
x_test = tdf[col1]
y_test = tdf['diag']
# Three-class gradient-boosted tree classifier that outputs per-class
# probabilities ('multi:softprob') and tracks multi-class error ('merror').
xlf = XGBClassifier(
    max_depth=4,
    learning_rate=0.0001,
    n_estimators=5000,
    # 'silent' was removed from XGBoost and is ignored (with a warning) by
    # current releases; verbosity=0 is the documented way to mute logging.
    verbosity=0,
    eval_metric='merror',
    objective='multi:softprob',
    subsample=0.9,
    colsample_bytree=0.9,
    num_class=3,
)
xgc = xlf

# "balanced" sample weights are inversely proportional to class frequency,
# so every diagnosis contributes equally to the training loss.
balanced_weights = compute_sample_weight("balanced", y_train)
xgc.fit(x_train, y_train, sample_weight=balanced_weights)

# Class-probability matrix for the test set: one row per sample, one column
# per class.
pred_test = xgc.predict_proba(x_test)
### predict test
# Predicted class id = index of the most probable column.
pred_res = np.argmax(pred_test, axis=1)

# Pin the label set to the model's output columns. Without `labels`,
# confusion_matrix only covers classes that occur in y_test or pred_res, so a
# missing class would shrink the matrix, shift the per-class positions and
# make acc_all_test[2] raise IndexError.
class_labels = np.arange(pred_test.shape[1])
mat_test = confusion_matrix(y_test, pred_res, labels=class_labels)

# Per-class accuracy (recall): correct predictions on the diagonal divided by
# the number of true samples in each row. A class with no true samples gives
# 0/0 -> nan, which is kept on purpose because recall is undefined there.
with np.errstate(divide='ignore', invalid='ignore'):
    acc_all_test = np.diag(mat_test)/mat_test.sum(axis=1)
acc_test = accuracy_score(y_test, pred_res)
acc1 = acc_all_test[0]
acc2 = acc_all_test[1]
acc3 = acc_all_test[2]

# SHAP feature attributions for the test samples.
# NOTE(review): for multi-class models the return layout (list of per-class
# arrays vs. a single 3-D array) depends on the installed shap version.
explainer = shap.TreeExplainer(xgc)
shap_values = explainer.shap_values(x_test)

print(shap_values)
print('acc:\n', acc_test)
print('r:\n', acc_all_test)
print('mix-matrix:\n', mat_test)