-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
78 lines (64 loc) · 2.98 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#_________________libraries__________
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import seaborn as sb
import matplotlib.pyplot as plt
#__________________read the file____________
df=pd.read_csv("diabetes.csv")
df.head()
#__________________preparing the database to training the Algorithme ____________
columns=["Pregnancies","Glucose","BloodPressure","SkinThickness","Insulin",
"BMI","DiabetesPedigreeFunction","Age","Outcome"]
labels=df["Outcome"].values
features=df[list(columns)].values
print("\nthe labels is\n",labels)
print("\nthe features is\n",features)
#__________________________scaling_______________________
#__________________training and testing the Algorithme 1______________________
x_train,x_test,y_train,y_test=train_test_split(features,labels,test_size=0.30)
clf=RandomForestClassifier(n_estimators=1)
clf=clf.fit(x_train,y_train)
#__________________training and testing the Algorithme 2______________________
x_train,x_test,y_train,y_test=train_test_split(features,labels,test_size=0.30)
clf=RandomForestClassifier(n_estimators=2)
clf=clf.fit(x_train,y_train)
#__________________training and testing the Algorithme 3______________________
x_train,x_test,y_train,y_test=train_test_split(features,labels,test_size=0.30)
clf=RandomForestClassifier(n_estimators=3)
clf=clf.fit(x_train,y_train)
#__________________training and testing the Algorithme 4______________________
x_train,x_test,y_train,y_test=train_test_split(features,labels,test_size=0.30)
clf=RandomForestClassifier(n_estimators=4)
clf=clf.fit(x_train,y_train)
#__________________training and testing the Algorithme 5______________________
x_train,x_test,y_train,y_test=train_test_split(features,labels,test_size=0.30)
clf=RandomForestClassifier(n_estimators=5)
clf=clf.fit(x_train,y_train)
#__________________ score of training____________
accuracyTrain=clf.score(x_train,y_train)
print("\nthe accuracy Train is\n",accuracyTrain)
#____________score of testing______________
accuracyTest=clf.score(x_test,y_test)
print("\nthe accuracy of test is \n",accuracyTest)
#___________performance appraisal of traning_________________
ypredict=clf.predict(x_train)
print("\n Training Classification report \n",classification_report(y_train,ypredict)
,"\nConfusion matrix of training \n",confusion_matrix(y_train,ypredict))
#___________performance appraisal of testing_________________
ypredict=clf.predict(x_test)
print("\n Testing Classification report \n",classification_report(y_test,ypredict)
,"\nConfusion matrix of testing \n",confusion_matrix(y_test,ypredict))
#________________database info________________
#df.info()
#sb.countplot(x="Outcome",data=df,palette="hls")
#plt.show()
#sb.countplot(x="Pregnancies",data=df,palette="hls")
#plt.show()
#sb.countplot(x="Glucose",data=df,palette="hls")
#plt.show()
#sb.heatmap(df.corr())
#plt.show()