# DecisionTree.py
#
# Trains Gini- and entropy-based decision-tree classifiers on the cleaned
# laptop dataset, prints evaluation metrics for each, and saves a plot of
# every fitted tree.
import pandas as pd
import statistics
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
def train_using_gini(x_train, y_train, *, random_state=100, max_depth=3,
                     min_samples_leaf=5):
    """Fit a decision tree that chooses splits by Gini impurity.

    Args:
        x_train: Training features, passed straight to
            ``DecisionTreeClassifier.fit``.
        y_train: Training labels aligned with ``x_train``.
        random_state: Seed forwarded to the classifier (default 100).
        max_depth: Maximum depth of the tree (default 3).
        min_samples_leaf: Minimum number of samples per leaf (default 5).

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    clf_gini = DecisionTreeClassifier(
        criterion="gini",
        random_state=random_state,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
    )
    clf_gini.fit(x_train, y_train)
    return clf_gini
def tarin_using_entropy(x_train, y_train, *, random_state=100, max_depth=3,
                        min_samples_leaf=5):
    """Fit a decision tree that chooses splits by entropy (information gain).

    The misspelled function name is kept as-is so existing callers keep
    working.

    Args:
        x_train: Training features, passed straight to
            ``DecisionTreeClassifier.fit``.
        y_train: Training labels aligned with ``x_train``.
        random_state: Seed forwarded to the classifier (default 100).
        max_depth: Maximum depth of the tree (default 3).
        min_samples_leaf: Minimum number of samples per leaf (default 5).

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    clf_entropy = DecisionTreeClassifier(
        criterion="entropy",
        random_state=random_state,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
    )
    clf_entropy.fit(x_train, y_train)
    return clf_entropy
def prediction(x_test, clf_object):
    """Return the predictions of ``clf_object`` for ``x_test``.

    Args:
        x_test: Samples to classify; handed unchanged to ``predict``.
        clf_object: Any object exposing a ``predict(x)`` method (used here
            with both the Gini and the entropy classifiers).

    Returns:
        Whatever ``clf_object.predict(x_test)`` returns.
    """
    return clf_object.predict(x_test)
def cal_accuracy(y_test, y_pred):
    """Print evaluation metrics comparing ``y_pred`` against ``y_test``.

    Three items are written to stdout, in this order: the accuracy as a
    percentage, the confusion matrix, and the classification report.

    Args:
        y_test: Ground-truth labels.
        y_pred: Predicted labels for the same samples.
    """
    accuracy_pct = accuracy_score(y_test, y_pred) * 100
    print("Accuracy : ", accuracy_pct, "%")

    matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix: ", matrix)

    report = classification_report(y_test, y_pred)
    print("Report : ", report)
def main():
    """Train, evaluate and plot Gini and entropy decision trees.

    Steps:
        1. Read ``cleaned_LaptopDataset.csv`` from the working directory.
        2. Turn ``latest_price`` into a binary target: 1 when the price is at
           or above the median, 0 otherwise.
        3. Label-encode every column and split 70/30 into train and test sets.
        4. Fit one tree per criterion, print metrics for each, and save the
           tree plots to ``GiniTree.png`` and ``EntropyTree.png``.
    """
    from sklearn import preprocessing
    from sklearn.model_selection import train_test_split

    df = pd.read_csv('cleaned_LaptopDataset.csv')

    # Binarise the target around its median (rows at the median count as 1).
    threshold = statistics.median(df['latest_price'])
    df['latest_price'] = (df['latest_price'] >= threshold).astype(int)

    # Every column, whatever its dtype, is replaced by integer label codes.
    encoder = preprocessing.LabelEncoder()
    for col in df:
        df[col] = encoder.fit_transform(df[col])

    x = df.drop('latest_price', axis=1)
    y = df['latest_price']
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, random_state=100)

    clf_gini = train_using_gini(x_train, y_train)
    clf_entropy = tarin_using_entropy(x_train, y_train)

    print("Results Using Gini Index:")
    y_pred_gini = prediction(x_test, clf_gini)
    cal_accuracy(y_test, y_pred_gini)

    print("Results Using Entropy:")
    y_pred_entropy = prediction(x_test, clf_entropy)
    cal_accuracy(y_test, y_pred_entropy)

    # Take the names from the columns the trees were actually fitted on, so
    # each node label lines up with its feature index. The target column is
    # not a feature and must not appear in this list.
    feature_names = list(x.columns)
    _save_tree_plot(clf_gini, feature_names, 'GiniTree.png')
    _save_tree_plot(clf_entropy, feature_names, 'EntropyTree.png')


def _save_tree_plot(clf, feature_names, out_path):
    """Draw one fitted tree on its own figure and save it to ``out_path``."""
    from sklearn import tree

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 4), dpi=300)
    # Draw on the explicit axes rather than whatever figure is current.
    tree.plot_tree(
        clf,
        feature_names=feature_names,
        class_names=["0", "1"],
        filled=True,
        ax=ax,
    )
    fig.savefig(out_path)
    # Close the figure so successive plots do not pile up in memory.
    plt.close(fig)
# Run the full pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()