-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUtility.py
101 lines (85 loc) · 3.29 KB
/
Utility.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import numpy as np
import pandas as pd
import copy
# Predict the label of a single example by walking the decision tree (nested dict) directly.
def predict_example(example, tree):
    """Classify one example by descending a decision tree stored as nested dicts.

    Each key of a tree node is a question of the form
    ``"<feature> <operator> <value>"``. The operators ``<=`` and ``>`` compare
    ``example[feature]`` against ``float(value)``; any other operator is
    treated as a categorical test comparing ``str(example[feature])`` with
    ``value``. The value stored under a question is either a sub-tree (dict)
    or a leaf.

    Args:
        example: Mapping-like object indexable by feature name (for instance
            a DataFrame row).
        tree: A nested dict as described above, or a leaf value.

    Returns:
        The leaf reached by following the satisfied questions, or ``np.nan``
        when no question of a node is satisfied. If ``tree`` is not a dict it
        is returned unchanged.
    """
    node = tree
    # Iterative descent: keep going until a leaf (non-dict) is reached.
    while isinstance(node, dict):
        # Stays NaN when none of the node's questions holds for the example.
        answer = np.nan
        for question, subtree in node.items():
            # maxsplit=2 keeps categorical values containing spaces intact.
            feature_name, comparison_operator, value = question.split(" ", 2)
            if comparison_operator == "<=":
                matched = example[feature_name] <= float(value)
            elif comparison_operator == ">":
                matched = example[feature_name] > float(value)
            else:
                # Categorical feature: compare on the string representation.
                matched = str(example[feature_name]) == value
            if matched:
                # No break on purpose: if several questions hold, the last
                # one wins, as in the original implementation.
                answer = subtree
        node = answer
    return node
# Apply predict_example to every row (example) of the DataFrame.
def make_predictions(df, tree):
    """Predict a label for every row of ``df`` using ``tree``.

    Args:
        df: DataFrame whose rows are the examples to classify.
        tree: Decision tree passed to ``predict_example`` for each row.

    Returns:
        A Series with one prediction per row. For a DataFrame with no rows an
        empty object-typed Series sharing ``df``'s (empty) index is returned.
    """
    if len(df) == 0:
        # An explicit dtype avoids the version-dependent default dtype of an
        # empty Series; reusing df.index keeps the result aligned with df.
        return pd.Series(index=df.index, dtype=object)
    return df.apply(predict_example, args=(tree,), axis=1)
# Compute the accuracy from the Series of predictions returned by make_predictions.
def calculate_accuracy(df, tree):
    """Return the fraction of rows whose prediction equals the ``income`` column.

    Args:
        df: DataFrame containing the examples and an ``income`` column with
            the true labels.
        tree: Decision tree forwarded to ``make_predictions``.

    Returns:
        The mean of the element-wise comparison between predictions and
        ``df["income"]``.
    """
    predictions = make_predictions(df, tree)
    # fillna returns a new Series, so the result must be kept; missing
    # predictions become a placeholder label.
    predictions = predictions.fillna('non disponibile')
    predictions_correct = predictions == df["income"]
    # The comparison yields booleans, which are averaged as 0/1, so the mean
    # is the share of correct predictions.
    return predictions_correct.mean()
# New method for evaluating a single rule.
def predict_rule(df, rule):
    """Return the index labels of the rows of ``df`` covered by ``rule``.

    A rule is a sequence whose last element is a class label and whose
    preceding elements are conditions of the form
    ``"<feature> <operator> <value>"``. A row is covered when its ``income``
    value equals the label and every condition holds.

    Supported operators:
        ``<=`` / ``>``: numeric comparison against ``float(value)``.
        ``=``: categorical comparison on the string representation of the
        column, matching the categorical test in ``predict_example``.
    Conditions with any other operator are ignored.

    Args:
        df: DataFrame with an ``income`` column and the referenced features.
        rule: Sequence ``[condition, ..., label]``.

    Returns:
        The index of the rows satisfying the rule.
    """
    label = rule[-1]
    # Build one boolean mask instead of materialising a filtered DataFrame
    # after every condition.
    mask = df['income'] == label
    for condition in rule[:-1]:
        # maxsplit=2 keeps categorical values containing spaces intact.
        feature_name, comparison_operator, value = condition.split(" ", 2)
        if comparison_operator == "<=":
            mask = mask & (df[feature_name] <= float(value))
        elif comparison_operator == ">":
            mask = mask & (df[feature_name] > float(value))
        elif comparison_operator == "=":
            # Categorical attribute: cast to str so numeric-coded categories
            # can match the string value stored in the rule.
            mask = mask & (df[feature_name].astype(str) == value)
    return df.index[mask]
# Compute the accuracy over all the rules.
def make_predictions_rule(df, rules):
    """Return the fraction of rows of ``df`` covered by at least one rule.

    Each rule is evaluated with ``predict_rule``; the index labels it returns
    are pooled, de-duplicated and divided by the number of rows of ``df``.
    Because rows are identified by index label, duplicate labels in
    ``df.index`` are counted once.

    Args:
        df: DataFrame with the examples.
        rules: Iterable of rules accepted by ``predict_rule``.

    Returns:
        The covered fraction as a float: ``0.0`` when there are no rules and
        NaN when ``df`` has no rows.
    """
    total_rows = df.shape[0]
    if total_rows == 0:
        # Nothing to score; avoid dividing by zero.
        return float("nan")
    # predict_rule only reads df, so no defensive copy is needed.
    matched = [np.asarray(predict_rule(df, rule)) for rule in rules]
    if not matched:
        # With no rules nothing is covered.
        return 0.0
    # Work on index labels only and concatenate once, outside the loop.
    covered = np.unique(np.concatenate(matched, axis=0))
    return len(covered) / total_rows
# Count the number of literals in the rules.
def number_of_literals(rules):
    """Print and return the total number of elements across all rules.

    Every element of every rule is counted, i.e. the sum of ``len(rule)``.

    Args:
        rules: Iterable of sized rules (for instance lists of strings).

    Returns:
        The total count, which is also printed.
    """
    count = sum(len(rule) for rule in rules)
    print('Numero di letterali: ', count)
    # Returned as well as printed so callers can use the value.
    return count