-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
90 lines (67 loc) · 2.49 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import numpy as np
from sklearn.model_selection import train_test_split
from knn import KNN
from sklearn import datasets
from sklearn.feature_selection import mutual_info_classif
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
# Three-colour map (red, green, blue) passed to plt.scatter in plot_data,
# where points are coloured by their 'target' value.
cmap = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
import enum
import pandas as pd
def main():
    """Run one weighted-KNN experiment on the wine dataset and print accuracy.

    The features are re-weighted with ``Weight.random`` before an 80/20
    train/test split (``random_state=0``). The project's ``KNN`` classifier
    (``k=7``) is then fitted on the training part and scored on the test part.
    The accuracy is printed as a percentage.
    """
    # Alternative scikit-learn loaders that can be swapped in here:
    #   datasets.load_iris(), datasets.load_digits(),
    #   datasets.load_breast_cancer()
    wine = datasets.load_wine()
    features, labels = wine.data, wine.target

    weighted_features = apply_weights(features, labels, Weight.random)

    train_features, test_features, train_labels, test_labels = train_test_split(
        weighted_features, labels, test_size=0.2, random_state=0
    )

    classifier = KNN(k=7)
    classifier.fit(train_features, train_labels)
    predicted_labels = classifier.predict(test_features)

    accuracy = calculate_accuracy(predicted_labels, test_labels)
    print(accuracy)
def calculate_accuracy(predictions, y_test):
    """Return the percentage of predictions that equal the true labels.

    Both inputs are converted to NumPy arrays first, so plain Python lists
    are compared element by element (a raw ``list == list`` would collapse
    to a single boolean and give a meaningless score).

    Args:
        predictions: Sequence or array of predicted labels.
        y_test: Sequence or array of true labels, same shape as
            ``predictions``.

    Returns:
        Accuracy as a float between 0 and 100.

    Raises:
        ValueError: If the two inputs differ in shape or are empty.
    """
    predicted = np.asarray(predictions)
    actual = np.asarray(y_test)

    if predicted.shape != actual.shape:
        raise ValueError(
            "predictions and y_test must have the same shape, got "
            f"{predicted.shape} and {actual.shape}"
        )
    if actual.size == 0:
        # Accuracy over zero samples is undefined; fail loudly instead of
        # returning NaN.
        raise ValueError("cannot compute accuracy of empty inputs")

    # Mean of the boolean match mask is the fraction of correct predictions.
    return np.mean(predicted == actual) * 100
def apply_weights(X, y, weight_type):
    """Scale every feature column of ``X`` by a weight chosen by ``weight_type``.

    Args:
        X: Feature matrix with one column per feature (``X.shape[1]`` is
            used as the number of features).
        y: Class labels; only read by ``Weight.fl_correlation``.
        weight_type: Member of ``Weight`` selecting the weighting scheme:
            ``normal`` multiplies by 1 (no re-weighting);
            ``random`` uses min-max-normalised random weights and rounds
            the weighted values to two decimals;
            ``fl_correlation`` uses min-max-normalised mutual information
            between each feature and the labels;
            ``ff_correlation`` uses the min-max-normalised reciprocal of
            each feature's average absolute correlation with the other
            features.

    Returns:
        The weighted feature matrix.

    Raises:
        ValueError: If ``weight_type`` is not one of the supported members
            (previously this silently returned ``None``).
    """
    if weight_type == Weight.normal:
        feature_weights = 1
        return X * feature_weights

    if weight_type == Weight.random:
        # One random weight per feature column, rescaled to [0, 1].
        random_weights = normalize(np.random.rand(X.shape[1]))
        return np.round(X * random_weights, 2)

    if weight_type == Weight.fl_correlation:
        mutual_information = mutual_info_classif(X, y)
        return X * normalize(mutual_information)

    if weight_type == Weight.ff_correlation:
        df = pd.DataFrame(data=X)
        correlation_matrix = df.corr().abs()
        # Subtract 1 to drop each feature's self-correlation (the diagonal)
        # before averaging over the remaining features.
        avg_corr = (correlation_matrix.sum() - 1) / (len(correlation_matrix.columns) - 1)
        # Features that correlate less with the others get a larger weight.
        reciprocal_avg_corr = 1 / avg_corr
        normalized_result = normalize(reciprocal_avg_corr)
        return (df * normalized_result).to_numpy()

    raise ValueError(f"Unsupported weight type: {weight_type!r}")
def plot_data(X, data):
    """Show a scatter plot of the first two feature columns, coloured by target.

    Args:
        X: Feature matrix; its columns are labelled with
            ``data['feature_names']``.
        data: Dataset object providing ``target`` and ``'feature_names'``.
    """
    column_labels = np.append(data['feature_names'], ['target'])
    table = pd.DataFrame(np.c_[X, data.target], columns=column_labels)

    # Only the first two features are drawn, one per axis.
    x_name, y_name = table.columns[0], table.columns[1]

    plt.figure(figsize=(10, 6))
    plt.scatter(table[x_name], table[y_name], c=table['target'], cmap=cmap)
    plt.xlabel(x_name)
    plt.ylabel(y_name)
    plt.show()
def normalize(series):
    """Min-max rescale ``series`` so its smallest value maps to 0 and largest to 1.

    Args:
        series: Object exposing ``min()``/``max()`` and element-wise
            arithmetic (it is called with NumPy arrays and pandas Series
            in this file).

    Returns:
        The rescaled values, same type as the input arithmetic produces.
    """
    lowest = series.min()
    value_range = series.max() - lowest
    return (series - lowest) / value_range
@enum.unique
class Weight(enum.Enum):
    """Feature-weighting schemes understood by ``apply_weights``."""

    # Multiply features by 1, i.e. leave them as they are.
    normal = enum.auto()
    # Random per-feature weights.
    random = enum.auto()
    # Weights from feature-label mutual information.
    fl_correlation = enum.auto()
    # Weights from the reciprocal of average feature-feature correlation.
    ff_correlation = enum.auto()
# Run the experiment only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()