-
Notifications
You must be signed in to change notification settings - Fork 1
/
KNN_from_scratch.py
80 lines (68 loc) · 2.47 KB
/
KNN_from_scratch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import csv
import random
import math
import operator
#load the dataset and split it into training_set and test_set
def load_dataset(filename, split, training_set=None, test_set=None):
    """Load a CSV dataset and randomly split its rows into train and test sets.

    Every column except the last is converted to ``float``; the last column
    is left untouched as the class label. Each non-empty row is appended to
    ``training_set`` with probability ``split`` and to ``test_set`` otherwise.

    Args:
        filename: Path of the CSV file to read.
        split: Probability (0.0 to 1.0) that a row lands in the training set.
        training_set: Optional list that training rows are appended to; a
            fresh list is created when omitted.
        test_set: Optional list that test rows are appended to; a fresh list
            is created when omitted.

    Returns:
        The ``(training_set, test_set)`` pair. Lists supplied by the caller
        are extended in place and returned.

    Raises:
        ValueError: If a feature column cannot be parsed as a float.
    """
    # None sentinels instead of `[]` defaults: a mutable default is created
    # once and shared, so rows from earlier calls would pile up in it.
    if training_set is None:
        training_set = []
    if test_set is None:
        test_set = []

    # csv.reader needs a text-mode file on Python 3 ('rb' yields bytes and
    # fails); newline="" lets the csv module handle line endings itself.
    with open(filename, newline="") as csvfile:
        for row in csv.reader(csvfile):
            # Skip blank lines (typically a trailing newline at end of file)
            # rather than unconditionally discarding the last row.
            if not row:
                continue
            # All columns but the last are numeric features.
            row[:-1] = [float(value) for value in row[:-1]]
            if random.random() < split:
                training_set.append(row)
            else:
                test_set.append(row)
    return training_set, test_set
#find Euclidean distance between 2 data points
def calculate_euclidean_distance(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` coordinates.

    Only indices ``0`` through ``length - 1`` of ``instance1`` and
    ``instance2`` are compared, so any items beyond that (such as a trailing
    class label) are ignored.
    """
    squared_sum = 0
    for axis in range(length):
        delta = instance1[axis] - instance2[axis]
        squared_sum += delta ** 2
    return math.sqrt(squared_sum)
#finding the neighbors of the test_instance after sorting them by distance
def fetch_neighbors(training_set, test_instance, k):
    """Return the ``k`` rows of ``training_set`` closest to ``test_instance``.

    Distance is computed with ``calculate_euclidean_distance`` over every
    item of ``test_instance`` except the last one. The result is ordered
    nearest first; rows at equal distance keep their ``training_set`` order
    because the sort is stable.
    """
    feature_count = len(test_instance) - 1
    scored_rows = [
        (row, calculate_euclidean_distance(test_instance, row, feature_count))
        for row in training_set
    ]
    ranked = sorted(scored_rows, key=lambda pair: pair[1])
    # Index rank by rank (not a slice) so that asking for more neighbors
    # than there are training rows still raises IndexError.
    return [ranked[rank][0] for rank in range(k)]
#fetching response after majority voting for the test_instance class prediction
def fetch_response(neighbors_list):
    """Predict a class label by majority vote among the neighbors.

    Args:
        neighbors_list: Rows whose last item is the class label.

    Returns:
        The label with the most votes. When several labels tie, the one that
        appears first in ``neighbors_list`` wins (dicts keep insertion order
        on Python 3.7+ and the sort below is stable).

    Raises:
        IndexError: If ``neighbors_list`` is empty.
    """
    class_votes = {}
    for neighbor in neighbors_list:
        label = neighbor[-1]
        class_votes[label] = class_votes.get(label, 0) + 1
    # dict.items() replaces the Python 2-only iteritems(), which raises
    # AttributeError on Python 3.
    sorted_votes = sorted(
        class_votes.items(), key=operator.itemgetter(1), reverse=True
    )
    return sorted_votes[0][0]
#finding accuracy of the programmed knn model
def fetch_accuracy(test_set, knn_predictions):
    """Return the percentage of test rows whose label was predicted correctly.

    Args:
        test_set: Rows whose last item is the true class label.
        knn_predictions: Predicted labels, one per row of ``test_set`` and in
            the same order.

    Returns:
        Accuracy as a float between 0.0 and 100.0. An empty ``test_set``
        yields 0.0 instead of dividing by zero.

    Raises:
        IndexError: If ``knn_predictions`` has fewer items than ``test_set``.
    """
    # A random split of a small dataset can leave the test set empty.
    if not test_set:
        return 0.0
    correct_prediction = sum(
        1
        for index, row in enumerate(test_set)
        if row[-1] == knn_predictions[index]
    )
    # float() keeps this a true division even under Python 2 semantics.
    return (correct_prediction / float(len(test_set))) * 100.0
def main():
    """Classify a random test split of 'flowers_dataset.data' and print accuracy.

    Rows are split roughly 67/33 into training and test sets, each test row
    is classified by a 3-nearest-neighbour majority vote, and the resulting
    accuracy percentage is printed.
    """
    # preparing data
    training_set = []
    test_set = []
    split = 0.67
    # The lists are filled in place by load_dataset.
    load_dataset('flowers_dataset.data', split, training_set, test_set)
    # generating knn_predictions
    k = 3
    knn_predictions = [
        fetch_response(fetch_neighbors(training_set, test_instance, k))
        for test_instance in test_set
    ]
    knn_accuracy = fetch_accuracy(test_set, knn_predictions)
    print('Accuracy: ' + repr(knn_accuracy) + '%')


# Run only when executed as a script, so importing this module for its
# functions does not trigger file I/O and a full classification run.
if __name__ == "__main__":
    main()