This repository has been archived by the owner on Apr 13, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 64
/
Copy pathKNN-recognize.py
99 lines (86 loc) · 3.32 KB
/
KNN-recognize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# __author__ = 'Yaoshi'
# -*- coding: utf-8 -*-
import operator
import os

from numpy import *
def handwritingClassTest(trainingDir='trainingdigit/trainingdigits-for-knn',
                         testDir='trainingdigit/testdigits-for-knn', k=3):
    """Train a kNN digit classifier from text images and print its error rate.

    Every file name is expected to look like ``<label>_<rest>``; the part
    before the first underscore is used as the class label. Each image is
    turned into a 1024-element row vector by ``img2vector``.

    Args:
        trainingDir: directory holding the training images.
        testDir: directory holding the test images.
        k: number of nearest neighbours that vote on each test image.
    """
    trainingFileList = listDirInMac(trainingDir)
    hwLabels = [fnameStr.split('_')[0] for fnameStr in trainingFileList]
    trainingMat = zeros((len(trainingFileList), 1024))
    for i, fnameStr in enumerate(trainingFileList):
        trainingMat[i, :] = img2vector('%s/%s' % (trainingDir, fnameStr))

    testFileList = listDirInMac(testDir)
    mTest = len(testFileList)
    errorCount = 0
    for fnameStr in testFileList:
        classStr = fnameStr.split('_')[0]
        # Read from the very directory that was listed so that every listed
        # file name is guaranteed to resolve.
        vTest = img2vector('%s/%s' % (testDir, fnameStr))
        classifierResult = classify0(vTest, trainingMat, hwLabels, k)
        if classifierResult != classStr:
            errorCount += 1
            verdict = "wrong"
        else:
            verdict = "correct"
        # A single-argument print call behaves the same on Python 2 and 3.
        print("classifier result: %s, the real answer:%s %s"
              % (classifierResult, classStr, verdict))

    print("error number: %d" % errorCount)
    if mTest:
        print("error rate: %f" % (errorCount / float(mTest)))
    else:
        # Nothing was classified, so a rate would be a division by zero.
        print("error rate: n/a (no test files found)")
# kNN algorithm
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training samples.

    Distances are Euclidean.

    Args:
        inX: feature vector to classify, of length n or shape (1, n).
        dataSet: training samples, one per row, shape (m, n).
        labels: sequence of m class labels; ``labels[i]`` belongs to row i.
        k: number of neighbours that vote. Values larger than m are
            clamped to m.

    Returns:
        The label with the most votes. On a tie, the label whose first vote
        came from the closest neighbour wins.

    Raises:
        ValueError: if ``dataSet`` has no rows or ``k`` is less than 1.
    """
    samples = asarray(dataSet)
    dataSetSize = samples.shape[0]
    if dataSetSize == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    if k > dataSetSize:
        k = dataSetSize

    # Broadcasting subtracts inX from every row without building a tiled copy.
    diffMat = asarray(inX) - samples
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    # A stable sort keeps equally distant samples in their dataSet order.
    sortedDistIndicies = distances.argsort(kind='mergesort')[:k]

    classCount = {}
    firstSeen = []  # labels in the order their first vote arrived
    for idx in sortedDistIndicies:
        voteIlabel = labels[idx]
        if voteIlabel not in classCount:
            firstSeen.append(voteIlabel)
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # sorted() is stable, so equal counts keep the nearest-first order.
    sortedClassCount = sorted([(label, classCount[label]) for label in firstSeen],
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
def img2vector(fname, numRows=28, vecLen=1024):
    """Flatten a text image of digit characters into a 1 x ``vecLen`` vector.

    At most ``numRows`` lines are read. The character at column j of line i
    is converted with ``int`` and stored at index ``len(line) * i + j``;
    positions that are never written stay 0.

    Args:
        fname: path of the text file to read.
        numRows: maximum number of lines to read from the file.
        vecLen: length of the returned row vector.

    Returns:
        A numpy array of shape (1, vecLen).
    """
    returnVec = zeros((1, vecLen))
    # The context manager closes the file even if a character fails to parse.
    with open(fname) as f:
        for i in range(numRows):
            lineStr = f.readline()
            if not lineStr:
                # End of file before numRows lines: keep the remaining zeros.
                break
            lineStr = lineStr.rstrip('\r\n')
            rownum = len(lineStr)
            for j, ch in enumerate(lineStr):
                returnVec[0, rownum * i + j] = int(ch)
    return returnVec
def file2matrix(filename):
    """Load a tab-separated file into a feature matrix and a label list.

    The first three fields of every non-blank line become one row of the
    matrix. The last field is the label, stored as the hexadecimal encoding
    of its bytes.

    Args:
        filename: path of the tab-separated text file.

    Returns:
        A tuple ``(returnMat, classLabelVector)``: a numpy array of shape
        (number of non-blank lines, 3) and a list of hex-encoded labels.
    """
    import binascii

    # Read the file once and close it; blank lines carry no record.
    with open(filename) as fr:
        records = [line.strip() for line in fr]
    records = [record for record in records if record]

    returnMat = zeros((len(records), 3))
    classLabelVector = []
    for index, record in enumerate(records):
        listFromLine = record.split('\t')
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        label = listFromLine[-1]  # -1 selects the last field of the line
        # hexlify works on bytes; it replaces the Python-2-only 'hex' codec.
        raw = label if isinstance(label, bytes) else label.encode('utf-8')
        hexLabel = binascii.hexlify(raw)
        if not isinstance(hexLabel, str):
            hexLabel = hexLabel.decode('ascii')
        classLabelVector.append(hexLabel)
    return returnMat, classLabelVector
def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` linearly onto the range [0, 1].

    Args:
        dataSet: two-dimensional array-like, one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``: the rescaled data, the
        per-column ``max - min`` and the per-column minimum. ``ranges`` is
        returned unmodified, so a constant column reports a range of 0.
    """
    data = asarray(dataSet)
    minVals = data.min(0)
    maxVals = data.max(0)
    ranges = maxVals - minVals
    # A constant column has range 0; dividing by 1 instead maps it to 0
    # rather than producing NaN values.
    safeRanges = where(ranges == 0, 1, ranges)
    # true_divide avoids floor division of integer data on Python 2.
    normDataSet = true_divide(data - minVals, safeRanges)
    return normDataSet, ranges, minVals
# Skip hidden files such as the .DS_Store entries that OS X creates
def listDirInMac(path):
    """Return the entries of ``path`` with hidden regular files left out.

    An entry is dropped only when its name starts with a dot and it is a
    regular file; dot-prefixed directories are kept.

    Args:
        path: directory to list.

    Returns:
        A list of entry names in the order ``os.listdir`` produced them.
    """
    # Build a new list instead of removing while iterating, which skips the
    # entry that follows each removed one.
    return [item for item in os.listdir(path)
            if not (item.startswith('.')
                    and os.path.isfile(os.path.join(path, item)))]
if __name__ == '__main__':
    # Run the experiment only when this file is executed as a script, so
    # importing the module does not trigger directory reads and printing.
    handwritingClassTest()