-
Notifications
You must be signed in to change notification settings - Fork 56
/
Copy pathsolution_2.2-3.R
75 lines (53 loc) · 2.27 KB
/
solution_2.2-3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# -------------------- Code for Question 2.2 part 3 -----------------------------
# Start from an empty workspace
rm(list = ls())
# Attach the kknn package (it provides the kknn() function), then load the data
#
library(kknn)
data <- read.table(
  file = "credit_card_data.txt",
  header = FALSE,
  stringsAsFactors = FALSE
)
#
# Optional sanity check: print the first rows to confirm the file was parsed
#
head(data)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11
## 1 1 30.83 0.000 1.25 1 0 1 1 202 0 1
## 2 0 58.67 4.460 3.04 1 0 6 1 43 560 1
## 3 0 24.50 0.500 1.50 1 1 0 1 280 824 1
## 4 1 27.83 1.540 3.75 1 0 5 0 100 3 1
## 5 1 20.17 5.625 1.71 1 1 0 1 120 0 1
## 6 1 32.08 4.000 2.50 1 1 0 0 360 0 1
# NOTE: ALL ROWS OF THIS FILE STARTING WITH "##" DENOTE R OUTPUT
#
# Create a function to calculate the accuracy of the model with k=X
#
# Leave-one-out accuracy of a k-nearest-neighbours model.
#
# Arguments:
#   X       - number of neighbours, passed to kknn() as `k`.
#   dataset - data frame holding predictors V1..V10 and response V11.
#             Defaults to the global `data`, so check_accuracy(X) keeps working.
#
# Returns: the fraction of rows whose prediction, rounded to 0 or 1,
#          equals the row's V11 value.
check_accuracy <- function(X, dataset = data) {
  # leave-one-out needs at least one row to predict and one row to learn from
  stopifnot(is.data.frame(dataset), nrow(dataset) >= 2)

  n_rows <- nrow(dataset)
  predicted <- integer(n_rows) # predictions: start with a vector of all zeros

  # for each row, estimate its response based on the other rows
  for (i in seq_len(n_rows)) {
    # dataset[-i, ] removes ROW i when finding nearest neighbours;
    # otherwise the row would be its own nearest neighbour
    model <- kknn(
      V11 ~ V1 + V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9 + V10,
      dataset[-i, ],
      dataset[i, ],
      k = X,
      scale = TRUE # use scaled data
    )
    # adding 0.5 and truncating rounds the fitted value off to 0 or 1
    predicted[i] <- as.integer(fitted(model) + 0.5)
  }

  # fraction of correct predictions
  sum(predicted == dataset[["V11"]]) / n_rows
}
#
# Now call the function for values of k from 1 to 20 (you could try higher values of k too)
#
acc <- numeric(20) # one accuracy slot per candidate k, initialised to zero
for (X in seq_along(acc)) {
  # leave-one-out accuracy of knn with X neighbours
  acc[X] <- check_accuracy(X)
}
#
# show the accuracy obtained for each k
#
acc
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
##[1,] 0.8149847 0.8149847 0.8149847 0.8149847 0.851682 0.8455657 0.8470948 0.8486239
## [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17]
##[1,] 0.8470948 0.8501529 0.851682 0.853211 0.851682 0.851682 0.853211 0.851682 0.851682
## [,18] [,19] [,20]
##[1,] 0.851682 0.8501529 0.8501529