# -------------------- Code for Question 2.2 part 1 -----------------------------
# Clear environment
rm(list = ls())
# Load the kernlab library (which contains the ksvm function) and read in the data
#
library(kernlab)
# ---------------------------- Data manipulation -------------------------------------
data <- read.table("/Users/Chewy/Downloads/credit_card_data.txt", stringsAsFactors = FALSE, header = FALSE)
#
# optional check to make sure the data is read correctly
#
head(data)
# Console output for head(data)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11
## 1 1 30.83 0.000 1.25 1 0 1 1 202 0 1
## 2 0 58.67 4.460 3.04 1 0 6 1 43 560 1
## 3 0 24.50 0.500 1.50 1 1 0 1 280 824 1
## 4 1 27.83 1.540 3.75 1 0 5 0 100 3 1
## 5 1 20.17 5.625 1.71 1 1 0 1 120 0 1
## 6 1 32.08 4.000 2.50 1 1 0 0 360 0 1
# NOTE: ALL ROWS OF THIS FILE STARTING WITH "##" DENOTE R OUTPUT
#
# Setting the random number generator seed so that our results are reproducible
# (Your solution doesn't need this, but it's usually good practice)
set.seed(1)
# -------------------------- Creating the models ------------------------------------
# -------------------------- Scaled=TRUE model ------------------------------------
# Fit the model using scaled=TRUE.
# V11 is response, other variables are predictors
#
model_scaled <- ksvm(as.matrix(data[,1:10]), as.factor(data[,11]),
                     type = "C-svc",        # Use C-classification method
                     kernel = "vanilladot", # Use simple linear kernel
                     C = 100,
                     scaled = TRUE)         # have ksvm scale the data for you
# Alternatively, this formula-interface call does the same thing (and overwrites the model above)
model_scaled <- ksvm(V11~., data = data,
                     type = "C-svc",        # Use C-classification method
                     kernel = "vanilladot", # Use simple linear kernel
                     C = 100,
                     scaled = TRUE)         # have ksvm scale the data for you
# attributes() shows what slots the model object has available to reference.
# For example, we use model_scaled@b to get the intercept and model_scaled@coef to get the coefficients.
# Those slot names (b and coef) are listed in the console output of attributes(model_scaled).
attributes(model_scaled)
# Console output for attributes(model_scaled) is left out since it is a long output
# Printing model_scaled shows some high-level information about the fitted model
model_scaled
# Console output for model_scaled
##
## Support Vector Machine object of class "ksvm"
## SV type: C-svc (classification)
## parameter : cost C = 100
## Linear (vanilla) kernel function.
## Number of Support Vectors : 189
## Objective Function Value : -17887.92
## Training error : 0.136086
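# Note: the reported training error (0.136086) matches the accuracy we compute
# below, since 1 - 0.8639144 is approximately 0.136086.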
# -------------------------- Calculating the a coefficients ------------------------------------
#
# Classification is done with the linear kernel: a * scaled(x) + a0.
# Unfortunately, the model does not output a directly, but we can recover a from the model object.
# Calculate a1 through am from the support vectors stored in the model object and their
# coefficients: multiplying the rows of xmatrix by coef and summing down the columns gives
# the linear combination of support vectors that defines a1,...,am.
# We use the xmatrix slot because the model stores these data points already scaled.
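# In equation form: if x_j are the (scaled) support vectors stored in
# model_scaled@xmatrix[[1]] and c_j the corresponding entries of
# model_scaled@coef[[1]], then
#   a = sum_j c_j * x_j
# and the classifier predicts 1 exactly when a . scaled(x) + a0 >= 0.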
a_scaled <- colSums(model_scaled@xmatrix[[1]] * model_scaled@coef[[1]])
#
# a0 is just -model_scaled@b
a0_scaled<- -model_scaled@b
#
a_scaled
a0_scaled
#Console output for a_scaled
## V1 V2 V3 V4 V5
## -0.0010065348 -0.0011729048 -0.0016261967 0.0030064203 1.0049405641
## V6 V7 V8 V9 V10
## -0.0028259432 0.0002600295 -0.0005349551 -0.0012283758 0.1063633995
#Console output for a0_scaled
## [1] 0.08158492
# -------------------------- Calculating the predicted values ------------------------------------
#
# kernlab provides a predict() method that implements this for us, but we also
# show how to get the predicted values directly from the a coefficients.
# Calculate the predicted values using the a's we got above and our data set.
# The coefficients for this model are based on the SCALED data points, so we need to
# scale our data points to get correct predictions. We use the scaling means and
# standard deviations for V1 to V10 stored in the model object:
#   model_scaled@scaling$x.scale$`scaled:center` (means of V1 to V10)
#   model_scaled@scaling$x.scale$`scaled:scale`  (standard deviations of V1 to V10)
# Each data point is then transformed to its scaled equivalent by:
#   scaled data point[i,1:10] = (data point[i,1:10] - center) / scale
#
#Create predicted vector (to hold our calculated predicted values)
predicted_scaled<-rep(0,nrow(data))
# For each data point, apply the scaling transformation, compute a*scaled(x) + a0,
# and classify the point by the sign of the result
for (i in 1:nrow(data)){
  scaled_point <- (data[i,1:10] - model_scaled@scaling$x.scale$`scaled:center`) /
                   model_scaled@scaling$x.scale$`scaled:scale`
  if (sum(a_scaled * scaled_point) + a0_scaled >= 0){
    predicted_scaled[i] <- 1  # on or above the classifier
  } else {
    predicted_scaled[i] <- 0  # below the classifier
  }
}
predicted_scaled
# Output from predicted_scaled
## [1] 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [42] 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1
## [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [124] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [165] 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [206] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## [247] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [288] 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [329] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [370] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [411] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [452] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [493] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [534] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1
## [575] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [616] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
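#
# As a cross-check, the same predictions can be computed without the loop.
# This is just an equivalent vectorized sketch (the helper names scaled_X and
# predicted_scaled_vec are ours, not part of the model object):
scaled_X <- scale(as.matrix(data[,1:10]),
                  center = model_scaled@scaling$x.scale$`scaled:center`,
                  scale = model_scaled@scaling$x.scale$`scaled:scale`)
predicted_scaled_vec <- as.integer(scaled_X %*% a_scaled + a0_scaled >= 0)
# predicted_scaled_vec should match predicted_scaled element-by-element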
# Get prediction from ksvm model we created, model_scaled
# Note that we could also get the predicted values of the model using model_scaled@fitted
#
pred_scaled <- predict(model_scaled,data[,1:10])
pred_scaled
#Output from pred_scaled
## [1] 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [42] 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1
## [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [124] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [165] 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [206] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## [247] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [288] 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [329] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [370] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [411] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [452] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [493] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [534] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1
## [575] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [616] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
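# Before comparing with our manual calculation: the @fitted slot mentioned
# above should agree with predict() on the training data
all(model_scaled@fitted == pred_scaled)  # should return TRUE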
# typing "pred_scaled" will give the sequence of 1s and 0s showing the model's classification
# As you can see in the outputs, pred and predicted have the same predicted values
# so we know that our a coefficients are correct for the SCALED data version of the model
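# Rather than comparing the two long output vectors by eye, we can check the
# agreement programmatically:
all(pred_scaled == predicted_scaled)  # should return TRUE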
# -------------------------- Calculating the model's accuracy ------------------------------------
#
# I will use a simple accuracy measure: the percent of the data points
# (here, the training observations) that are correctly classified.
sum(pred_scaled == data$V11) / nrow(data)
sum(predicted_scaled == data$V11) / nrow(data)
#Output from sum(pred_scaled == data$V11) / nrow(data)
## [1] 0.8639144
#
#Output from sum(predicted_scaled == data$V11) / nrow(data)
## [1] 0.8639144
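#
# For a finer-grained view than overall accuracy, a confusion table shows how
# the errors split between the two classes (a quick sketch using base R):
table(predicted = pred_scaled, actual = data$V11)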
# Note that a wide range of values of C produces this same result.
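# One quick way to see this (a sketch; the grid of C values below is only
# illustrative) is to refit the model over several values of C and compare:
for (C_val in c(0.01, 0.1, 1, 10, 100, 1000)) {
  m <- ksvm(as.matrix(data[,1:10]), as.factor(data[,11]),
            type = "C-svc", kernel = "vanilladot", C = C_val, scaled = TRUE)
  cat("C =", C_val, "accuracy =",
      sum(predict(m, data[,1:10]) == data$V11) / nrow(data), "\n")
}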
# -------------------------- Scaled=FALSE model ------------------------------------
# Fit the model using scaled=FALSE.
# V11 is response, other variables are predictors
#
model_unscaled <- ksvm(as.matrix(data[,1:10]), as.factor(data[,11]),
                       type = "C-svc",        # Use C-classification method
                       kernel = "vanilladot", # Use simple linear kernel
                       C = 100,
                       scaled = FALSE)        # ksvm will not scale the data for you
# Alternatively, this formula-interface call does the same thing (and overwrites the model above)
model_unscaled <- ksvm(V11~., data = data,
                       type = "C-svc",        # Use C-classification method
                       kernel = "vanilladot", # Use simple linear kernel
                       C = 100,
                       scaled = FALSE)        # ksvm will not scale the data for you
# attributes() shows what slots the model object has available to reference.
# For example, we use model_unscaled@b to get the intercept and model_unscaled@coef to get the coefficients.
# Those slot names (b and coef) are listed in the console output of attributes(model_unscaled).
attributes(model_unscaled)
# Console output for attributes(model_unscaled) is left out since it is a long output
# Printing model_unscaled shows some high-level information about the fitted model
model_unscaled
# Console output for model_unscaled
##
## Support Vector Machine object of class "ksvm"
## SV type: C-svc (classification)
## parameter : cost C = 100
## Linear (vanilla) kernel function.
## Number of Support Vectors : 186
## Objective Function Value : -2213.731
## Training error : 0.278287
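# Note: the reported training error (0.278287) matches the accuracy we compute
# below, since 1 - 0.7217125 is approximately 0.278287.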
# -------------------------- Calculating the a coefficients ------------------------------------
#
# Classification is done with the linear kernel: a * unscaled(x) + a0 = a*x + a0.
# Unfortunately, the model does not output a directly, but we can recover a from the model object.
# Calculate a1 through am from the support vectors stored in the model object and their
# coefficients: multiplying the rows of xmatrix by coef and summing down the columns gives
# the linear combination of support vectors that defines a1,...,am.
# We use the xmatrix slot; here the model stores these data points unscaled.
a_unscaled <- colSums(model_unscaled@xmatrix[[1]] * model_unscaled@coef[[1]])
#
# a0 is just -model_unscaled@b
a0_unscaled <- -model_unscaled@b
#
a_unscaled
a0_unscaled
#Console output for a_unscaled
## V1 V2 V3 V4 V5
## -0.0483050561 -0.0083148473 -0.0836550114 0.1751121271 1.8254844547
## V6 V7 V8 V9 V10
## 0.2763673361 0.0654782414 -0.1108211169 -0.0047229653 -0.0007764962
#Console output for a0_unscaled
## [1] 0.5255393
# -------------------------- Calculating the predicted values ------------------------------------
#
# kernlab provides a predict() method that implements this for us, but we also
# show how to get the predicted values directly from the a coefficients.
# Calculate the predicted values using the a's we got above and our data set.
# The coefficients for this model are based on the UNSCALED data points, so we do not
# need to scale our data points to get correct predictions.
#Create predicted vector (to hold our calculated predicted values)
predicted_unscaled<-rep(0,nrow(data))
# For each data point, compute a*(data point) + a0 and classify the point
# by the sign of the result
for (i in 1:nrow(data)){
  if (sum(a_unscaled * data[i,1:10]) + a0_unscaled >= 0){
    predicted_unscaled[i] <- 1  # on or above the classifier
  } else {
    predicted_unscaled[i] <- 0  # below the classifier
  }
}
predicted_unscaled
# Output from predicted_unscaled
## [1] 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0 1 1 1
## [42] 1 1 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 0 1 1 0 1 1 1 0 1 0 0 0 0 0 1 0 1 0 1 1 1 0 1 1
## [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1
## [124] 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 0 1 1 0 1
## [165] 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
## [206] 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0 0 0 1 0 1 0 1 0 1 1 0 1 1 1 1 1 0 1 0
## [247] 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [288] 1 0 0 0 0 0 1 0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## [329] 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## [370] 1 0 1 0 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0
## [411] 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## [452] 1 1 0 1 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 0 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1
## [493] 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 0
## [534] 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0 1 0 0 1 1 0 1 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 1
## [575] 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1
## [616] 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1
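#
# As before, a vectorized cross-check sketch (no scaling step is needed here
# because the model was fit with scaled=FALSE; predicted_unscaled_vec is just
# a name for this sketch):
predicted_unscaled_vec <- as.integer(as.matrix(data[,1:10]) %*% a_unscaled + a0_unscaled >= 0)
# predicted_unscaled_vec should match predicted_unscaled element-by-element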
# Get prediction from ksvm model we created
#
pred_unscaled <- predict(model_unscaled,data[,1:10])
pred_unscaled
#Output from pred_unscaled
## [1] 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0 1 1 1
## [42] 1 1 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 0 1 1 0 1 1 1 0 1 0 0 0 0 0 1 0 1 0 1 1 1 0 1 1
## [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1
## [124] 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 0 1 1 0 1
## [165] 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
## [206] 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0 0 0 1 0 1 0 1 0 1 1 0 1 1 1 1 1 0 1 0
## [247] 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [288] 1 0 0 0 0 0 1 0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## [329] 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## [370] 1 0 1 0 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0
## [411] 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## [452] 1 1 0 1 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 0 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1
## [493] 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 0
## [534] 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0 1 0 0 1 1 0 1 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 1
## [575] 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1
## [616] 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1
# typing "pred_unscaled" will give the sequence of 1s and 0s showing the model's classification
# As you can see in the outputs, pred and predicted have the same predicted values
# so we know that our a coefficients are correct for the SCALED data version of the model
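# Again, checking agreement programmatically instead of by eye:
all(pred_unscaled == predicted_unscaled)  # should return TRUE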
# -------------------------- Calculating the model's accuracy ------------------------------------
#
# I will use a simple accuracy measure: the percent of the data points
# (here, the training observations) that are correctly classified.
sum(pred_unscaled == data$V11) / nrow(data)
sum(predicted_unscaled == data$V11) / nrow(data)
#Output from sum(pred_unscaled == data$V11) / nrow(data)
## [1] 0.7217125
#
#Output from sum(predicted_unscaled == data$V11) / nrow(data)
## [1] 0.7217125
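#
# -------------------------- Comparing the two models ------------------------------------
# Putting the two results side by side: the scaled model classifies about 86.4%
# of the data points correctly versus about 72.2% for the unscaled model, so
# letting ksvm scale the predictors clearly helps on this data set.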