-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNaive bayes Classification
74 lines (49 loc) · 1.85 KB
/
Naive bayes Classification
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Naive Bayes Classification
#########################################################
## Data preparation
## The workspace is deliberately not cleared and the working directory is not
## changed, so running this script leaves the caller's session state alone.
## Point data_dir at the folder that holds the CSV file.
data_dir <- 'D:/SanjayPattanayak/NaiveBayes'
# load data; missing values are encoded as '?' in the raw file
bcData <- read.csv(
  file.path(data_dir, 'breast-cancer-wisconsin.data.csv'),
  na.strings = '?'
)
# check for NAs (the original author noted 16 NAs in the dataset)
sum(is.na(bcData))
### remove all the records with missing value
bcData2 <- na.omit(bcData)
# the data viewer needs a user at the console, so skip it in batch runs
if (interactive()) {
  View(bcData2)
}
str(bcData2$Class)
# converting dependent variable to factor
bcData2$Class <- as.factor(bcData2$Class)
# removing the first (sample identifier) column; keeps columns 2 to 11
bcData2 <- bcData2[, 2:11]
# split into training and testing: a random 25% of the rows is held out
# seed is set right before sampling so the split is reproducible
set.seed(123)
index <- sort(sample(nrow(bcData2), round(.25 * nrow(bcData2))))
# setdiff() instead of bcData2[-index, ]: negative indexing with an empty
# index would silently return zero rows instead of all rows
training <- bcData2[setdiff(seq_len(nrow(bcData2)), index), ]
test <- bcData2[index, ]
# Random Forest
library(randomForest)
# Fit a 2000-tree forest on all predictors; importance = TRUE makes the model
# record the per-variable importance measures that are read back below
fit <- randomForest(Class ~ ., data = training, importance = TRUE, ntree = 2000)
fit
summary(fit)
features <- importance(fit)
varImpPlot(fit)
# Predict the held-out rows and cross-tabulate against the true labels.
# The response is addressed by name (test$Class) rather than by column
# position so it always matches the Class ~ . formula used for fitting.
Prediction <- predict(fit, test)
table(actual = test$Class, Prediction)
wrong <- (test$Class != Prediction)
error_rate <- sum(wrong) / length(wrong)
paste0(
  'The Random Forest model gives an Error Rate of:',
  round(error_rate * 100, 2), '%'
)
# Mean Decrease in Accuracy: how much classification accuracy drops when the
# values of a feature are permuted; larger values mean a more important feature.
# The top three are taken from the fitted model instead of being hard-coded,
# so the message stays correct if the data, seed or split changes.
top_features <- head(
  names(sort(features[, 'MeanDecreaseAccuracy'], decreasing = TRUE)),
  3
)
paste0(
  'The three most important factors in terms of Mean Decrease Accuracy are:',
  paste(top_features, collapse = ', ')
)
# Naive Bayes
library(class)
library(e1071)
## Fit a Naive Bayes classifier on the training split using all variables
nBayes_all <- naiveBayes(Class ~ ., data = training)
## Classify the held-out rows and tabulate predicted vs. actual class
Predicted_Class <- predict(nBayes_all, newdata = test)
table(NBayes_all = Predicted_Class, Class = test$Class)
## Number of misclassified test rows, then that count as a share of all
## predictions
NB_wrong <- sum(test$Class != Predicted_Class)
NB_error_rate <- NB_wrong / length(Predicted_Class)
paste0(
  'The Naive Bayes model gives an Error Rate of:',
  round(100 * NB_error_rate, 2),
  '%'
)