-
Notifications
You must be signed in to change notification settings - Fork 1
/
Random_forest_pima_diabetic_prediction.R
46 lines (35 loc) · 1.36 KB
/
Random_forest_pima_diabetic_prediction.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# 1 - Data acquisition ----
# Load the Pima Indian women diabetes data set `Pima.te` shipped with MASS.
library(MASS)
data("Pima.te")

# 2 - Train/test split ----
# Keep 70% of the rows for training and hold out the rest for testing.
# The seed makes the random split reproducible.
set.seed(2)
library(caTools) # provides sample.split()
# sample.split() expects the vector of outcome labels, not the whole data
# frame: given a data frame it returns one flag per *column*, which would then
# be silently recycled across the rows. Passing the labels yields one flag per
# row and keeps the Yes/No ratio similar in both subsets.
split <- sample.split(Pima.te$type, SplitRatio = 0.7)
# `split` is a logical vector, so it is used directly as a row index instead
# of being compared with the strings 'TRUE' / 'FALSE'.
training_rf <- Pima.te[split, ]
testing_rf <- Pima.te[!split, ]
# 3 - Fit the random forest model ----
library(randomForest)
# Classify `type` using every other column of the training set as predictor.
model_rf <- randomForest(type ~ ., data = training_rf)
# Print the fitted forest (OOB error estimate and confusion matrix).
model_rf

# Plot the error rates against the number of trees grown.
# A forest cannot be drawn as a single tree: the rpart-style
# text(<model>, use.n = TRUE, ...) call stops with an error on a randomForest
# object, and `margin` is not an argument of the forest plot, so neither is
# used here. A legend identifies the OOB and per-class error curves instead.
plot(model_rf)
legend(
  "topright",
  legend = colnames(model_rf$err.rate),
  col = seq_len(ncol(model_rf$err.rate)),
  lty = seq_len(ncol(model_rf$err.rate)),
  cex = 0.8
)

# Rank the predictors by their mean decrease in Gini impurity.
# importance() and varImpPlot() belong to the randomForest package; they do
# not apply to a single decision-tree model.
importance(model_rf)
varImpPlot(model_rf)
# 4 - Prediction and accuracy on the held-out data ----
# type = "response" returns the predicted class labels as a factor, which is
# the form confusionMatrix() needs. ('class' is only accepted by randomForest
# as a backward-compatible alias for "response".)
pred <- predict(model_rf, newdata = testing_rf, type = "response")
pred
library(caret)
# Compare predictions with the true labels. "Yes" (diabetic) is declared the
# positive class so that sensitivity / specificity describe the detection of
# diabetes; by default the first factor level ("No") would be used.
confusionMatrix(data = pred, reference = testing_rf$type, positive = "Yes")
# 5 - Predict the class for a new observation ----
# Build the observation as a one-row data frame whose column names match the
# predictors used to fit the model; a data frame is the documented `newdata`
# input, whereas a bare list only works through implicit coercion.
new_data <- data.frame(
  npreg = 4, glu = 110, bp = 45, skin = 47, bmi = 51.3, ped = 0.776, age = 30
)
predicted_type <- predict(model_rf, newdata = new_data)
predicted_type