-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTelco_Classification_RF.R
67 lines (43 loc) · 1.68 KB
/
Telco_Classification_RF.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#### ---- Telco Customer Churn using Random Forest ----- ####
# Author- Abhirami A
###------------------------------------------------------------
# Attach the packages whose functions this script calls without a namespace
# prefix. library() is used instead of require(): require() only returns FALSE
# when a package is missing, so the script would carry on and fail later at the
# first unresolved function call; library() stops right here with a clear error.
packages_req <- c('tidyverse','randomForest','mice')
for (pkg in packages_req) {
  library(pkg, character.only = TRUE)
}
# caret is only ever called as caret::fun() in this script, so it does not need
# to be attached, but it must be installed. Check up front rather than failing
# after the data has already been loaded.
if (!requireNamespace("caret", quietly = TRUE)) {
  stop("Package 'caret' is required (used via caret::) but is not installed.",
       call. = FALSE)
}
# Read the raw churn data from disk.
# NOTE(review): the path is machine-specific; adjust it before running elsewhere.
file_path <- "C:/Users/abu/Desktop/Machine Learning/Abhirami's Projects/4. Random Forests/Telco-Customer-Churn.csv"
# stringsAsFactors = TRUE is set explicitly. Since R 4.0.0 read.csv() keeps
# text columns as character by default, but the rest of this script needs
# factors: randomForest() only performs classification when the response is a
# factor, and caret::confusionMatrix() expects factor inputs.
raw_data <- read.csv(file_path, header = TRUE, stringsAsFactors = TRUE)
glimpse(raw_data)
# remove unwanted cols: drops the first column
# (presumably a row identifier -- confirm against the CSV header)
data1 <- raw_data[, -1]
glimpse(data1)
# check for NAs: md.pattern() tabulates the missing-data patterns per column
md.pattern(data1)
# keep complete rows only
data1 <- na.omit(data1)
glimpse(data1)
# create test and train data
# Fix the RNG seed so the 80/20 partition (and therefore every metric computed
# from it further down) is the same on every run.
set.seed(123)
# createDataPartition() samples within the levels of Churn, so both subsets keep
# roughly the same class balance; list = FALSE returns the row indices as a
# matrix that can be used directly for row subsetting.
train_idx <- caret::createDataPartition(data1$Churn, p = 0.8, list = FALSE)
train <- data1[train_idx, ]
nrow(train)
# rows not selected for training form the hold-out set
test <- data1[-train_idx, ]
nrow(test)
# fit random forest model
# Random forests draw bootstrap samples and candidate variables at random, so
# seed the RNG to make the fitted model and its error estimates reproducible.
set.seed(123)
RF_model <- randomForest::randomForest(
  Churn ~ .,
  data = train,
  ntree = 1000,   # number of trees grown
  mtry = 4,       # variables tried at each split; sqrt(19) ~= 4.36, rounded down
                  # (assumes 19 predictors -- confirm against ncol(train) - 1)
  nodesize = 30,  # minimum number of observations in a terminal node
  sampsize = 75   # number of rows sampled to grow each tree
)
# error rate as a function of the number of trees
plot(RF_model)
# print the call, OOB error estimate and OOB confusion matrix
RF_model
# predict values
# Drop the response by name rather than by a hard-coded column position, so the
# call keeps working if columns are added, removed or reordered upstream.
pred_val <- predict(RF_model,
                    newdata = test[, names(test) != "Churn", drop = FALSE],
                    type = "response")
# confusion matrix
# caret::confusionMatrix() takes the predictions as `data` and the ground truth
# as `reference`. Passing them the other way round transposes the table and
# swaps the class-wise statistics (sensitivity <-> pos. pred. value, etc.);
# overall accuracy and kappa are symmetric and are not affected.
conf_matrix <- caret::confusionMatrix(data = pred_val, reference = test$Churn)
conf_matrix # kappa is only 37% (as observed in the author's original run)
# important variables
# Unscaled importance of each predictor; caret returns a data frame with one
# row per variable (variable names as row names) and an `Overall` column.
imp_var <- caret::varImp(RF_model, scale = FALSE)
imp_var
# Move the row names into a regular `rowname` column and sort by decreasing
# importance. tibble::rownames_to_column() replaces the deprecated
# dplyr::add_rownames() and uses the same default column name.
imp_var_sorted <- imp_var %>%
  tibble::rownames_to_column(var = "rowname") %>%
  arrange(desc(Overall))
imp_var_sorted
# Turn the names into an ordered factor whose level order is the sorted order,
# so the plot shows variables from most to least important instead of
# alphabetically.
imp_var_sorted$rowname <- ordered(imp_var_sorted$rowname,
                                  levels = imp_var_sorted$rowname)
# Close the current graphics device before drawing the new plot, but only if
# one is actually open: dev.off() raises an error when just the null device
# (device 1) is active, e.g. when this section is run on its own.
if (grDevices::dev.cur() > 1L) {
  grDevices::dev.off()
}
plot(imp_var_sorted$rowname, imp_var_sorted$Overall)