-
Notifications
You must be signed in to change notification settings - Fork 0
/
LPF.R
164 lines (161 loc) · 7.65 KB
/
LPF.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# Session setup: show the working directory, attach the packages used by the
# script, and read the training and test CSVs with strings kept as character.
getwd()
library(randomForest)
library(Hmisc)
library(rpart)
library(xda)
library(car)

loan <- read.csv(
  file = "C:/Users/user/Documents/LoanPrediction/train.csv",
  stringsAsFactors = FALSE
)
data.test <- read.csv(
  file = "C:/Users/user/Documents/LoanPrediction/test.csv",
  stringsAsFactors = FALSE
)

# First look at the training data: shape, structure, per-column summary, and
# the spreadsheet-style viewer.
dim(loan)
str(loan)
summary(loan)
View(loan)
# Convert the categorical training columns (including the Loan_Status target)
# to factors in a single pass instead of one assignment per column.
loan_factor_cols <- c(
  "Gender", "Married", "Dependents", "Education", "Self_Employed",
  "Credit_History", "Property_Area", "Loan_Status"
)
loan[loan_factor_cols] <- lapply(loan[loan_factor_cols], as.factor)

# LoanAmount is forced to numeric so later arithmetic and medians work on it.
loan$LoanAmount <- as.numeric(loan$LoanAmount)
colnames(loan)
# Exploratory look at the training data: distributions of the numeric columns,
# cross-tabs of each categorical column against Loan_Status, and group means.
# This section is read-only; it must not alter `loan`.
barplot(loan$ApplicantIncome, main = "Applicant Income")
hist(loan$ApplicantIncome)
barplot(loan$CoapplicantIncome, main = "Coapplicant Income")
barplot(loan$LoanAmount, main = "Loan Amount")
hist(loan$LoanAmount)

# Report how many loan terms are missing without touching the observed values.
# Overwriting the non-missing terms here (mask `!is.na(...)`) would turn the
# whole column into 0/NA, so the median imputation further down would fill in
# 0 and the Interest feature would be 0 for every training row, while the test
# set keeps its real terms.
sum(is.na(loan$Loan_Amount_Term))
loan$Loan_Amount_Term
plot(loan$Loan_Amount_Term, main = "Amount Term")
table(loan$Loan_Amount_Term)

# Approval counts by each categorical predictor.
table(loan$Gender, loan$Loan_Status)
table(loan$Married, loan$Loan_Status)
table(loan$Dependents, loan$Loan_Status)
table(loan$Education, loan$Loan_Status)
table(loan$Self_Employed, loan$Loan_Status)
table(loan$Credit_History, loan$Loan_Status)
table(loan$Property_Area, loan$Loan_Status)

# Mean of each numeric predictor within each Loan_Status group.
aggregate(ApplicantIncome ~ Loan_Status, loan, mean)
aggregate(CoapplicantIncome ~ Loan_Status, loan, mean)
aggregate(LoanAmount ~ Loan_Status, loan, mean)
aggregate(Loan_Amount_Term ~ Loan_Status, loan, mean)
colnames(loan)
# Replace blank categorical entries in the training set with a fixed level.
# Columns are addressed by position; positions 2, 3, 4 and 6 are assumed to be
# Gender, Married, Dependents and Self_Employed, matching the row masks.
loan[loan$Gender == "", 2] <- "Female"
loan[loan$Married == "", 3] <- "Yes"
loan[loan$Dependents == "", 4] <- 0
loan[loan$Self_Employed == "", 6] <- "No"

# Missing numeric values take the median of the observed values.
loan$LoanAmount[is.na(loan$LoanAmount)] <-
  median(loan$LoanAmount, na.rm = TRUE)
loan$Loan_Amount_Term[is.na(loan$Loan_Amount_Term)] <-
  median(loan$Loan_Amount_Term, na.rm = TRUE)

# Missing Credit_History is predicted by a classification tree fitted on the
# rows where it is known, using columns 2 to 12 as the modelling frame.
loan1 <- loan[, 2:12]
class_mod <- rpart(
  Credit_History ~ .,
  data = loan1[!is.na(loan1$Credit_History), ],
  method = "class",
  na.action = na.omit
)
cred_hist_pred <- predict(class_mod, loan1[is.na(loan1$Credit_History), ])
# For each predicted row, take the column name holding the largest value and
# turn that label into a number before writing it back.
predicteds <- as.numeric(
  colnames(cred_hist_pred)[apply(cred_hist_pred, 1, which.max)]
)
loan$Credit_History[is.na(loan$Credit_History)] <- predicteds

# Remaining NA count per column, then the frame's shape and names.
vapply(loan, function(x) sum(is.na(x)), integer(1))
dim(loan)
colnames(loan)
# Engineered numeric features for the training set.
loan$total_Income <- with(loan, ApplicantIncome + CoapplicantIncome)
loan$Income_shr_fr_loan <- with(loan, LoanAmount / total_Income)
# Loan amount times a fixed 0.09 rate times the term, divided by 12.
loan$Interest <- with(loan, (LoanAmount * 0.09 * Loan_Amount_Term) / 12)
# 1 when ANY of the four conditions holds, otherwise 0.
loan$positivescore <- with(
  loan,
  ifelse(
    Married == "No" | Dependents == "0" | Education == "Graduate" |
      Self_Employed == "No",
    1, 0
  )
)

# 0/1 indicator columns, one per categorical level of interest.
loan$D_Gender_M <- with(loan, ifelse(Gender == "Male", 1, 0))
loan$D_Married_Yes <- with(loan, ifelse(Married == "Yes", 1, 0))
loan$D_Dependents_0 <- with(loan, ifelse(Dependents == "0", 1, 0))
loan$D_Dependents_1 <- with(loan, ifelse(Dependents == "1", 1, 0))
loan$D_Dependents_2 <- with(loan, ifelse(Dependents == "2", 1, 0))
loan$D_Dependents_3 <- with(loan, ifelse(Dependents == "3", 1, 0))
loan$D_Dependents_3p <- with(loan, ifelse(Dependents == "3+", 1, 0))
loan$D_Education_Grad <- with(loan, ifelse(Education == "Graduate", 1, 0))
loan$D_Self_Employed_No <- with(loan, ifelse(Self_Employed == "No", 1, 0))
loan$D_Property_Rural <- with(loan, ifelse(Property_Area == "Rural", 1, 0))
loan$D_Property_Semiurban <- with(
  loan, ifelse(Property_Area == "Semiurban", 1, 0)
)
loan$D_CreditHistory_Yes <- with(loan, ifelse(Credit_History == "1", 1, 0))

# Target kept as "Y"/"N" labels rather than 0/1.
loan$D_LoanStatus_Yes <- with(loan, ifelse(Loan_Status == "Y", "Y", "N"))
colnames(loan)
# Inspect the test set: viewer, NA count per column, and column names.
View(data.test)
vapply(data.test, function(x) sum(is.na(x)), integer(1))
colnames(data.test)

# Convert the categorical test columns to factors in a single pass.
test_factor_cols <- c(
  "Gender", "Married", "Dependents", "Education", "Self_Employed",
  "Credit_History", "Property_Area"
)
data.test[test_factor_cols] <- lapply(data.test[test_factor_cols], as.factor)
str(data.test)
# Replace blank categorical entries in the test set with a fixed level.
# Columns are addressed by position; positions 2, 3, 4 and 6 are assumed to be
# Gender, Married, Dependents and Self_Employed, matching the row masks.
data.test[data.test$Gender == "", 2] <- "Female"
data.test[data.test$Married == "", 3] <- "Yes"
data.test[data.test$Dependents == "", 4] <- 0
data.test[data.test$Self_Employed == "", 6] <- "No"

# Missing numeric values take the median of the observed test values.
data.test$LoanAmount[is.na(data.test$LoanAmount)] <-
  median(data.test$LoanAmount, na.rm = TRUE)
data.test$Loan_Amount_Term[is.na(data.test$Loan_Amount_Term)] <-
  median(data.test$Loan_Amount_Term, na.rm = TRUE)

# Missing Credit_History is predicted by a classification tree fitted on the
# test rows where it is known, using columns 2 to 12 as the modelling frame.
data.test1 <- data.test[, 2:12]
class_mod <- rpart(
  Credit_History ~ .,
  data = data.test1[!is.na(data.test1$Credit_History), ],
  method = "class",
  na.action = na.omit
)
cred_hist_pred <- predict(
  class_mod, data.test1[is.na(data.test1$Credit_History), ]
)
# For each predicted row, take the column name holding the largest value and
# turn that label into a number before writing it back.
predicteds <- as.numeric(
  colnames(cred_hist_pred)[apply(cred_hist_pred, 1, which.max)]
)
data.test$Credit_History[is.na(data.test$Credit_History)] <- predicteds

# Remaining NA count per column, then the frame's shape.
vapply(data.test, function(x) sum(is.na(x)), integer(1))
dim(data.test)
# Engineered numeric features for the test set, built with the same formulas
# as the training features.
data.test$total_Income <- with(data.test, ApplicantIncome + CoapplicantIncome)
data.test$Income_shr_fr_loan <- with(data.test, LoanAmount / total_Income)
# Loan amount times a fixed 0.09 rate times the term, divided by 12.
data.test$Interest <- with(
  data.test, (LoanAmount * 0.09 * Loan_Amount_Term) / 12
)
# 1 when ANY of the four conditions holds, otherwise 0.
data.test$positivescore <- with(
  data.test,
  ifelse(
    Married == "No" | Dependents == "0" | Education == "Graduate" |
      Self_Employed == "No",
    1, 0
  )
)

# 0/1 indicator columns, one per categorical level of interest.
data.test$D_Gender_M <- with(data.test, ifelse(Gender == "Male", 1, 0))
data.test$D_Married_Yes <- with(data.test, ifelse(Married == "Yes", 1, 0))
data.test$D_Dependents_0 <- with(data.test, ifelse(Dependents == "0", 1, 0))
data.test$D_Dependents_1 <- with(data.test, ifelse(Dependents == "1", 1, 0))
data.test$D_Dependents_2 <- with(data.test, ifelse(Dependents == "2", 1, 0))
data.test$D_Dependents_3 <- with(data.test, ifelse(Dependents == "3", 1, 0))
data.test$D_Dependents_3p <- with(data.test, ifelse(Dependents == "3+", 1, 0))
data.test$D_Education_Grad <- with(
  data.test, ifelse(Education == "Graduate", 1, 0)
)
data.test$D_Self_Employed_No <- with(
  data.test, ifelse(Self_Employed == "No", 1, 0)
)
data.test$D_Property_Rural <- with(
  data.test, ifelse(Property_Area == "Rural", 1, 0)
)
data.test$D_Property_Semiurban <- with(
  data.test, ifelse(Property_Area == "Semiurban", 1, 0)
)
data.test$D_CreditHistory_Yes <- with(
  data.test, ifelse(Credit_History == "1", 1, 0)
)

# Print both sets of column names for a side-by-side comparison.
colnames(loan)
colnames(data.test)
# Build the modelling frames by column position: columns 7-10 plus the
# trailing engineered columns (14-30 in `loan`, 13-28 in `data.test`).
# NOTE(review): the ranges presumably differ by one because only `loan`
# carries Loan_Status and the derived target; confirm against colnames().
train <- loan[, c(7:10, 14:30)]
test <- data.test[, c(7:10, 13:28)]

# The "Y"/"N" target must be a factor for classification.
train[["D_LoanStatus_Yes"]] <- as.factor(train[["D_LoanStatus_Yes"]])
colnames(train)
colnames(test)
str(train)

# Baseline random forest on every predictor, using the package defaults.
model <- randomForest(
  D_LoanStatus_Yes ~ .,
  data = train
)
model
# Baseline caret run: random forest evaluated with 10-fold cross-validation
# repeated 3 times, at a single mtry equal to sqrt(number of predictors).
library(randomForest)
library(mlbench)
library(caret)
colnames(train)

# Predictors are the first 20 columns of `train`, the target is column 21.
x <- train[, 1:20]
y <- train[, 21]

control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
seed <- 7
metric <- "Accuracy"
set.seed(seed)
mtry <- sqrt(ncol(x))
tunegrid <- expand.grid(.mtry = mtry)
rf_default <- train(
  D_LoanStatus_Yes ~ .,
  data = train,
  method = "rf",
  metric = metric,
  tuneGrid = tunegrid,
  trControl = control
)
print(rf_default)

# plot() on a caret `train` object stops with "There are no tuning parameters
# with more than 1 value" when the grid holds a single candidate, which is the
# case here. Only plot when there is more than one tuning row, so the script
# does not abort at this point.
if (nrow(rf_default$results) > 1) {
  print(plot(rf_default))
}
# Random search over mtry: same repeated 10-fold CV, with caret drawing up to
# 15 candidate values (tuneLength) instead of using a fixed grid.
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  search = "random"
)
set.seed(seed)
mtry <- sqrt(ncol(x))
rf_random <- train(
  D_LoanStatus_Yes ~ .,
  data = train,
  method = "rf",
  metric = metric,
  tuneLength = 15,
  trControl = control
)
print(rf_random)
plot(rf_random)
# Exhaustive grid search: evaluate every mtry from 1 to 15 under the same
# repeated 10-fold CV scheme.
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  search = "grid"
)
set.seed(seed)
tunegrid <- expand.grid(.mtry = 1:15)
rf_gridsearch <- train(
  D_LoanStatus_Yes ~ .,
  data = train,
  method = "rf",
  metric = metric,
  tuneGrid = tunegrid,
  trControl = control
)
print(rf_gridsearch)
plot(rf_gridsearch)

# Cross-check with randomForest's own mtry tuner on the x / y split.
set.seed(seed)
bestmtry <- tuneRF(
  x, y,
  stepFactor = 1.5,
  improve = 1e-5,
  ntree = 500
)
print(bestmtry)
# Score the test set with the grid-searched model and write the submission.
pred <- predict(rf_gridsearch, newdata = test)
test$Loan_Status <- pred
colnames(test)

# Build the output as a named data frame. cbind() on a character vector and a
# factor produces an unnamed character matrix in which the factor is replaced
# by its integer codes ("1"/"2"), so the file would lose both the "N"/"Y"
# labels and the column headers.
Results <- data.frame(
  Loan_ID = data.test$Loan_ID,
  Loan_Status = as.character(test$Loan_Status),
  stringsAsFactors = FALSE
)

# A randomForest fit has no `coefficients` element (it would print NULL);
# variable importance is the comparable per-predictor summary.
randomForest::importance(model)

write.csv(
  Results,
  "C:/Users/user/Documents/LoanPrediction/Final6.csv",
  row.names = FALSE
)