-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunctions.R
64 lines (42 loc) · 1.61 KB
/
functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#' Count missing values in every column.
#'
#' @param DataFrame A data frame (or list) whose columns are inspected.
#' @return A named integer vector with one entry per column giving the number
#'   of `NA` values found in it.
num_NAS <- function(DataFrame) {
  # vapply() pins the result to an integer vector; sapply() would silently
  # fall back to a list when there are no columns to iterate over.
  vapply(DataFrame, function(x) sum(is.na(x)), integer(1))
}
#' Most frequent value of a vector.
#'
#' @param x A vector whose values are tabulated.
#' @return The label of the value with the highest count, as a character
#'   string (the first name of the frequency table sorted in decreasing order).
return_max <- function(x) {
  counts <- table(x)
  ranked <- sort(counts, decreasing = TRUE)
  names(ranked)[1]
}
#' Print a density-scaled histogram with a density line on top.
#'
#' @param X Data passed to `ggplot()`.
#' @param var Name of the column to plot, given as a string.
#' @return The result of `print()` on the assembled plot.
plot_histogram <- function(X, var) {
  mapping <- aes_string(var, "..density..")
  density_curve <- geom_line(aes(y = ..density..), stat = "density")
  # Layer order matters: histogram first, density line drawn over it.
  fig <- ggplot(X, mapping) + geom_histogram() + ylab("") + density_curve
  print(fig)
}
#' Print a box plot of one column against another.
#'
#' @param X Data passed to `ggplot()`.
#' @param X_var Name of the column mapped to the x axis, given as a string.
#' @param Y_var Name of the column mapped to the y axis, given as a string.
#' @return The result of `print()` on the assembled plot.
plot_boxplot <- function(X, X_var, Y_var) {
  # Plot the data that was passed in; the previous body read a global named
  # `train` and ignored the `X` argument entirely.
  fig <- ggplot(X, aes_string(X_var, Y_var)) + geom_boxplot()
  print(fig)
}
#' Print a density-scaled histogram split into facets.
#'
#' @param X Data passed to `ggplot()`.
#' @param var Name of the column to plot, given as a string.
#' @param facet_by Facet specification forwarded unchanged to `facet_wrap()`.
#' @return The result of `print()` on the assembled plot.
plot_faceted_histogram <- function(X, var, facet_by) {
  base_plot <- ggplot(X, aes_string(var, "..density.."))
  panels <- facet_wrap(facet_by)
  print(base_plot + geom_histogram() + panels)
}
#' Print a bar chart of counts for one column.
#'
#' @param X Data passed to `ggplot()`.
#' @param var Name of the column to count, given as a string.
#' @return The result of `print()` on the assembled plot.
plot_bar_plot_count <- function(X, var) {
  # Rotate x-axis labels by 90 degrees.
  rotated_labels <- theme(axis.text.x = element_text(angle = 90, hjust = 1))
  chart <- ggplot(X, aes_string(var)) + geom_bar() + rotated_labels
  print(chart)
}
#' Print a bar chart of the per-group mean of a column.
#'
#' @param X Data to summarise.
#' @param var1 Name of the grouping column, given as a string.
#' @param var2 Name of the column whose mean is computed per group, given as a
#'   string. NOTE(review): the y axis is labelled "Proportion", so this is
#'   presumably a 0/1 or logical column -- confirm against callers.
#' @return The result of `print()` on the assembled plot.
plot_bar_plot_prop <- function(X, var1, var2) {
  # Both column names go through the same tidy-eval idiom (`!!sym()`); the
  # deprecated `group_by_()` is no longer used.
  df <- X %>%
    group_by(!!sym(var1)) %>%
    summarise(mean = mean(!!sym(var2)))
  print(
    ggplot(df, aes_string(var1, "mean")) +
      geom_bar(stat = "identity") +
      theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
      ylab("Proportion")
  )
}
#' Print a stacked bar chart of counts.
#'
#' @param X Data passed to `ggplot()`.
#' @param var1 Name of the column mapped to the x axis, given as a string.
#' @param var2 Name of the column mapped to the fill colour, given as a string.
#' @return The result of `print()` on the assembled plot.
plot_bar_plot_stacked <- function(X, var1, var2) {
  mapping <- aes_string(var1, fill = var2)
  # Rotate x-axis labels by 90 degrees.
  rotated_labels <- theme(axis.text.x = element_text(angle = 90, hjust = 1))
  chart <- ggplot(X, mapping) + geom_bar(position = "stack") + rotated_labels
  print(chart)
}
#' Split a data frame into training and testing sets.
#'
#' Row membership is decided by `caTools::sample.split()` applied to the
#' dependent-variable column, after seeding the random number generator.
#' The caTools package must be installed.
#'
#' @param DataFrame The data frame to split.
#' @param DepVar Name (or index) of the dependent-variable column.
#' @param Split Value forwarded to `sample.split()` as `SplitRatio`.
#' @param seed Value passed to `set.seed()` before sampling.
#' @return A list with two elements, `train` and `test`, holding the rows
#'   selected and not selected by the split respectively.
train_test_split <- function(DataFrame, DepVar, Split, seed) {
  set.seed(seed)
  # `[[` always yields the column as a vector, including for tibbles, and the
  # namespaced call avoids attaching caTools to the caller's search path.
  in_train <- caTools::sample.split(
    Y = DataFrame[[DepVar]],
    SplitRatio = Split
  )
  # drop = FALSE keeps a data frame even when there is only one column.
  parts <- list(
    train = DataFrame[in_train, , drop = FALSE],
    test = DataFrame[!in_train, , drop = FALSE]
  )
  # Emitted before returning; previously this sat after return() and never ran.
  message("The training and testing datasets have been created.")
  parts
}