-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathabtest.r
106 lines (69 loc) · 2.16 KB
/
abtest.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
library(tidyverse)
# Omnibus test for a difference between groups in long-format data.
#
# long_df    : data frame with a numeric column `value` and a grouping
#              column `name`.
# all_normal : if TRUE use one-way ANOVA (aov); otherwise use the
#              Kruskal-Wallis rank sum test.
#
# Returns a single logical: TRUE when the test's p-value is below 0.05.
total_test <- function(long_df, all_normal = FALSE) {
  if (all_normal) {
    anova_table <- summary(aov(value ~ name, data = long_df))[[1]]
    # "Pr(>F)" has one entry per table row; the Residuals row is NA, so
    # only the first entry (the `name` term) is the p-value. Without the
    # [1] the result has length 2 and cannot be used in an `if` condition.
    p <- anova_table[["Pr(>F)"]][1]
  } else {
    p <- kruskal.test(value ~ name, data = long_df)$p.value
  }
  p < 0.05
}
# Two-sample test for a difference between two independent samples.
#
# vec1, vec2  : numeric vectors holding the two samples.
# both_normal : if TRUE run an unpaired equal-variance t-test; otherwise
#               run an unpaired Wilcoxon rank sum test.
#
# Returns TRUE when the chosen test's p-value is below 0.05.
pair_test <- function(vec1, vec2, both_normal = FALSE) {
  test_result <- if (both_normal) {
    t.test(vec1, vec2, paired = FALSE, var.equal = TRUE)
  } else {
    wilcox.test(vec1, vec2, paired = FALSE)
  }
  test_result$p.value < 0.05
}
# Pairwise A/B comparison of groups stored as the columns of a data frame.
#
# Steps:
#   1. stop early (all-zero matrix) if any group has zero standard deviation;
#   2. print boxplots of all groups;
#   3. check every group for normality with nortest::ad.test();
#   4. run the omnibus test total_test(); stop early (all-zero matrix) if it
#      does not report a difference;
#   5. compare every pair of groups with pair_test() and print a summary.
#
# df : data frame with at least two columns, one column per group. Missing
#      values are dropped when computing means / standard deviations and
#      when reshaping to long format.
#
# Returns list(mat = m), where m is an ncol(df) x ncol(df) numeric matrix:
#   m[i, j] ==  1 : groups i and j differ and group i has the larger mean;
#   m[i, j] == -1 : groups i and j differ and group i does not have the
#                   larger mean;
#   m[i, j] ==  0 : no significant difference (and the diagonal).
ab_test <- function(df) {
  if (!is.data.frame(df) || ncol(df) < 2) {
    stop("ab_test() needs a data frame with at least two columns",
         call. = FALSE)
  }
  n_groups <- ncol(df)

  # matrix of compares
  eq_mat <- matrix(0, nrow = n_groups, ncol = n_groups)

  # Per-group statistics are computed once and reused below.
  group_sd <- vapply(df, sd, numeric(1), na.rm = TRUE)
  group_mean <- vapply(df, mean, numeric(1), na.rm = TRUE)

  if (any(!is.na(group_sd) & group_sd == 0)) {
    cat('\n Element with zero dispersion! \n')
    cat('Means:\n')
    print(group_mean)
    cat('Standard dev.:\n')
    print(group_sd)
    return(list(mat = eq_mat))
  }

  # convert to long format
  long_df <- pivot_longer(df, cols = everything(), values_drop_na = TRUE) %>%
    mutate(name = factor(name))
  box_plot <- ggplot(long_df, aes(y = value, x = name)) +
    geom_boxplot() +
    labs(x = 'Group', title = 'Boxplots by groups') +
    theme_bw()
  print(box_plot)

  # check normal distribution
  normals_vec <- vapply(
    df,
    function(x) nortest::ad.test(x)$p.value > 0.05,
    logical(1)
  )

  # Global differences. Only the first element is used so that a
  # multi-element result from total_test() cannot break the `if` below.
  has_diff <- total_test(long_df, all(normals_vec))[1]
  if (!isTRUE(has_diff)) {
    cat('\n No difference \n')
    return(list(mat = eq_mat))
  }

  for (i in seq_len(n_groups - 1)) {
    for (j in (i + 1):n_groups) {
      is_norm <- normals_vec[[i]] && normals_vec[[j]]
      is_diff <- pair_test(df[[i]], df[[j]], is_norm)
      if (isTRUE(is_diff)) { # if stat. important
        # The sign records which group of the pair has the larger mean.
        direction <- if (group_mean[[i]] > group_mean[[j]]) 1 else -1
        eq_mat[i, j] <- direction
        eq_mat[j, i] <- -direction
      }
    }
  }

  cat('\n Comparing matrix: \n')
  print(eq_mat)

  # Ranking rule: a group's score is the number of other groups it is
  # significantly larger than. The fewest wins mark the minimal groups and
  # the most wins mark the maximal groups.
  wins <- rowSums(eq_mat == 1)
  min_res <- which(wins == min(wins))
  max_res <- which(wins == max(wins))
  cat('\n Minimal cats:')
  print(colnames(df)[min_res])
  cat('\n Maximal cats:')
  print(colnames(df)[max_res])

  list(mat = eq_mat)
}