-
Notifications
You must be signed in to change notification settings - Fork 0
/
ranking_single_explanatory_variable.R
240 lines (229 loc) · 13.7 KB
/
ranking_single_explanatory_variable.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# Install VGAM if not already installed
list.of.packages <- c("VGAM")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
# Makes sure VGAM is loaded
require(VGAM)
# This function only supports regression with a single explanatory variable. The data can be provided following two different formats; as a contingency table or as a list of experiments (see the examples in the README).
# @param formula: Relation between the response and explanatory variable, must follow the syntax a~b.
# @param data: Data frame that contains the data used for the regression
# @param pod_assumption: Whether the regression should be run using the proportional odds assumption (TRUE or FALSE)
# @return: A named matrix with all the coefficients and their corresponding p-values
regression_single_variable <- function(formula, data, pod_assumption){
# Run the regression using vglm from the VGAM package
regression_results <- vglm(formula, family = cumulative(parallel = pod_assumption), data=data)
# Get the number of output
output_number <- length(regression_results@coefficients)
# Outcome numbers
outcome_numbers <- length(regression_results@extra$colnames.y)
# Extract only the coefficients that are related to the effect of each level of the explanatory variable (ignore the intercepts)
effect_coefficents <- regression_results@coefficients[outcome_numbers:output_number]
# Extract the number of levels of the explanatory variable
number_levels <- sum(lengths(regression_results@assign))
# Name given to each level (except the one taken as reference)
effect_names <- names(effect_coefficents)
if (!pod_assumption){
for (i in 1:length(effect_names)){
split_coefficient <- as.list(strsplit(effect_names[i], ':')[[1]])
index_outcome = strtoi(split_coefficient[2])
effect_names[i] <- paste(split_coefficient[1], regression_results@extra$colnames.y[index_outcome], sep=":")
}
}
# Values corresponding to each level (except the one taken as reference)
effect_values <-unname(effect_coefficents)
# Create a vector initialized with the same values as the effect of each level against the reference
all_values <- effect_values
# Vector initialized with the same of each level (except the reference)
all_names <- effect_names
# Get the pvalues of the coefficients for each level (except the reference)
pvalues <- unname(summary(regression_results)@coef3[outcome_numbers:output_number, 4])
# Get the variance covariance values for the effect coefficients
cov_mat = vcov(regression_results)[outcome_numbers:output_number, outcome_numbers:output_number]
if (pod_assumption){
# For each pairwise difference compute the value and associated p-value of the effect
for (index_ref in 1:(number_levels-2))
{
for (second_index in (index_ref+1):(number_levels-1))
{
# Append the difference between two effects
all_values <- c(all_values, effect_values[index_ref] - effect_values[second_index])
# Get the corresponding name
all_names <- c(all_names, paste(effect_names[index_ref], "-", effect_names[second_index]))
# Compute the z^2 statistics that allows to get the corresponding pvalues
zsqaure <- (tail(all_values, n=1))**2/(cov_mat[index_ref, index_ref] + cov_mat[second_index, second_index] - 2*cov_mat[index_ref, second_index])
# Append the pvalue
pvalues <- c(pvalues, 1 - pchisq(zsqaure, 1))
}
}
}
else
{
number_sub_outcomes <- outcome_numbers - 1
for (level_ref in 1:(number_levels-2))
{
for (second_level in (level_ref + 1):(number_levels - 1))
{
for (sub_outcome in 1:number_sub_outcomes)
{
# Index of the level+outcome we want to compare with the other
ref_index <- (level_ref - 1) * number_sub_outcomes + sub_outcome
# Index of the level+outcome that is being compared to the reference
second_index <- (second_level - 1) * number_sub_outcomes + sub_outcome
# Append the difference between two level+outcomes (for the same outcomes)
all_values <- c(all_values, effect_values[ref_index] - effect_values[second_index])
# Get the corresponding name
all_names <- c(all_names, paste(effect_names[ref_index], "-", effect_names[second_index]))
# Compute the z^2 statistics that allows to get the corresponding pvalues
zsqaure <- (tail(all_values, n=1))**2/(cov_mat[ref_index, ref_index] + cov_mat[second_index, second_index] - 2*cov_mat[ref_index, second_index])
# Append the pvalue
pvalues <- c(pvalues, 1 - pchisq(zsqaure, 1))
}
}
}
}
# Concatenate everything inside a matrix
final_matrix <- matrix(c(all_values, pvalues), ncol = 2)
# Name the columns and rows for a nice display
colnames(final_matrix) <- c("value", "p-value")
rownames(final_matrix) <- all_names
# Return the whole matrix
return(final_matrix)
}
# This function supports regression two explanatory variables. The data can be provided following two different formats; a contingency-like table or as a list of experiments (see the examples in the README).
# @param formula: Relation between the response and explanatory variables, must follow the syntax a~b*c. b must be the main explanatory variable corresponding to the thetas
# @param data: Data frame that contains the data used for the regression
# @param include_pure_etas: Whether to also display the values corresponding to the etas (i.e. the second variable) WITHOUT the interaction term
# @return: A named matrix with all the coefficients and their corresponding p-values
regression_multi_variables <- function(formula, data, include_pure_etas=FALSE){
regression_results <- vglm(formula, family = cumulative(parallel = TRUE), data=data)
nb_unique <- length(regression_results@misc$colnames.x)
# Extract all necessary information if the data is provided as a contingency-like table (this if) or if the data is provided as a a table with one trial per row (in else)
if (lengths(regression_results@assign[2]) == 1){
number_total_single_effects <- which(unlist(gregexpr(":", names(regression_results@assign))) != -1)[1]
number_effect_variable1 <- length(grep(names(regression_results@assign)[number_total_single_effects-1], names(regression_results@assign)[number_total_single_effects:nb_unique])) + 1
number_effect_variable2 <- number_total_single_effects - number_effect_variable1
effect_variable1_names <- names(regression_results@assign)[2:number_effect_variable1]
effect_variable2_names <- names(regression_results@assign)[number_effect_variable1+1:number_total_single_effects-1]
} else {
number_effect_variable1 <- lengths(regression_results@xlevels[1])
number_effect_variable2 <- lengths(regression_results@xlevels[2])
number_total_single_effects <- number_effect_variable1 + number_effect_variable2
effect_variable1_names <- unlist(unname(no_contingency@xlevels[1]))[2:number_effect_variable1]
effect_variable2_names <- unlist(unname(no_contingency@xlevels[2]))[2:number_effect_variable2]
}
# Get the number of output
output_number <- length(regression_results@coefficients)
# Outcome numbers
outcome_numbers <- length(regression_results@extra$colnames.y)
pure_effect_coefficents <- regression_results@coefficients[outcome_numbers:(outcome_numbers+number_total_single_effects-3)]
mixed_effect_coefficients <- regression_results@coefficients[(outcome_numbers+number_total_single_effects-2):output_number]
# Name given to each level (except the one taken as reference)
pure_effect_names <- names(pure_effect_coefficents)
mixed_effect_names <- names(mixed_effect_coefficients)
pure_effect_values <- unname(pure_effect_coefficents)
if (include_pure_etas == FALSE)
{
effect_names <- pure_effect_names[1:(number_effect_variable1-1)]
effect_values <- pure_effect_values[1:(number_effect_variable1-1)]
}
else
{
effect_names <- c(pure_effect_names)
effect_values <- c(pure_effect_values)
}
# Create a vector initialized with the same values as the effect of each level against the reference
all_values <- effect_values
# Vector initialized with the same of each level (except the reference)
all_names <- effect_names
# Get the pvalues of the coefficients for each level (except the reference)
pure_effect_pvalues <- unname(summary(regression_results)@coef3[outcome_numbers:(outcome_numbers+number_total_single_effects-3), 4])
if (include_pure_etas==TRUE)
{
pvalues <- c(pure_effect_pvalues)
}
else {
pvalues <- pure_effect_pvalues[1:(number_effect_variable1-1)]
}
# Get the variance covariance values for the effect coefficients
# For pure effects only
cov_mat_pure = vcov(regression_results)[outcome_numbers:(outcome_numbers+number_total_single_effects-3), outcome_numbers:(outcome_numbers+number_total_single_effects-3)]
# For interaction terms only
cov_mat_mixed = vcov(regression_results)[(outcome_numbers+number_total_single_effects-2):output_number, (outcome_numbers+number_total_single_effects-2):output_number]
# Covariance between pure and interaction terms
cov_mat_cross = vcov(regression_results)[outcome_numbers:(outcome_numbers+number_total_single_effects-3), (outcome_numbers+number_total_single_effects-2):output_number]
# For each pairwise difference in first variable compute the value and associated p-value of the effect
for (index_ref in 1:(number_effect_variable1-2))
{
for (second_index in (index_ref+1):(number_effect_variable1-1))
{
# Append the difference between two effects
all_values <- c(all_values, effect_values[index_ref] - effect_values[second_index])
# Get the corresponding name
all_names <- c(all_names, paste(effect_names[index_ref], "-", effect_names[second_index]))
# Compute the z^2 statistics that allows to get the corresponding pvalues
zsqaure <- (tail(all_values, n=1))**2/(cov_mat_pure[index_ref, index_ref] + cov_mat_pure[second_index, second_index] - 2*cov_mat_pure[index_ref, second_index])
# Append the pvalue
pvalues <- c(pvalues, 1 - pchisq(zsqaure, 1))
}
}
# Do the same for etas if required
if (include_pure_etas == TRUE){
for (index_ref in (number_effect_variable1+1):(number_effect_variable2-2))
{
for (second_index in (index_ref+1):(number_effect_variable2-1))
{
# Append the difference between two effects
all_values <- c(all_values, effect_values[index_ref] - effect_values[second_index])
# Get the corresponding name
all_names <- c(all_names, paste(effect_names[index_ref], "-", effect_names[second_index]))
# Compute the z^2 statistics that allows to get the corresponding pvalues
zsqaure <- (tail(all_values, n=1))**2/(cov_mat_pure[index_ref, index_ref] + cov_mat_pure[second_index, second_index] - 2*cov_mat_pure[index_ref, second_index])
# Append the pvalue
pvalues <- c(pvalues, 1 - pchisq(zsqaure, 1))
}
}
}
# Simple interaction between single theta and single eta
for (var1_index in 1:(number_effect_variable1-1))
{
for (var2_index in 1:(number_effect_variable2-1))
{
combined_index_mixed <- match(paste(pure_effect_names[var1_index], pure_effect_names[number_effect_variable1-1 + var2_index], sep=":"), mixed_effect_names)
# Append the sum between two effects
all_values <- c(all_values, pure_effect_coefficents[number_effect_variable1-1 + var2_index] + mixed_effect_coefficients[combined_index_mixed])
# Get the corresponding name
all_names <- c(all_names, paste(pure_effect_names[var1_index], pure_effect_names[number_effect_variable1-1 + var2_index], sep=":"))
# Compute the z^2 statistics that allows to get the corresponding pvalues
zsqaure <- (tail(all_values, n=1))**2/(cov_mat_pure[number_effect_variable1-1 + var2_index, number_effect_variable1-1 + var2_index] + cov_mat_mixed[combined_index_mixed, combined_index_mixed] + 2*cov_mat_cross[number_effect_variable1-1 + var2_index, combined_index_mixed])
# Append the pvalue
pvalues <- c(pvalues, 1 - pchisq(zsqaure, 1))
}
}
# Interaction between difference of two thetas for a single eta
for (index_ref in 1:(number_effect_variable1-2))
{
for (second_index in (index_ref+1):(number_effect_variable1-1))
{
for (var2_index in 1:(number_effect_variable2-1))
{
combined_index_mixed_first <- match(paste(pure_effect_names[index_ref], pure_effect_names[number_effect_variable1-1 + var2_index], sep=":"), mixed_effect_names)
combined_index_mixed_second <- match(paste(pure_effect_names[second_index], pure_effect_names[number_effect_variable1-1 + var2_index], sep=":"), mixed_effect_names)
# Append the difference between two effects
all_values <- c(all_values, mixed_effect_coefficients[combined_index_mixed_first] - mixed_effect_coefficients[combined_index_mixed_second])
# Get the corresponding name
all_names <- c(all_names, paste(paste(effect_names[index_ref], effect_names[second_index], sep="-"), pure_effect_names[number_effect_variable1-1 + var2_index], sep=":"))
# Compute the z^2 statistics that allows to get the corresponding pvalues
zsqaure <- (tail(all_values, n=1))**2/(cov_mat_mixed[combined_index_mixed_first, combined_index_mixed_first] + cov_mat_mixed[combined_index_mixed_second, combined_index_mixed_second] - 2*cov_mat_mixed[combined_index_mixed_first, combined_index_mixed_second])
# Append the pvalue
pvalues <- c(pvalues, 1 - pchisq(zsqaure, 1))
}
}
}
# Concatenate everything inside a matrix
final_matrix <- matrix(c(all_values, pvalues), ncol = 2)
# Name the columns and rows for a nice display
colnames(final_matrix) <- c("value", "p-value")
rownames(final_matrix) <- all_names
# Return the whole matrix
return(final_matrix)
}