-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathi-FWER.R
152 lines (132 loc) · 5.51 KB
/
i-FWER.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# i-FWER
# P: vector of p-values of length n
# x: side information
# alpha: target FWER level
# mask_fun: name of the masking function
# mask_para: a vector of parameters in the masking function
# eg. [p_l, p_u] for gap function
i_FWER = function(P, x, alpha, mask_fun, mask_para, S_model = TRUE, d = 5, delta = 0.05){
# generate masked p-values and missing bits
h_g = mask_gen(P, mask_fun, mask_para); h = h_g$h; g = h_g$g
if(mask_fun == "gap") {mask_ind = !(h == 0)}
n = length(P)
rej_ind = rep(TRUE, n) ## start with all the hypotheses in the rejection set
r_neg = sum(h[rej_ind] == -1)
fwer_est = ifelse(mask_fun %in% c("tent", "railway"),
1 - (1 - mask_para)^(r_neg + 1),
1 - (1 - mask_para[1]/(mask_para[1] + 1 - mask_para[2]))^(r_neg + 1))
iter = 0
while (fwer_est > alpha & any(rej_ind)) {
masked_P = g*rej_ind + P*(1 - rej_ind)
if (S_model) {
if (iter %% 100 == 0) { ## update S score every 100 iterations
if (mask_fun %in% c("tent", "railway")) {
S = em_mixture(masked_P, x, rej_ind, mask_fun, mask_para)
} else {
S = em_mixture(masked_P, x, rej_ind & mask_ind, mask_fun, mask_para)
}
}
} else {
S = -masked_P
}
rej_ind = exclu_search(S, x, rej_ind, d, delta)
r_neg = sum(h[rej_ind] == -1)
fwer_est = ifelse(mask_fun %in% c("tent", "railway"),
1 - (1 - mask_para)^(r_neg + 1),
1 - (1 - mask_para[1]/(mask_para[1] + 1 - mask_para[2]))^(r_neg + 1))
iter = iter + 1
}
rejections = (rej_ind & h == 1)
return(rejections)
}
# masked p-value generator
# P: vector of p-values of length n
# mask_fun: name of the masking function
# mask_para: a vector of parameters in the masking function
mask_gen = function(P, mask_fun, mask_para){
if (mask_fun == "tent"){
h <- 1*(P < mask_para) + (-1)*(P >= mask_para)
g <- pmin(P, mask_para/(1 - mask_para)*(1 - P))
} else if (mask_fun == "railway"){
h <- 1*(P < mask_para) + (-1)*(P >= mask_para)
g <- P*(P < mask_para) +
mask_para/(1 - mask_para)*(P - mask_para)*(P >= mask_para)
} else if (mask_fun == "gap"){
h <- 1*(P < mask_para[1]) + (-1)*(P > mask_para[2])
g <- P*(P < mask_para[1]) +
P*(P >= mask_para[1] & P <= mask_para[2]) +
mask_para[1]/(1 - mask_para[2])*(1 - P)*(P > mask_para[2])
}
return(list(h = h, g = g))
}
# EM algorithm under mixture model to get the non-null likelihood score
# masked_P: vector of masked p-value information
# x: side information
# rej_ind: vector of indicators for whether a hypothesis is in the rejection set
# mask_fun: name of the masking function
# mask_para: a vector of parameters in the masking function
# iter: number of EM iterations, default to five
em_mixture = function(masked_P, x, rej_ind, mask_fun, mask_para, iter = 5, df = 3){
masked_P[masked_P == 0] = 10^(-8) ## avoid Inf in calculation
masked_Z = qnorm(1 - masked_P) ## translate p-values into Z-scores
if (mask_fun == "tent") {
inv_Z = qnorm((1 - mask_para)/mask_para*(1 - pnorm(masked_Z)))
} else if (mask_fun == "railway") {
inv_Z = qnorm((1 - mask_para)/mask_para*(pnorm(masked_Z) - 1 + mask_para))
} else if (mask_fun == "gap") {
inv_Z = qnorm((1 - mask_para[2])/mask_para[1]*(1 - pnorm(masked_Z)))
}
inv_Z[!rej_ind] = 0
pi_set = rep(0.1, length(masked_P)); mu = 1 ## initial values for parameters in the EM algorithm
for (i in 1:iter) {
mu = mask_para %>% ifelse(mask_fun %in% c("tent", "railway"), ., .[1]) %>%
max(mu, qnorm(1 - .) + 0.5)
# likelihood of each case with respect to the latent labels w and q
a = pi_set*dnorm(masked_Z - mu); b = (1 - pi_set)*dnorm(masked_Z)
c = pi_set*dnorm(inv_Z - mu); d = (1 - pi_set)*dnorm(inv_Z)
# update latent labels: w, q
w = ifelse(rej_ind, 1/(1 + (c + d)/(a + b)), 1)
q = ifelse(rej_ind, 1/(1 + (b + d)/(a + c)), 1/(1 + b/a))
# update parameters: pi_set, mu
phi_x = bs(x[,1], df = df); phi_y = bs(x[,2], df);
phi = phi_x[,rep(1:df, each = df)] * phi_y[,rep(1:df, times = df)]
pi_set = glm(q~phi, family = quasibinomial())$fitted.values
mu = sum(q*w*masked_Z + q*(1 - w)*inv_Z)/sum(q)
}
return(q)
}
# update rejection set under clustered non-null structure
# S: non-null likelihood score
# x: side information
# rej_ind: vector of indicators for whether a hypothesis is in the rejection set
# d: number of cones
# delta: proportion of hypothesis to be excluded in one cone
exclu_search = function(S, x, rej_ind, d, delta){
if (sum(rej_ind) == 1) {
rej_ind[rej_ind] = FALSE
} else {
C = colMedians(x[rej_ind,])
# angle and distance from the center
theta = atan((x[,1] - C[1]) / (x[,2] - C[2])) * (x[,1] - C[1] >= 0 & x[,2] - C[2] >= 0) +
(atan((x[,1] - C[1]) / (x[,2] - C[2])) + pi) * (x[,2] - C[2] < 0) +
(atan((x[,1] - C[1]) / (x[,2] - C[2])) + 2*pi) * (x[,1] - C[1] < 0 & x[,2] - C[2] >= 0)
square_dist = (x[,1] - C[1])^2 + (x[,2] - C[2])^2
s_min = Inf
exclu_ind = NULL
for (j in 1:d) {
cone_ind = which(theta >= 2*pi/d*(j - 1) & theta < 2*pi/d*j & rej_ind)
if (length(cone_ind) > 0){
cand_ind = order(square_dist[cone_ind], decreasing = TRUE) %>%
{.[1:round(delta*length(cone_ind))]} %>%
cone_ind[.]
s = mean(S[cand_ind])
if(s < s_min){
s_min = s
exclu_ind = cand_ind
}
}
}
rej_ind[exclu_ind] = FALSE
}
return(rej_ind)
}