-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path04_Run_simulations.R
188 lines (161 loc) · 7.99 KB
/
04_Run_simulations.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#################################################################
##
# Script 4: Estimating the prevalence of anaemia attributable to hookworm as a function
# of infection prevalence.
##
#
#Author: Veronica Malizia
#Created: July 2021
#
#The script contains the code to load the outcomes from Stan (i.e. Bayesian posterior distributions
#of model's parameters, obtained by the model model fit to the Uganda data) and run the model to
#simulate different endimicity scenarios.
#
#Required input file in the Data directory:
# "Fit.posteriors.RData", the outcome of the model fit through Stan: it contains the
# posterior distributions of the fitted parameters and the posterior predictive checks (PPC)
#
# Output file in Data directory:
# "Simulations_results.RData", outcome of simulations, including prevalence measures for each endemicity scenario
#################################################################
rm(list = ls())
library(tidyverse)
library(readr)
library(scales)
library(truncnorm)
library(rstan)
library(EnvStats)
library(foreach)
library(doParallel)
#Handy functions
ci <- function(X){
##computes the 95% confidence interval of a vector, removing NA's
return(quantile(X, c(0.025, 0.975), na.rm=TRUE))
}
#Load input file from Data folder
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) #Set to current source directory
load("Data/Fit.posteriors.RData")
#Set parameters for simulations
N <- 1000 #Simulate 1000 individuals per each scenario
beta0 = 10000/24 #Mean saturation level for egg production
#Defining endemicity scenarios
shapes = c(0.3, 0.5, 0.7) #Varying values for the shape parameter of the distribution of worm burdens in infected individuals
mean_worms = c(1, 3*1:4) #Varying values for the average worm burden in the population
rhos = seq(0.1, 0.9, 0.2) #Varying values for the rho parameter defining the proportion of individuals having zero worms
scenarios <- expand.grid(shape = shapes, rho=rhos, mean_fworms = mean_worms)
scenarios <- scenarios %>%
mutate(scale = mean_fworms/(rho*gamma(1+1/shape))) #scale parameter of the distribution of worm burdens in infected individuals
#Add the scenario resulting from the fit and approximating the Uganda dataset.
#(For plotting purposes)
scenarios <- rbind(scenarios, c(mean(bestfit$shape),
mean(bestfit$rho),
mean(bestfit$rho)*mean(bestfit$scale)*gamma(1+1/mean(bestfit$shape)),
mean(bestfit$scale)))
#Defining functions for:
#computing expected egg counts given x=worm load
compute_ec <- function(x, a, b){
ind_sui <- rgamma(1, 50, 50)
beta <- b*ind_sui
return((a*x) / (1 + ((a*x) /beta)))
}
#Generating worm burdens and egg counts distributions for each scenario
w <- array(dim = c(nrow(scenarios), N))
ec <- array(dim = c(nrow(scenarios), N))
for(k in 1:nrow(scenarios)){
for(j in 1:N){
tmp <- rweibull(1, shape=scenarios$shape[k], scale=scenarios$scale[k])*(1 - rbernoulli(1, scenarios$rho[k]))
w[k, j] <- tmp
ec[k, j] <- compute_ec(tmp/2, 200/24, beta0)
}
}
#Starting simulations
s <- 5000 #sample size from the posterior
writeLines(c(""), "Sink.txt") #initiate log file
#Launching simulations. It takes approximately
cluster <- makeCluster(min(parallel::detectCores(logical = FALSE), nrow(scenarios)))
registerDoParallel(cluster)
results <- foreach(k = 1:nrow(scenarios), #k identifies the scenario
.inorder = TRUE,
.errorhandling = "remove",
#.combine = bind_rows,
.packages = c("tidyverse")) %dopar% {
#for each scenario, take a random sample of size s from the posterior:
index <- sample(20000, s)
hk.prev1 <- vector("numeric", s)
MH.prev1 <- vector("numeric", s)
hk.prev2 <- vector("numeric", s)
MH.prev2 <- vector("numeric", s)
hk.prev4 <- vector("numeric", s)
MH.prev4 <- vector("numeric", s)
an.prev <- vector("numeric", s)
b.an.prev <- vector("numeric", s)
for(n in 1:s){
sink("Sink.txt", append=TRUE)
cat(paste(Sys.time(), ": Starting scenario", k, "post draw", n, "\n", sep = " "))
sink()
i <- index[n] #i identifies the draw from the posterior
#extracting i-th row of the posterior distribution
theta <- list(k_e=bestfit$k_e[i],
log_mu0=bestfit$log_mu0[i],
sigma=bestfit$sigma[i],
phi1=bestfit$phi1[i],
phi0=bestfit$phi0[i])
#Simulating observed egg counts & epg
#j identifies the individuals
#k identifies the scenarios
epg1 <- c()
epg2 <- c()
epg4 <- c()
reduction <- c()
for(j in 1:N){
eggs <- rnbinom(4, mu = ec[k, j], size = theta$k_e)
epg1 <- c(epg1, round(mean(eggs[1])/0.0417))
epg2 <- c(epg2, round(mean(eggs[1:2])/0.0417))
epg4 <- c(epg4, round(mean(eggs)/0.0417))
if(w[k, j]==0)
reduction <- c(reduction, 0)
else
reduction <- c(reduction,
log(1 + exp(theta$phi1*log(w[k, j]) - theta$phi0)))
}
#Saving infection prevalence
hk.prev1[n] <- length(which(epg1>0))/N
MH.prev1[n] <- length(which(epg1>2000))/N
hk.prev2[n] <- length(which(epg2>0))/N
MH.prev2[n] <- length(which(epg2>2000))/N
hk.prev4[n] <- length(which(epg4>0))/N
MH.prev4[n] <- length(which(epg4>2000))/N
#Simulating hb concentrations in the population
hb <- rlnorm(N, (theta$log_mu0 - reduction), theta$sigma)
hb0 <- hb * exp(reduction)
#Saving anaemia prevalence
an.prev[n] <- length(which(hb<120))/N
b.an.prev[n] <- length(which(hb0<120))/N
}
#Collate results
list(hk.prev1=hk.prev1,
MH.prev1=MH.prev1,
hk.prev2=hk.prev2,
MH.prev2=MH.prev2,
hk.prev4=hk.prev4,
MH.prev4=MH.prev4,
an.prev=an.prev,
b.an.prev=b.an.prev)
}
stopCluster(cluster)
#Inspecting results
#Computing summary statistics (mean and BCI) of prevalences, for each endemicity scenario
#Hookworm infection prevalences
mean.prev <- lapply(results, function(x) sapply(x, mean))
mean.prev <- do.call(bind_rows, mean.prev)
#Prevalence of anaemia attributable to hookworm
excess.an <- lapply(results, function(x) x$an.prev - x$b.an.prev)
EX.AN <- sapply(excess.an, mean)
EX.AN.ci <- sapply(excess.an, ci)
#Preparing results for plotting
to.plot <- bind_cols(scenarios[1:4], mean.prev, EX.AN=EX.AN,
EX.AN.lo=EX.AN.ci[1,], EX.AN.hi=EX.AN.ci[2,])
to.plot <- to.plot %>%
gather(scheme, hk.prev, hk.prev1:MH.prev4, factor_key = T)
#Saving simulations results to Data folder
save(to.plot, file = "Data/Simulations_results.RData")