-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.r
190 lines (161 loc) · 11.4 KB
/
main.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# Set directory where results will be saved
mainDir <- "~/GitHub/dep-model-AJPM/"
setwd(file.path(mainDir))
library(openxlsx)
library(splines)
load("depprevs_2005-2017.rda") # load NSDUH data for calibration
# If running the fully calibrated model (no need for additional estimation of parameters), use this line of code:
allparamsF = read.xlsx(paste0(mainDir, "parameters.xlsx"),sheet=paste0("dep_females_nocalib"),rowNames=TRUE,colNames=TRUE) # Adjust parameters in this excel file with estimates to be used in the model
allparamsM = read.xlsx(paste0(mainDir, "parameters.xlsx"),sheet=paste0("dep_males_splines"),rowNames=TRUE,colNames=TRUE)
# If performing calibration of underreporting and incidence parameters use this line of code:
# allparamsF = read.xlsx(paste0(mainDir, "parameters.xlsx"),sheet=paste0("dep_females_splines"),rowNames=TRUE,colNames=TRUE)
# allparamsM = read.xlsx(paste0(mainDir, "parameters.xlsx"),sheet=paste0("dep_males_splines"),rowNames=TRUE,colNames=TRUE)
# If calibrationg increased incidence probabilities starting in year_SF (for 18-25 year olds), use this line of code:
# allparamsF = read.xlsx(paste0(mainDir, "parameters.xlsx"),sheet=paste0("dep_females_incSF"),rowNames=TRUE,colNames=TRUE)
# allparamsM = read.xlsx(paste0(mainDir, "parameters.xlsx"),sheet=paste0("dep_males_incSF"),rowNames=TRUE,colNames=TRUE)
## FEMALES
paramsF = as.vector(subset(allparamsF,bhat==1)[['estimate']]) # Parameters where bhat = 1 can be estimated by bhat package in 'calibration.R'
paramsnamesF = rownames(subset(allparamsF,bhat==1)) # names of parameters
lowervectorF = as.vector(subset(allparamsF,bhat==1)[['lower']]) # lower bounds for bhat parameters
uppervectorF = as.vector(subset(allparamsF,bhat==1)[['upper']]) # upper bounds for bhat parameters
## MALES
paramsM = as.vector(subset(allparamsM,bhat==1)[['estimate']])
paramsnamesM = rownames(subset(allparamsM,bhat==1))
lowervectorM = as.vector(subset(allparamsM,bhat==1)[['lower']])
uppervectorM = as.vector(subset(allparamsM,bhat==1)[['upper']])
startyear = 1900 # burn-in period starting point
endyear = 2017 # 2017
startage = 0
endage = 99
Ny= endyear - startyear + 1
Na= endage - startage + 1
emptycompartment <- matrix(0, nrow = Na, ncol = Ny, dimnames=list(c(startage:endage),c(startyear:endyear))) # create matrix of zeroes for compartments
agerownames<-c("18to25", "26to34", "35to49", "50to64", "65plus", "total")
agegroupstart <- c(18,26,35,50,65,18)
agegroupend <- c(25,34,49,64,99,99)
# Get model prevs by age group --------------------------------------------
getmodelprevs <- function(numerator,denominator){
numerator = as.data.frame(numerator)
denominator = as.data.frame(denominator)
prevs = NULL
for (a in c(1:length(agegroupstart))) {
prevs <- rbind(prevs,colSums(numerator[c((agegroupstart[a]+1):(agegroupend[a]+1)), ],na.rm=TRUE)/
colSums(denominator[c((agegroupstart[a]+1):(agegroupend[a]+1)), ],na.rm=TRUE))
}
row.names(prevs)<-agerownames
return(prevs)
}
# Main model --------------------------------------------------------------
main <- function(getmodelprevs, whichgender, allparamsF, paramsF,paramsnamesF, year_SF){
# model parameters are retrieved from the parameters.xlsx file, if they are bhat=1, then they are read in as a params vector for bhat estimation
RRdepr_death = c(rep(1,17),rep(ifelse(allparamsF["RRdepr_death","bhat"]==0,allparamsF["RRdepr_death","estimate"],paramsF[match("RRdepr_death",paramsnamesF)]),Na-18))
death_rate = read.xlsx(paste0(mainDir, "hmd_death_rates.xlsx"),sheet=paste0("hmd_",whichgender),rowNames=TRUE, colNames=TRUE, check.names=FALSE)
pop = read.xlsx(paste0(mainDir, "/usproj2000-2050.xlsx"),sheet=whichgender,rowNames=TRUE, colNames=TRUE, check.names=FALSE)
dep1inc = read.xlsx(paste0(mainDir, "incidence_eaton.xlsx"),sheet=whichgender,rowNames=TRUE, colNames=FALSE, check.names=FALSE)
# scale all recovery rates by the scaling factor deprecov_SF
dep1recov_rate = c(rep(ifelse(allparamsF["dep1recov_rate","bhat"]==0,allparamsF["dep1recov_rate","estimate"],paramsF[match("dep1recov_rate",paramsnamesF)]),Na-1))
deprecov_SF = matrix(1,99,1)
deprecov_rate = dep1recov_rate*deprecov_SF
# recovery rates for depression are the same regardless of whether 1st or subsequent depressive episode
deprinc = rep(ifelse(allparamsF["depr_inc","bhat"]==0,allparamsF["depr_inc","estimate"],paramsF[match("depr_inc",paramsnamesF)]),Na-1)
deprinc_SF = matrix(1,99,1)
depr_inc = deprinc * deprinc_SF
depinc1 = ifelse(allparamsF["depinc1","bhat"]==0,allparamsF["depinc1","estimate"],paramsF[match("depinc1",paramsnamesF)])
depinc2 = ifelse(allparamsF["depinc2","bhat"]==0,allparamsF["depinc2","estimate"],paramsF[match("depinc2",paramsnamesF)])
depinc3 = ifelse(allparamsF["depinc3","bhat"]==0,allparamsF["depinc3","estimate"],paramsF[match("depinc3",paramsnamesF)])
# scale all incidence rates by scaling factor starting in year_SF
inc_SF = ifelse(allparamsF["inc_SF","bhat"]==0,allparamsF["inc_SF","estimate"],paramsF[match("inc_SF",paramsnamesF)])
if (whichgender=="females"){
MP=ns(0:21,knots=c(13,18)) ### Matrix of X's
Rps=predict(MP,21)[1,] ## Predicts y value given a set of X's for age 22 # 0.0051285304 anchor at age 22
y=c()
for (j in 0:21){
y=c(y,0.0051285304*exp(sum((MP[j,]-Rps)*c(depinc1,depinc2,depinc3)))) ## multiply by coefficients and sum , exp makes it positive for incidence
}
dep1_inc=dep1inc
dep1_inc[0:22,]<-y
dep1_inc[0:12,]<-rep(0,12) # assumes no 1st MDE before age 12
scaleddep1_inc <- dep1_inc*inc_SF # multiply all incidence probabilities by scaling factor
}
if (whichgender=="males"){
MP=ns(0:28,knots=c(13,18))
Rps=predict(MP,28)[1,] ## Predicts y value given a set of X's for age 22 # 0.0019072600 anchor at age 29
y=c()
for (j in 0:28){
y=c(y,0.0019072600*exp(sum((MP[j,]-Rps)*c(depinc1,depinc2,depinc3)))) ## multiply by coefficients and sum , exp makes it positive for incidence
}
dep1_inc=dep1inc
dep1_inc[0:29,]<-y
dep1_inc[0:12,]<-rep(0,12) # assumes no 1st MDE before age 12
scaleddep1_inc <- dep1_inc*inc_SF # multiply all incidence probabilities by scaling factor
}
# Age-group categorical underreporting probabilities
forget1 = ifelse(allparamsF["forget1","bhat"]==0,allparamsF["forget1","estimate"],paramsF[match("forget1",paramsnamesF)])
forget2 = ifelse(allparamsF["forget2","bhat"]==0,allparamsF["forget2","estimate"],paramsF[match("forget2",paramsnamesF)])
forget3 = ifelse(allparamsF["forget3","bhat"]==0,allparamsF["forget3","estimate"],paramsF[match("forget3",paramsnamesF)])
forget4 = ifelse(allparamsF["forget4","bhat"]==0,allparamsF["forget4","estimate"],paramsF[match("forget4",paramsnamesF)])
forget5 = ifelse(allparamsF["forget5","bhat"]==0,allparamsF["forget5","estimate"],paramsF[match("forget5",paramsnamesF)])
forget = matrix(NA,99,1)
forget[1:17] = rep(0,17)
forget[18:25] = forget1
forget[26:34] = forget2
forget[35:49] = forget3
forget[50:64] = forget4
forget[65:99] = forget5
# Compartments / state variables ------------------------------------------
# Initialize population - Model compartments are organized with age 0-99 as rows, and year as columns
matrix.names<-c('nevdeptrue', 'dep1','hdepr','depr','forgot')
for (name in matrix.names) assign(name,emptycompartment)
nevdeptrue[paste(startage),1:Ny] <- as.matrix(pop[paste(startage),1:Ny]) # Takes empty compartment and populates the top row of the matrix with the number of 0-yrolds
for (y in c((startyear+1):(year_SF-1))){
py = paste(y - 1)
nevdeptrue[2:Na,paste(y)] <- nevdeptrue[1:Na-1,py]*(1-dep1_inc[(startage+1):(endage),])*(1-death_rate[(startage+1):endage,py])
dep1[2:Na,paste(y)] <- dep1[1:Na-1,py]*(1-deprecov_rate)*(1-RRdepr_death*death_rate[(startage+1):endage,py]) + (dep1_inc[(startage+1):(endage),])*nevdeptrue[1:Na-1,py]
hdepr[2:Na,paste(y)] <- hdepr[1:Na-1,py]*(1-depr_inc[(startage+1):(endage),])*(1-RRdepr_death*death_rate[(startage+1):endage,py])*(1-forget[(startage+1):(endage)]) + (deprecov_rate)*dep1[1:Na-1,py] + (deprecov_rate)*depr[1:Na-1,py]
depr[2:Na,paste(y)] <- depr[1:Na-1,py]*(1-deprecov_rate)*(1-RRdepr_death*death_rate[(startage+1):endage,py]) + (depr_inc[(startage+1):(endage),])*hdepr[1:Na-1,py] + (depr_inc[(startage+1):(endage),])*forgot[1:Na-1,py]
forgot[2:Na,paste(y)] <- forgot[1:Na-1,py]*(1-depr_inc[(startage+1):(endage),])*(1-RRdepr_death*death_rate[(startage+1):endage,py])+(forget[(startage+1):(endage)])*hdepr[1:Na-1,py]
}
for (y2 in c(year_SF:endyear)){
py2 = paste(y2 - 1)
nevdeptrue[2:Na,paste(y2)] <- nevdeptrue[1:Na-1,py2]*(1-scaleddep1_inc[(startage+1):(endage),])*(1-death_rate[(startage+1):endage,py2])
dep1[2:Na,paste(y2)] <- dep1[1:Na-1,py2]*(1-deprecov_rate)*(1-RRdepr_death*death_rate[(startage+1):endage,py2]) + (scaleddep1_inc[(startage+1):(endage),])*nevdeptrue[1:Na-1,py2]
hdepr[2:Na,paste(y2)] <- hdepr[1:Na-1,py2]*(1-depr_inc[(startage+1):(endage),])*(1-RRdepr_death*death_rate[(startage+1):endage,py2])*(1-forget[(startage+1):(endage)]) + (deprecov_rate)*dep1[1:Na-1,py2] + (deprecov_rate)*depr[1:Na-1,py2]
depr[2:Na,paste(y2)] <- depr[1:Na-1,py2]*(1-deprecov_rate)*(1-RRdepr_death*death_rate[(startage+1):endage,py2]) + (depr_inc[(startage+1):(endage),])*hdepr[1:Na-1,py2] + (depr_inc[(startage+1):(endage),])*forgot[1:Na-1,py2]
forgot[2:Na,paste(y2)] <- forgot[1:Na-1,py2]*(1-depr_inc[(startage+1):(endage),])*(1-RRdepr_death*death_rate[(startage+1):endage,py2])+(forget[(startage+1):(endage)])*hdepr[1:Na-1,py2]
}
# Get population counts for each subpopulation ----------------------------
nevdep = nevdeptrue + forgot
totalpop = nevdep+dep1 + hdepr + depr
deppop = dep1+depr
everdeppop = dep1 + hdepr + depr # does not account for recall error
everdeppoptrue = dep1 + hdepr + depr + forgot # adjusts for recall error
d1 <- getmodelprevs(nevdep,totalpop) # never MDE prevalence
d2 <- getmodelprevs(deppop,totalpop) # current MDE prevalence
d3 <- getmodelprevs(hdepr,totalpop) # former MDE prevalence
e1 <- getmodelprevs(everdeppop,totalpop) # lifetime MDE prevalence (no recall error adjustment)
f1 <- getmodelprevs(forgot,totalpop) # prevalence of people with past MDE who 'forget' and transition to nevdep
f2 <- getmodelprevs(forgot,nevdep)
n1 <- getmodelprevs(nevdeptrue,totalpop)
n2 <- getmodelprevs(nevdeptrue,nevdep)
d4 <- getmodelprevs(dep1, totalpop)
d5 <- getmodelprevs(depr,totalpop)
incidence <- data.frame(cbind(dep1inc[-1,],dep1_inc[-1,],scaleddep1_inc[-1,],deprinc,deprinc_SF,data.frame(depr_inc),c(1:99)))
colnames(incidence) <- c("dep1inc","splines","scaleddep1inc_yr", "deprinc","SF_depr", "scaledrates_depr", "age")
if (whichgender=="females"){
incidence$dep1inc[0:21]<- NA
incidence$dep1inc[68:99]<- NA
}
if (whichgender=="males"){
incidence$dep1inc[0:28]<- NA
incidence$dep1inc[59:99]<- NA
}
recovery <- data.frame(cbind(dep1recov_rate, deprecov_SF,deprecov_rate,c(1:99)))
colnames(recovery) <- c("dep1recov","SF","scaledrates","age")
forget <- data.frame(cbind(forget,c(1:99)))
colnames(forget) <- c("forget_prob","age")
modeldepprevdata <- list(d1,d2,d3,e1,d4,d5,incidence,recovery,forget,f1,f2,n1,n2, everdeppoptrue, totalpop)
return(modeldepprevdata)
}
# Run the model
outF = main(getmodelprevs, "females", allparamsF, paramsF,paramsnamesF, 2016) # runs model using parameters specified in excel sheet OR using bhat estimates
outM = main(getmodelprevs, "males", allparamsM, paramsM,paramsnamesM, 2016)