-
Notifications
You must be signed in to change notification settings - Fork 0
/
sim-concord-4.R
147 lines (119 loc) · 4.21 KB
/
sim-concord-4.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#### Simulate data from various causal models of the concordance project
#### Model 4: add a noisy measurement of US
rbern_logit <- function(N, x)
{
logp <- ifelse(x < 0,
x - log1p(exp(x)),
-log1p(exp(-x)))
p <- exp(logp)
rbinom(N, 1, p)
}
## l: a matrix of logit values with N rows, k columns
rmulti_logit <- function(N, l)
{
unprob <- exp(l)
sprob <- apply(unprob, 1, sum)
prob <- sweep(unprob, 1, sprob, '/') # N x k matrix of probs.
r <- runif(N)
rslt <- rep(NA_integer_, N)
for(k in seq(1, ncol(l))) {
sel <- r > 0 & r <= prob[,k]
rslt[sel] <- k
r <- r - prob[,k]
}
stopifnot(!any(is.na(rslt)))
rslt
}
## parms: a named list of model parameters
sim_concord_4 <- function(N, parms = NULL)
{
default_parms <- list(
ac = 0.0, # baseline concordance
ad = -1.0, # baseline dual eligible
ao = -2.3, # baseline outcome (p(death))
bspc = c(-2, 0, 0, 0), # specialty effects on concordance
bwc = -0.5, # weekend effect on concordance
bsu = 0, # sex effect on SES (assume none, but leave possibility open)
bhu = -0.5, # HNW effect on SES (with ad=bd=-1, this implies ~38% of HNW will be DE.)
sigus = 1.2, # RAndom component of SES
bua = -0.25, # SES effect on OSH (higher SES -> lower OSH)
bud = -1.0, # unobserved socioeconomic status effect on dual eligibility
# (low values of latent SES associated with dual elig.)
sigi = 0.85, # Noise level in the proxy for sigus. Default is half the variance
# of the default sigi
## Direct effect on outcomes. +ve = higher p(death)
bspo = c(1.0, 0.0, 0.0, 0.0), # spec = 1 effect on p(death)
bwo = 0.1, # weekend effect on p(death)
bco = -0.3, # concordance effect on p(death)
bso = 0.0, # sex effect on outcome - assume none, but leave possibility open
bho = 0.2, # HNW direct effect on outcome - this is the primary object of study
buo = 0.5, # SES effect on outcome - this is the main confounding pathway
## Admission status effect on outcome. OSH is modest increase in mortality,
## Elective is big decrease, ED is baseline
bao = c(0.1, -1.0, 0),
## Root node probabilities
pspec = rep(0.25, 4), # 4 specialties, equal probability
pw = 0.15, # 15% of pts on weekends
ps = 0.5, # equal distribution of sexes
ph = 0.2, # 20% HNW
lsrc = log(c(2, 1, 7)/7) # baseline admissions source multinomial logit value: OSH, elective, ED
)
p <- default_parms
for (n in names(parms)) {
if(! n %in% names(default_parms)) {
warning('Unknown parameter: ', n)
}
p[[n]] <- parms[[n]]
}
if(length(p$pspec) != 4) {
stop('pspec must have length 4')
}
if(length(p$lsrc) != 3) {
stop('psrc must have length 3')
}
if(length(p$bao) != 3) {
stop('bao must have length 3')
}
## Start by generating the root variables
SPEC <- sample(seq(1,4), N, TRUE, p$pspec)
W <- rbinom(N, 1, p$pw)
S <- rbinom(N, 1, p$ps)
H <- rbinom(N, 1, p$ph)
## unobserved variables
us <- rnorm(N, sd=p$sigus) + p$bsu*S + p$bhu*H
## noisy measurement of us
I <- us + rnorm(N, sd=p$sigi)
## admission status
l <- matrix(rep(p$lsrc, N), nrow=N, byrow=TRUE)
l[,1] <- l[,1] + p$bua * us # SES effect on p(OSH)
Adm <- rmulti_logit(N, l)
## other non-root variables
C <- rbern_logit(N, p$ac + p$bspc[SPEC] + p$bwc*W)
D <- rbern_logit(N, p$ad + p$bud*us)
## outcome variable
po <- p$ao + p$bspo[SPEC] + p$bwo*W + p$bco*C + p$bso*S +
p$bho*H + p$buo*us + p$bao[Adm]
O <- rbern_logit(N, po)
## package everything up in a data frame. We'll include the unobserved variables,
## but using them in the modeling is verboten, unless to demonstrate the effect of bias.
list(
SPEC = SPEC,
W = W,
S = S,
H = H,
ADM = Adm,
C = C,
D = D,
I = I,
O = O,
us = us,
N=N
)
}
set.seed(867-5309)
concord_4_n1000 <- sim_concord_4(1000)
set.seed(867-5309)
concord_4_n20000 <- sim_concord_4(20000)
## Dataset 5 is the same as dataset 4, but with sigi = 1e-6
set.seed(867-5309)
concord_5_n20000 <- sim_concord_4(20000, list(sigi=1e-6))