-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRicker_mean.qmd
387 lines (344 loc) · 18 KB
/
Ricker_mean.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
---
title: "Which Ricker curve is best?"
output: html_document
editor:
markdown:
wrap: 72
---
```{r echo = FALSE, warning = FALSE, message=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message=FALSE, fig.align = 'center')
library(tidyverse)
# Functions ---------------------------------------------------------------
Ricker <- function(S, lnalpha, beta) {
S*exp(lnalpha - beta*S)
}
#Simulate a single stock
simulateSR_goal <- function(lnalpha, beta, sigW, N, goal, sigS = 0, phi = 0, power = 1, sigF = 0) {
stopifnot(length(goal) %in% 1:2)
if(length(goal) == 1){stopifnot(goal >= 0 & goal < 1)}
if(length(goal) == 2){stopifnot(goal[1] > 1 & goal[2] < (lnalpha + 0.5 * sigW * sigW) / beta & goal[2] > goal[1])}
stopifnot(power >= 0 & power <=1) # fishing power is 0 for no harvest and 1 when the fishery can fully control escapement
# ----- initial values ----- #
# initial value for S: Seq minus some harvest
S <- 900
# initial value for observed S
Shat <- S*rlnorm(1, sdlog=sigS)
# initializing all other values
redresid <- 0
E1R <- E2R <- whiteresid <- epsF <- U1 <- U2 <- U <- Ft <- H <- Rhat <- lnRhatShat <- fittedR <- NA
R <- 1 #This values in never used but needed to evaluate U
# recursive portion...
for(i in 2:(N + 50 +1)) { #added 50 (will discard later)
E1R[i] <- S[i-1]*exp(lnalpha - beta*S[i-1])
E2R[i] <- E1R[i]*exp(phi*redresid[i-1])
R[i] <- E2R[i]*rlnorm(1,0,sigW)
redresid[i] <- log(R[i]/E1R[i])
whiteresid[i] <- log(R[i]/E2R[i])
epsF[i] <- rnorm(1,0,sigF)
if(length(goal) == 2){
U1[i] <- if(R[i] > goal[1]){(R[i] - goal[1]) / R[i]} else{0} # Find U to fish to lb of goal
U2[i] <- if(R[i] > goal[2]){(R[i] - goal[2]) / R[i]} else{0} # Find U to fish to ub of goal
}
U[i] <- if(length(goal) == 1){goal} else{min(runif(1, U2[i], U1[i]), power)} # draw a harvest rate
Ft[i] <- -log(1 - U[i])*exp(epsF[i])
S[i] <- R[i]*exp(-Ft[i])
Shat[i] <- S[i]*rlnorm(1, sdlog=sigS)
H[i] <- R[i]-S[i]
Rhat[i] <- Shat[i]+H[i]
lnRhatShat[i] <- log(Rhat[i]/Shat[i])
}
return(list(S=Shat[1:N + 50],
R=Rhat[2:(N+1) + 50],
Strue=S[1:N + 50],
Rtrue=R[2:(N+1) + 50],
yield=R[2:(N+1) + 50] - S[1:N + 50],
goal = goal,
lnalpha = lnalpha,
beta = beta,
sigW = sigW))
}
#Function to find U that archives S somewhere within the goal
H_goal <- function(R, goal, power = .85, ...){
# stopifnot(length(goal) == 4)
# stopifnot(goal[1, 1] > 1 & goal[1, 2] < (lnalpha + 0.5 * sigW * sigW) / beta & goal[1, 2] > goal[1, 1])
# stopifnot(goal[2, 1] > 1 & goal[2, 2] < (lnalpha + 0.5 * sigW * sigW) / beta& goal[2, 2] > goal[2, 1])
U_lb <- U_ub <- U <- c(NA, NA)
for(j in 1:2){
U_lb[j] <- if(R[j] > goal[j, 1]){(R[j] - goal[j, 1]) / R[j]} else{0} # Find U to fish to lb of goal
U_ub[j] <- if(R[j] > goal[j, 2]){(R[j] - goal[j, 2]) / R[j]} else{0} # Find U to fish to ub of goal
U[j] <- min(runif(1, U_ub[j], U_lb[j]), power)
}
return(list(U, goal = goal, power = power))
}
#Function to find U that archives Smsy for each goal
H_Smsy <- function(R, power = 0.85){
lna <- with(parent.frame(), lnalpha)
b <- with(parent.frame(), beta)
sig <- with(parent.frame(), sigW)
lnap <- lna + 0.5 * sig * sig
Smsy <- lna / b * (0.5 - 0.07 * lna)
Smsyp <- lnap / b * (0.5 - 0.07 * lnap)
U <- c(if(R[1] > Smsy){min((R[1] - Smsy) / R[1], power)} else{0},
if(R[2] > Smsyp){min((R[2] - Smsyp) / R[2], power)} else{0})
return(list(U, power = power))
}
#Function to find U that archives S a some percentage of the goal range
H_target <- function(R, goal, target, power = .85, ...){
S_target <- U <- c(NA, NA)
S_target <- c(goal[1,1] + target * (goal[1,2] - goal[1,1]),
goal[2,1] + target * (goal[2,2] - goal[2,1]))
U <- c(if(R[1] > S_target[1]){min((R[1] - S_target[1]) / R[1], power)} else{0},
if(R[2] > S_target[2]){min((R[2] - S_target[2]) / R[2], power)} else{0})
return(list(U, goal = goal, power = power))
}
#Simulate 2 stocks w same dynamics but fished to different goals.
simSR_goals <- function(lnalpha, beta, sigW, N, sigF = 0, Hfun, ...){
# ----- initial values ----- #
# initial value for S: Seq minus some harvest
S <- matrix(c(900, rep(NA, N + 50), 900, rep(NA, N + 50)), N + 51, 2)
# initializing all other values
redresid <- 0
E1R <- E2R <- redresid <- whiteresid <- epsF <- U <- Ft <- H <- Rhat <- lnRhatShat <- fittedR <-
matrix(c(rep(NA, N + 51), rep(NA, N + 51)), N + 51, 2)
R <- matrix(c(1, rep(NA, N + 50), 1, rep(NA, N + 50)), N + 51, 2)
# recursive portion...
for(i in 2:(N + 50 + 1)) { #added 50 (will discard later)
E1R[i, ] <- S[(i-1), ]*exp(lnalpha - beta*S[(i-1), ])
R[i, ] <- E1R[i, ]*rlnorm(1,0,sigW)
epsF[i] <- rnorm(1,0,sigF)
temp <- Hfun(R = R[i, ], ...)
U[i,] <- temp[[1]]
Ft[i, ] <- -log(1 - U[i, ])*exp(epsF[i])
S[i, ] <- R[i, ]*exp(-Ft[i, ])
H[i, ] <- R[i, ]-S[i, ]
}
return(data.frame(sim = rep(1:N, times = 2),
goal = rep(c("mode", "mean"), each = N),
S = c(S[1:N + 50, 1], S[1:N + 50, 2]),
R = c(R[2:(N+1) + 50, 1], R[2:(N+1) + 50, 2]),
Y = c(R[2:(N+1) + 50, 1] - S[1:N + 50, 1], R[2:(N+1) + 50, 2] - S[1:N + 50, 2]),
U = c(U[2:(N+1) +50, 1], U[2:(N+1) +50, 2]),
power = temp[["power"]],
lnalpha = lnalpha,
beta = beta,
sigW = sigW))
}
#table of realized yield near Smsy... Not sure I need this
tab_Smsy <- function(list){
lnalpha_c <- list$lnalpha + .5 * list$sigW * list$sigW
data.frame(median = median(list$yield[list$S < list$lnalpha / list$beta * (0.5 - 0.07 * list$lnalpha) + 50 &
list$S > list$lnalpha / list$beta * (0.5 - 0.07 * list$lnalpha) - 50]),
mean = mean(list$yield[list$S < lnalpha_c / list$beta * (0.5 - 0.07 * lnalpha_c) + 10 &
list$S > lnalpha_c / list$beta * (0.5 - 0.07 * lnalpha_c) - 10]))
}
```
Hello all,
During our meeting last week it became apparent that my understanding of
the $log(\alpha)$ correction differed markedly from many others. I
prepared this document to explain my understanding of this issue. In
listening to everyone thoughts, and reading the paper Sara attached to
the meeting, I noted three concerns from the group:
1. $S^{'}_{msy}$ (Smsy calculated using $log(\alpha) + \sigma^2/2$) is
a poor estimate of $S_{msy}$.
I was surprised by this as I was not aware that anyone thought
$S^{'}_{msy}$ was an estimate of $S_{msy}$. I don't want to pander to
anyone but perhaps it's helpful to state that $S^{'}_{msy}$ exists to
describe a Ricker curve that passes though the mean of the recruitment
data. Because stock recruit equations generally assume lognormal process
error the Ricker regression is conducted in log space. If we
exponentiate the regression mean we get the mode of recruits in the
natural space, which can be adjusted to represent the mean with the
factor $e^{\sigma^2/2}$. While it is convenient to incorporate this
correction into $log(\alpha^{'}) = log(\alpha) + \sigma^2/2$ this
convenience is not meant to imply $log(\alpha^{'})$ (or any of it's
derivatives) are less biased versions of the uncorrected parameter. In
this light use of $log(\alpha^{'})$ is really a decision about weather
you want the Ricker curve to go through the mean or the mode of your
data. The figure below demonstrated this choice by simulating many
generations of stock recruit data ($log(\alpha)=$ 1.5, $\beta=$ 0.001,
$\sigma=$ 0.5), and binning the simulated recruitments into groups where
each group corresponds to a range of 50 escaped fish. The box plots show
the 25%, 50% and 75% quantiles and the red dot shows the mean for
recruitment in each bin.
```{r fig.height=5, fig.width=9}
SRout <- simulateSR_goal(lnalpha = 1.5, beta = 0.001, sigW = .5, N = 100000, goal = c(405, 889), power = 0.9)
SRout$group <-
as.numeric(
as.character(
cut(SRout$S - 25, seq(0, max(SRout$S) + 50, 50), labels = seq(from = 50, by = 50, length.out= length(seq(0, max(SRout$S), 50))))))
ggplot(data = as.data.frame(SRout), aes(x = group, y = R)) +
geom_boxplot(aes(group = group), color = "black") +
stat_function(fun = Ricker, args = list(lnalpha = SRout$lnalpha, beta = SRout$beta), aes(color = "black")) +
stat_function(fun = Ricker, args = list(lnalpha = SRout$lnalpha + 0.5 * SRout$sigW * SRout$sigW,
beta = SRout$beta), aes(color = "red")) +
geom_point(data = aggregate(R ~ group, SRout, mean), aes(y = R, color = "red")) +
geom_abline() +
annotate("rect", xmin = SRout$goal[1], xmax = SRout$goal[2], ymin = 0, ymax = Inf, alpha = 0.1) +
scale_x_continuous(name = "S") +
scale_y_continuous(name = "R", limits = c(0, quantile(SRout$R, 0.99))) +
scale_color_manual(values = c("black", "red"), labels = c("ln(\u03B1)", "ln(\u03B1')"), name = "Ricker Parameter") +
theme_bw(base_size = 18)
```
2. Correcting for the mean is not appropriate when the posterior is
simulated.
It has been pointed out that use of $\sigma^2/2$ is unnecessary when
using MCMC, since MCMC provides the entire posterior for estimated
recruitment, from which both the median and the mean are readily
available. This is correct, however the I'm unaware of anyone who uses
$\sigma^2/2$ to estimate the mean of annual recruitment with MCMC
methods. Instead, $\sigma^2/2$ is used to calculate $log(\alpha^{'})$
and $S^{'}_{msy}$ which describe a Ricker curve which go through the
mean of the data.
3. The department needs to be consistent in it's usage of $S_{msy}$ or
$S^{'}_{msy}$.
I certainly agree that our current system (which seems to be based on
personal preference) could be refined. That said, I'm not convinced that
it is in the department's best interests to eliminate use of either
$S_{msy}$ or $S^{'}_{msy}$ when describing spawn-recruit relationships.
In the rest of statistics the analyst chooses a measure of central
tendency depending on the analysis goals and the characteristics of the
data. It is well known that the mean minimizes squared errors while a
median minimizes absolute errors. It seems appealing to me to think
about the mean/mode choice with respect to the Ricker curve in this
way[^1] although it is not obvious (to me) that characteristics we know
apply to the mean and mode of a random variable also apply to yields
derived from management objectives derived from mean and mode centered
Ricker curves. To evaluate situations where mean or mode centered Ricker
curves may be preferable I conducted a simple management strategy
evaluation.
[^1]: Note that this is a separate discussion from which measure of
central tendency you want to use to summarize your estimates of
annual recruitment.
The idea behind a management strategy evaluation is to simulate
population dynamics using assumed stock-recruit equation and then
'manage' the population with a variety of operating models that describe
different harvest strategies. The operating models I'll explore maximize
sustained yield by fishing to $S_{MSY}$ or $S_{MSY}^{'}$. Note that we
should expect any difference we find to be modest as while
$ln(\alpha^{'})$ is considerably larger than $ln(\alpha)$ at most
combinations of $ln(\alpha)$ and $\sigma$, $S_{MSY}^{'}$ is more similar
to $S_{MSY}$ and $R_{MSY}^{'}$ (the point estimate for recruitment at
$S_{MSY}^{'}$) is even more similar to $R_{MSY}$ at the same parameter
values.
```{r fig.height=4, fig.width=9}
diff <-
expand.grid(lnalpha = seq(0.5, 2.5, length.out = 10), beta = 0.001, sigW = c(.25, .5, .75, 1)) %>%
dplyr::mutate(Smsy = lnalpha / beta *(0.5 - 0.07 * lnalpha),
Rmsy = Ricker(Smsy, lnalpha, beta),
lnalpha_c = lnalpha + .5 * sigW * sigW,
Smsy_c = lnalpha_c / beta *(0.5 - 0.07 * lnalpha_c),
Rmsy_c = Ricker(Smsy_c, lnalpha, beta),
diff_Smsy = Smsy_c / Smsy,
diff_Rmsy = Rmsy_c / Rmsy,
diff_lnalpha = lnalpha_c / lnalpha) %>%
tidyr::pivot_longer(dplyr::starts_with("diff")) %>%
dplyr::mutate(name = factor(name,
levels = c("diff_lnalpha", "diff_Smsy", "diff_Rmsy"),
labels = c("ln(alpha^c)/ln(alpha)", "S[msy]^{c}/S[msy]", "R[msy]^{c}/R[msy]")))
ggplot(diff, aes(x = lnalpha, y = value, color = as.character(sigW))) +
geom_line() +
geom_point() +
xlab(expression("ln"~alpha)) +
ylab("Ratio") +
#coord_cartesian(xlim = c(0, 3), ylim = c(1:3)) +
scale_color_discrete(name = expression(sigma)) +
theme_bw(base_size = 18) +
facet_grid(. ~ name, labeller = label_parsed)
```
For this example I picked a range of values for $ln(\alpha)$ (0.5, 1.5,
2.5) and $\sigma$ (0.5, 0.75, 1) which should span all of the values you
would expect to see in a salmon population. For each combination I
simulated 2 populations of 500 generations which were exposed to
identical annual process error. Each population was harvested at a rate
that would maximize sustained yield according the the mode or mean
Ricker curve (S = $S_{MSY}$ or $S_{MSY}^{'}$). Harvest rates were 0 if
the simulated number of recruits was less than $S_{MSY}$ or
$S_{MSY}^{'}$ and could not exceed 85%. The plot below shows summary
statistics for recruitment, spawning abundance and yield. The dots and
error bars show the mean for each parameter while the 25%, 50% and 75%
quantiles are shown using a box plot. As expected, differences in yield
are negligible for most combinations of $ln(\alpha)$ and $\sigma$ while
recruitment and spawning escapement are generally larger when managing
to $S_{MSY}^{'}$.
```{r fig.height=6, fig.width=9}
# Simulate Smsy fishing ---------------------------------------------------------------
lnalpha <- rep(seq(.5, 2.5, length.out = 3), times = 3)
beta0 <- 0.001
beta <- rep(beta0, 9)
sigW <- rep(seq(0.5, 1, length.out = 3), each = 3)
sim_Smsy <-
mapply(simSR_goals, lnalpha = lnalpha, beta = beta, sigW = sigW, MoreArgs = list(Hfun = H_Smsy, N = 500), SIMPLIFY = FALSE) %>%
do.call(rbind, .) %>%
mutate(close = ifelse(U == 0, 1, 0),
cap = ifelse(U == power, 1, 0))
# #Check harvest rates
# sim_Smsy %>%
# ggplot(aes(x = S, y = lag(U), color = goal)) +
# geom_point() +
# scale_x_continuous(limits = c(0, 5000)) +
# facet_grid(lnalpha~sigW, scales = "free_y", labeller = label_both)
#Mean R, S, and Y
hinge <- function(x){quantile(x, c(0.25, 0.5, 0.75)) %>% setNames(c("ymin", "y", "ymax"))}
sim_Smsy %>%
pivot_longer(cols = c(S:Y), names_to = "stat", values_to = "value") %>%
ggplot(aes(x = stat, y = value, color = goal)) +
stat_summary(fun = "mean", geom = "point", position = position_dodge(width = 0.75)) +
stat_summary(fun.data = "hinge", geom = "crossbar", position = position_dodge(width = 0.75), linetype = 2, width = .25) +
stat_summary(fun.data = "mean_cl_boot", geom = "errorbar", position = position_dodge(width = 0.75)) +
theme_bw(base_size = 18) +
facet_grid(lnalpha~sigW, scales = "free_y", labeller = label_both)
```
In my eyes the plot above argues for managing to $S_{MSY}^{'}$ because
while the situation where we would experience improved yield appear to
be negligible at worst we are able to attain the same yield with larger
numbers of fish returning and being allowed to spawn. However, you may
be able to make an economic argument for managing to $S_{MSY}$ which
revolves around fishery predictability. The plot below shows the
probability that the fishery is overcapacity \[P(U=85%)\] thus forgoing
available yield or closes \[P(U=0%)\] for each scenario. Not
surprisingly the slightly larger objective associated with $S_{MSY}^{'}$
leads to more closures but fewer situations where the fishery is
overcapacity. Again the differences are small.
```{r fig.height=5, fig.width=9}
#% of time closed and fished at max power under each goal
sim_Smsy %>%
pivot_longer(cols = c(close, cap), names_to = "stat", values_to = "value") %>%
ggplot(aes(x = stat, y = value, color = goal)) +
stat_summary(fun = "mean", geom = "point", position = position_dodge(width = 0.75)) +
stat_summary(fun.data = "mean_cl_boot", geom = "errorbar", position = position_dodge(width = 0.75)) +
scale_x_discrete(labels = c("cap" = "P(U=0.85)", "close" = "P(U=0%)")) +
facet_grid(lnalpha~sigW, scales = "free_y", labeller = label_both)
```
There is one obvious situation where an analyst may want to choose the
Ricker mode to describe the stock recruit relationship, that being the
case where the available data has an extreme outlier. I admit cherry
picking this data from my simulation time series ($ln(\alpha)$ = 1.5 and
$\sigma$ = 0.75) but I did not have to look to hard... I just picked the
first 25 year chuck that had one very large recruitment. Below I show
the data, the true Ricker relationship, the estimated Ricker mean and
the estimated Ricker mode. While in this case we see that the large
recruitment forces the estimated mean stock recruit relationship too
high a naive analyst (who does not know the true SR relationship) might
prefer to use the mode based on her belief that the large recruitment
was associated with analogously large process error or simply distorts
the relationship due to its size and the relatively sparse data
available to counter it's effect.
```{r fig.height=5, fig.width=7}
dat_outlier <- readRDS(".\\dat_outlier.rds")
mod_outlier <- lm(log(dat_outlier$R/dat_outlier$S) ~ dat_outlier$S)
#summary(mod_outlier) mod_outlier2 <- lm(log(dat_outlier$R[-20]/dat_outlier$S[-20]) ~ dat_outlier$S[-20]) #estimation wo outlier #summary(mod_outlier2)
plot(50:1000, exp(1.5+.5*.75*.75)*50:1000*exp(-.001*50:1000),
type = "l",
ylim = c(0, 18000),
xlab = "S",
ylab = "R")
points(dat_outlier$S, dat_outlier$R)
lines(50:1000,
exp(coef(mod_outlier)[1] + 0.5*sigma(mod_outlier)*sigma(mod_outlier))*50:1000*exp(coef(mod_outlier)[2]*50:1000), col = "red")
lines(50:1000,
exp(1.6)*50:1000*exp(-.0007*50:1000), col = "green")
legend("topleft",
c("true Ricker mean", "estimated Ricker mean", "estimated Ricker mode"),
col = c("black", "red", "green"),
lty = 1)
```