Ricker_mean.qmd

---
title: "Which Ricker curve is best?"
output: html_document
editor: 
  markdown: 
    wrap: 72
---

```{r echo = FALSE, warning = FALSE, message=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message=FALSE, fig.align = 'center')
library(tidyverse)

# Functions ---------------------------------------------------------------
Ricker <- function(S, lnalpha, beta) {
  S*exp(lnalpha - beta*S)
}

#Simulate a single stock
simulateSR_goal <- function(lnalpha, beta, sigW, N, goal, sigS = 0, phi = 0, power = 1, sigF = 0) {
  stopifnot(length(goal) %in% 1:2)
  if(length(goal) == 1){stopifnot(goal >= 0 & goal < 1)}
  if(length(goal) == 2){stopifnot(goal[1] > 1 & goal[2] < (lnalpha + 0.5 * sigW * sigW) / beta & goal[2] > goal[1])}
  stopifnot(power >= 0 & power <=1) # fishing power is 0 for no harvest and 1 when the fishery can fully control escapement
  
  # ----- initial values ----- #
  # initial value for S: Seq minus some harvest
  S <- 900
  
  # initial value for observed S
  Shat <- S*rlnorm(1, sdlog=sigS)
  
  # initializing all other values
  redresid <- 0
  E1R <- E2R <- whiteresid <- epsF <- U1 <- U2 <- U <- Ft <- H <- Rhat <- lnRhatShat <- fittedR <- NA
  R <- 1 #This values in never used but needed to evaluate U
  
  # recursive portion...
  for(i in 2:(N + 50 +1)) { #added 50 (will discard later)
    E1R[i] <- S[i-1]*exp(lnalpha - beta*S[i-1])
    E2R[i] <- E1R[i]*exp(phi*redresid[i-1])
    R[i] <- E2R[i]*rlnorm(1,0,sigW)
    redresid[i] <- log(R[i]/E1R[i])
    whiteresid[i] <- log(R[i]/E2R[i])
    epsF[i] <- rnorm(1,0,sigF)
    if(length(goal) == 2){
      U1[i] <- if(R[i] > goal[1]){(R[i] - goal[1]) / R[i]} else{0} # Find U to fish to lb of goal
      U2[i] <- if(R[i] > goal[2]){(R[i] - goal[2]) / R[i]} else{0} # Find U to fish to ub of goal
    }
    U[i] <- if(length(goal) == 1){goal} else{min(runif(1, U2[i], U1[i]), power)} # draw a harvest rate
    Ft[i] <- -log(1 - U[i])*exp(epsF[i])
    S[i] <- R[i]*exp(-Ft[i])
    Shat[i] <- S[i]*rlnorm(1, sdlog=sigS)
    H[i] <- R[i]-S[i]
    Rhat[i] <- Shat[i]+H[i]
    lnRhatShat[i] <- log(Rhat[i]/Shat[i])
  }
  
  return(list(S=Shat[1:N + 50],
              R=Rhat[2:(N+1) + 50],
              Strue=S[1:N + 50],
              Rtrue=R[2:(N+1) + 50],
              yield=R[2:(N+1) + 50] - S[1:N + 50],
              goal = goal,
              lnalpha = lnalpha, 
              beta = beta,
              sigW = sigW))
}

#Function to find U that archives S somewhere within the goal
H_goal <- function(R, goal, power = .85, ...){
  # stopifnot(length(goal) == 4)
  # stopifnot(goal[1, 1] > 1 & goal[1, 2] < (lnalpha + 0.5 * sigW * sigW) / beta & goal[1, 2] > goal[1, 1])
  # stopifnot(goal[2, 1] > 1 & goal[2, 2] < (lnalpha + 0.5 * sigW * sigW) / beta& goal[2, 2] > goal[2, 1])
  U_lb <- U_ub <- U <- c(NA, NA)
  
  for(j in 1:2){
    U_lb[j] <- if(R[j] > goal[j, 1]){(R[j] - goal[j, 1]) / R[j]} else{0} # Find U to fish to lb of goal
    U_ub[j] <- if(R[j] > goal[j, 2]){(R[j] - goal[j, 2]) / R[j]} else{0} # Find U to fish to ub of goal
    U[j] <- min(runif(1, U_ub[j], U_lb[j]), power)
  }
  return(list(U, goal = goal, power = power))
}

#Function to find U that archives Smsy for each goal
H_Smsy <- function(R, power = 0.85){
  lna <- with(parent.frame(), lnalpha)
  b <- with(parent.frame(), beta)
  sig <- with(parent.frame(), sigW)
  lnap <- lna + 0.5 * sig * sig
  Smsy <- lna / b * (0.5 - 0.07 * lna)
  Smsyp <- lnap / b * (0.5 - 0.07 * lnap)
  
  U <- c(if(R[1] > Smsy){min((R[1] - Smsy) / R[1], power)} else{0},
         if(R[2] > Smsyp){min((R[2] - Smsyp) / R[2], power)} else{0})
  
  return(list(U, power = power))
}

#Function to find U that archives S a some percentage of the goal range
H_target <- function(R, goal, target, power = .85, ...){
  S_target <- U <- c(NA, NA)
  S_target <- c(goal[1,1] + target * (goal[1,2] - goal[1,1]), 
                goal[2,1] + target * (goal[2,2] - goal[2,1]))
  
  U <- c(if(R[1] > S_target[1]){min((R[1] - S_target[1]) / R[1], power)} else{0},
         if(R[2] > S_target[2]){min((R[2] - S_target[2]) / R[2], power)} else{0})
  
  return(list(U, goal = goal, power = power))
}

#Simulate 2 stocks w same dynamics but fished to different goals.
simSR_goals <- function(lnalpha, beta, sigW, N, sigF = 0, Hfun, ...){
  # ----- initial values ----- #
  # initial value for S: Seq minus some harvest
  S <- matrix(c(900, rep(NA, N + 50), 900, rep(NA, N + 50)), N + 51, 2)
  
  # initializing all other values
  redresid <- 0
  E1R <- E2R <- redresid <- whiteresid <- epsF <- U <- Ft <- H <- Rhat <- lnRhatShat <- fittedR <-
    matrix(c(rep(NA, N + 51), rep(NA, N + 51)), N + 51, 2)
  R <-  matrix(c(1, rep(NA, N + 50), 1, rep(NA, N + 50)), N + 51, 2)
  
  # recursive portion...
  for(i in 2:(N + 50 + 1)) { #added 50 (will discard later)
    E1R[i, ] <- S[(i-1), ]*exp(lnalpha - beta*S[(i-1), ]) 
    R[i, ] <- E1R[i, ]*rlnorm(1,0,sigW)
    epsF[i] <- rnorm(1,0,sigF)
    temp <- Hfun(R = R[i, ], ...)
    U[i,] <- temp[[1]]
    Ft[i, ] <- -log(1 - U[i, ])*exp(epsF[i])
    S[i, ] <- R[i, ]*exp(-Ft[i, ])
    H[i, ] <- R[i, ]-S[i, ]
  }
  return(data.frame(sim = rep(1:N, times = 2),
                    goal = rep(c("mode", "mean"), each = N),
                    S = c(S[1:N + 50, 1], S[1:N + 50, 2]),
                    R = c(R[2:(N+1) + 50, 1], R[2:(N+1) + 50, 2]),
                    Y = c(R[2:(N+1) + 50, 1] - S[1:N + 50, 1], R[2:(N+1) + 50, 2] - S[1:N + 50, 2]),
                    U = c(U[2:(N+1) +50, 1], U[2:(N+1) +50, 2]),
                    power = temp[["power"]],
                    lnalpha = lnalpha,
                    beta = beta,
                    sigW = sigW))
}

#table of realized yield near Smsy... Not sure I need this
tab_Smsy <- function(list){
  lnalpha_c <- list$lnalpha + .5 * list$sigW * list$sigW
  data.frame(median = median(list$yield[list$S < list$lnalpha / list$beta * (0.5 - 0.07 * list$lnalpha) + 50 &
                                          list$S > list$lnalpha / list$beta * (0.5 - 0.07 * list$lnalpha) - 50]),
             mean = mean(list$yield[list$S < lnalpha_c / list$beta * (0.5 - 0.07 * lnalpha_c) + 10 & 
                                      list$S > lnalpha_c / list$beta * (0.5 - 0.07 * lnalpha_c) - 10]))
}
```

Hello all,

During our meeting last week it became apparent that my understanding of
the $log(\alpha)$ correction differed markedly from many others. I
prepared this document to explain my understanding of this issue. In
listening to everyone thoughts, and reading the paper Sara attached to
the meeting, I noted three concerns from the group:

1.  $S^{'}_{msy}$ (Smsy calculated using $log(\alpha) + \sigma^2/2$) is
    a poor estimate of $S_{msy}$.

I was surprised by this as I was not aware that anyone thought
$S^{'}_{msy}$ was an estimate of $S_{msy}$. I don't want to pander to
anyone but perhaps it's helpful to state that $S^{'}_{msy}$ exists to
describe a Ricker curve that passes though the mean of the recruitment
data. Because stock recruit equations generally assume lognormal process
error the Ricker regression is conducted in log space. If we
exponentiate the regression mean we get the mode of recruits in the
natural space, which can be adjusted to represent the mean with the
factor $e^{\sigma^2/2}$. While it is convenient to incorporate this
correction into $log(\alpha^{'}) = log(\alpha) + \sigma^2/2$ this
convenience is not meant to imply $log(\alpha^{'})$ (or any of it's
derivatives) are less biased versions of the uncorrected parameter. In
this light use of $log(\alpha^{'})$ is really a decision about weather
you want the Ricker curve to go through the mean or the mode of your
data. The figure below demonstrated this choice by simulating many
generations of stock recruit data ($log(\alpha)=$ 1.5, $\beta=$ 0.001,
$\sigma=$ 0.5), and binning the simulated recruitments into groups where
each group corresponds to a range of 50 escaped fish. The box plots show
the 25%, 50% and 75% quantiles and the red dot shows the mean for
recruitment in each bin.

```{r fig.height=5, fig.width=9}
SRout <- simulateSR_goal(lnalpha = 1.5, beta = 0.001, sigW = .5, N = 100000, goal = c(405, 889), power = 0.9)
SRout$group <-  
  as.numeric(
    as.character(
      cut(SRout$S - 25, seq(0, max(SRout$S) + 50, 50), labels = seq(from = 50, by = 50, length.out= length(seq(0, max(SRout$S), 50))))))
ggplot(data = as.data.frame(SRout), aes(x = group, y = R)) +
  geom_boxplot(aes(group = group), color = "black") +
  stat_function(fun = Ricker, args = list(lnalpha = SRout$lnalpha, beta = SRout$beta), aes(color = "black")) +
  stat_function(fun = Ricker, args = list(lnalpha = SRout$lnalpha + 0.5 * SRout$sigW * SRout$sigW, 
                                          beta = SRout$beta), aes(color = "red")) +
  geom_point(data = aggregate(R ~ group, SRout, mean), aes(y = R, color = "red")) +
  geom_abline() +
  annotate("rect", xmin = SRout$goal[1], xmax = SRout$goal[2], ymin = 0, ymax = Inf, alpha = 0.1) +
  scale_x_continuous(name = "S") + 
  scale_y_continuous(name = "R", limits = c(0, quantile(SRout$R, 0.99))) +
  scale_color_manual(values = c("black", "red"), labels = c("ln(\u03B1)", "ln(\u03B1')"), name = "Ricker Parameter") +
  theme_bw(base_size = 18)
```

2.  Correcting for the mean is not appropriate when the posterior is
    simulated.

It has been pointed out that use of $\sigma^2/2$ is unnecessary when
using MCMC, since MCMC provides the entire posterior for estimated
recruitment, from which both the median and the mean are readily
available. This is correct, however the I'm unaware of anyone who uses
$\sigma^2/2$ to estimate the mean of annual recruitment with MCMC
methods. Instead, $\sigma^2/2$ is used to calculate $log(\alpha^{'})$
and $S^{'}_{msy}$ which describe a Ricker curve which go through the
mean of the data.

3.  The department needs to be consistent in it's usage of $S_{msy}$ or
    $S^{'}_{msy}$.

I certainly agree that our current system (which seems to be based on
personal preference) could be refined. That said, I'm not convinced that
it is in the department's best interests to eliminate use of either
$S_{msy}$ or $S^{'}_{msy}$ when describing spawn-recruit relationships.
In the rest of statistics the analyst chooses a measure of central
tendency depending on the analysis goals and the characteristics of the
data. It is well known that the mean minimizes squared errors while a
median minimizes absolute errors. It seems appealing to me to think
about the mean/mode choice with respect to the Ricker curve in this
way[^1] although it is not obvious (to me) that characteristics we know
apply to the mean and mode of a random variable also apply to yields
derived from management objectives derived from mean and mode centered
Ricker curves. To evaluate situations where mean or mode centered Ricker
curves may be preferable I conducted a simple management strategy
evaluation.

[^1]: Note that this is a separate discussion from which measure of
    central tendency you want to use to summarize your estimates of
    annual recruitment.

The idea behind a management strategy evaluation is to simulate
population dynamics using assumed stock-recruit equation and then
'manage' the population with a variety of operating models that describe
different harvest strategies. The operating models I'll explore maximize
sustained yield by fishing to $S_{MSY}$ or $S_{MSY}^{'}$. Note that we
should expect any difference we find to be modest as while
$ln(\alpha^{'})$ is considerably larger than $ln(\alpha)$ at most
combinations of $ln(\alpha)$ and $\sigma$, $S_{MSY}^{'}$ is more similar
to $S_{MSY}$ and $R_{MSY}^{'}$ (the point estimate for recruitment at
$S_{MSY}^{'}$) is even more similar to $R_{MSY}$ at the same parameter
values.

```{r fig.height=4, fig.width=9}
diff <-
  expand.grid(lnalpha = seq(0.5, 2.5, length.out = 10), beta = 0.001, sigW = c(.25, .5, .75, 1)) %>%
  dplyr::mutate(Smsy = lnalpha / beta *(0.5 - 0.07 * lnalpha),
                Rmsy = Ricker(Smsy, lnalpha, beta),
                lnalpha_c = lnalpha + .5 * sigW * sigW,
                Smsy_c = lnalpha_c / beta *(0.5 - 0.07 * lnalpha_c),
                Rmsy_c = Ricker(Smsy_c, lnalpha, beta),
                diff_Smsy = Smsy_c / Smsy,
                diff_Rmsy = Rmsy_c / Rmsy,
                diff_lnalpha = lnalpha_c / lnalpha) %>%
  tidyr::pivot_longer(dplyr::starts_with("diff")) %>%
  dplyr::mutate(name = factor(name, 
                              levels = c("diff_lnalpha", "diff_Smsy", "diff_Rmsy"), 
                              labels = c("ln(alpha^c)/ln(alpha)", "S[msy]^{c}/S[msy]", "R[msy]^{c}/R[msy]")))
ggplot(diff, aes(x = lnalpha, y = value, color = as.character(sigW))) + 
  geom_line() + 
  geom_point() +
  xlab(expression("ln"~alpha)) +
  ylab("Ratio") +
  #coord_cartesian(xlim = c(0, 3), ylim = c(1:3)) +
  scale_color_discrete(name = expression(sigma)) +
  theme_bw(base_size = 18) +
  facet_grid(. ~ name, labeller = label_parsed)
```

For this example I picked a range of values for $ln(\alpha)$ (0.5, 1.5,
2.5) and $\sigma$ (0.5, 0.75, 1) which should span all of the values you
would expect to see in a salmon population. For each combination I
simulated 2 populations of 500 generations which were exposed to
identical annual process error. Each population was harvested at a rate
that would maximize sustained yield according the the mode or mean
Ricker curve (S = $S_{MSY}$ or $S_{MSY}^{'}$). Harvest rates were 0 if
the simulated number of recruits was less than $S_{MSY}$ or
$S_{MSY}^{'}$ and could not exceed 85%. The plot below shows summary
statistics for recruitment, spawning abundance and yield. The dots and
error bars show the mean for each parameter while the 25%, 50% and 75%
quantiles are shown using a box plot. As expected, differences in yield
are negligible for most combinations of $ln(\alpha)$ and $\sigma$ while
recruitment and spawning escapement are generally larger when managing
to $S_{MSY}^{'}$.

```{r fig.height=6, fig.width=9}
# Simulate Smsy fishing  ---------------------------------------------------------------
lnalpha <- rep(seq(.5, 2.5, length.out = 3), times = 3)
beta0 <- 0.001
beta <- rep(beta0, 9)
sigW <- rep(seq(0.5, 1, length.out = 3), each = 3)

sim_Smsy <- 
  mapply(simSR_goals, lnalpha = lnalpha, beta = beta,  sigW = sigW, MoreArgs = list(Hfun = H_Smsy, N = 500), SIMPLIFY = FALSE) %>%
  do.call(rbind, .) %>%
  mutate(close = ifelse(U == 0, 1, 0),
         cap = ifelse(U == power, 1, 0)) 

# #Check harvest rates
# sim_Smsy %>%
#   ggplot(aes(x = S, y = lag(U), color = goal)) + 
#   geom_point() +
#   scale_x_continuous(limits = c(0, 5000)) +
#   facet_grid(lnalpha~sigW, scales = "free_y", labeller = label_both)

#Mean R, S, and Y
hinge <- function(x){quantile(x, c(0.25, 0.5, 0.75)) %>% setNames(c("ymin", "y", "ymax"))}
sim_Smsy %>%
  pivot_longer(cols = c(S:Y), names_to = "stat", values_to = "value") %>%
  ggplot(aes(x = stat, y = value, color = goal)) + 
  stat_summary(fun = "mean", geom = "point", position = position_dodge(width = 0.75)) +
  stat_summary(fun.data = "hinge", geom = "crossbar", position = position_dodge(width = 0.75), linetype = 2, width = .25) +
  stat_summary(fun.data = "mean_cl_boot", geom = "errorbar", position = position_dodge(width = 0.75)) +
  theme_bw(base_size = 18) +
  facet_grid(lnalpha~sigW, scales = "free_y", labeller = label_both)

```

In my eyes the plot above argues for managing to $S_{MSY}^{'}$ because
while the situation where we would experience improved yield appear to
be negligible at worst we are able to attain the same yield with larger
numbers of fish returning and being allowed to spawn. However, you may
be able to make an economic argument for managing to $S_{MSY}$ which
revolves around fishery predictability. The plot below shows the
probability that the fishery is overcapacity \[P(U=85%)\] thus forgoing
available yield or closes \[P(U=0%)\] for each scenario. Not
surprisingly the slightly larger objective associated with $S_{MSY}^{'}$
leads to more closures but fewer situations where the fishery is
overcapacity. Again the differences are small.

```{r fig.height=5, fig.width=9}
#% of time closed and fished at max power under each goal
sim_Smsy  %>%
  pivot_longer(cols = c(close, cap), names_to = "stat", values_to = "value") %>%
  ggplot(aes(x = stat, y = value, color = goal)) + 
  stat_summary(fun = "mean", geom = "point", position = position_dodge(width = 0.75)) +
  stat_summary(fun.data = "mean_cl_boot", geom = "errorbar", position = position_dodge(width = 0.75)) +
  scale_x_discrete(labels = c("cap" = "P(U=0.85)", "close" = "P(U=0%)")) +
  facet_grid(lnalpha~sigW, scales = "free_y", labeller = label_both)
```

There is one obvious situation where an analyst may want to choose the
Ricker mode to describe the stock recruit relationship, that being the
case where the available data has an extreme outlier. I admit cherry
picking this data from my simulation time series ($ln(\alpha)$ = 1.5 and
$\sigma$ = 0.75) but I did not have to look to hard... I just picked the
first 25 year chuck that had one very large recruitment. Below I show
the data, the true Ricker relationship, the estimated Ricker mean and
the estimated Ricker mode. While in this case we see that the large
recruitment forces the estimated mean stock recruit relationship too
high a naive analyst (who does not know the true SR relationship) might
prefer to use the mode based on her belief that the large recruitment
was associated with analogously large process error or simply distorts
the relationship due to its size and the relatively sparse data
available to counter it's effect.

```{r fig.height=5, fig.width=7}
dat_outlier <- readRDS(".\\dat_outlier.rds") 
mod_outlier <- lm(log(dat_outlier$R/dat_outlier$S) ~ dat_outlier$S) 
#summary(mod_outlier) mod_outlier2 <- lm(log(dat_outlier$R[-20]/dat_outlier$S[-20]) ~ dat_outlier$S[-20]) #estimation wo outlier #summary(mod_outlier2)

plot(50:1000, exp(1.5+.5*.75*.75)*50:1000*exp(-.001*50:1000), 
     type = "l", 
     ylim = c(0, 18000), 
     xlab = "S", 
     ylab = "R") 
points(dat_outlier$S, dat_outlier$R) 
lines(50:1000, 
      exp(coef(mod_outlier)[1] + 0.5*sigma(mod_outlier)*sigma(mod_outlier))*50:1000*exp(coef(mod_outlier)[2]*50:1000), col = "red")
lines(50:1000, 
      exp(1.6)*50:1000*exp(-.0007*50:1000), col = "green")
legend("topleft", 
       c("true Ricker mean", "estimated Ricker mean", "estimated Ricker mode"), 
       col = c("black", "red", "green"), 
       lty = 1) 
```