SR_all_LVM.R

#The purpose of this script is to fit models of transition between spawners and juvenile emigrants expressing different juvenile life histories

#libraries
library(here)
library(tidyverse)
library(viridis)
library(TMB)
library(TMBhelper)
library(readxl)
library(glmmTMB)


##  Analysis of screw trap data to estimate daily juvenile emigrants ##
refit<-FALSE # whether to refit models rather that loading existing fitted model objects if they exist (takes about ~20 minutes; 5 for model fitting and 15 for parametric bootstrap)

#fit/load model
<<<<<<< HEAD
setwd(here("Wenatchee-screw-traps"))
here::i_am("src/ST_all.R")
=======
>>>>>>> 7a07d9ad79c9bab3c05112cc0965b9ad8574f2b9
source(here("src","ST_all.R"))
ST_all<-ST_all_func(refit=refit)


<<<<<<< HEAD
load(here("results","emigrant_estimates Jul 06 2021.Rdata"))


=======
>>>>>>> 7a07d9ad79c9bab3c05112cc0965b9ad8574f2b9
# load functions to calculate geometric means of dialy emigrants and plot
source(here("src","ST_plotting_funcs.R"))

#plot average daily emigrants and LHP breaks
# png(file=here("results","plots","emigration_timing.png"),units="in",width = 6,height=4,res=300)
with(ST_all,ggplot_timing_func(all_emigrants_estimates,all_data_lists,across_stream_geomean,mix_geomean_dens,breaks))
# dev.off()


#plot average daily temperature and discharge
# png(file=here("results","plots","temp_dis.png"),units="in",width = 4.5,height=3,res=300)
with(ST_all,plot_dis_temp_func(all_data_lists))
# dev.off()

rm(list=ls()[which(ls()!="ST_all")])#remove all all the functions used for screw trap model


#----------------------------------------------------------------------------
##  Analysis of screw trap data to estimate daily juvenile emigrants ##

##   Load data ##
{
  #read redd data from Hatchery program annual report (Hillman et al 2020)
  redds<-read_csv(here("data","redd_counts.csv")) %>% pivot_longer(c("Chiwawa","Nason","White"),"stream",values_to="redds") 
  
  
  ##emigrant abundance estimates (log means and standard deviations from screw trap model)
  all_emigrants_estimates<-ST_all$all_emigrants_estimates; rm("ST_all")
  
  #load stream flow covariate values (flow_covs)
  source(here("src","covariates.r"))
  
  # length of habitat surveyed for spawners in each tributary (based on motoring annual report, Hillman et al. 2020)
  trib_lengths<-c(32.5,15.4,16.1)
  names(trib_lengths)<-c("Chiwawa","Nason","White")
}


##   Load functions ##

# source helper functions to make data object, make initial parameter values, and makes map (which fixes certain parameter),  all to feed to the TMB model "Stock_recruit_LVM"
source(here("src","SR_helper_functions.R"))
 
 # source plotting functions for visualizing results.
 source(here("src","SR_plotting_funcs_factor.R"))
 
 #--------------------------------------------------------------------- 
 #--------------------------------------------------------------------- 
 ##   Fit models  ##
 
 #load TMB model
 setwd(here("src","TMB"))
 TMB::compile("Stock_recruit_LVM.cpp")
 dyn.load(dynlib("Stock_recruit_LVM"))
 
#---------------------------------------------------------------------

#sometimes have to try a number of times with different starting calues to get a model to converge (takes several minutes)
#there should ideally be multiple converged model with the same BIC = 612.9517
 ## if its not working, I recommend closing r and then trying again
 set.seed(1234)
fit_mod_result<-fit_mod_iter(streams=0:2,     #streams to include (Chiwawa, Nason, White)
                             LHs=1:4,         #life histories to include (spring subs, sumemr subs, fall subs, spring yearlings)
                             n_f=1,           # number of latent variable factors to include
                             fit_env =1,      #whether to fit environmentel covariates (1=year, 0 = no)
                             fit_attempts=20, # numebr of times to attempt to fit model
                             additional_attempts = 0, #additional attempts after that until a model converges
                             log_J_max_prior=log(15000))   # prior mean on Jmax hyper-mean
 
#BIC value from each iteration running the model with different initial parameters
##There should ideally be multiple converged model with the same BIC = 612.9517
fit_mod_result$BIC_vec                      
# AIC of best fit
fit_mod_result$fit$AIC

save(fit_mod_result,file=here("results","fit_mod_result_12_21_21.Rdata"))

#list of final parameter values
par_out<-fit_mod_result$mod$env$parList(par=fit_mod_result$mod$env$last.par.best)
par_out$rate %>% exp() #penalty parameters
gc() #garbage clean
  
<<<<<<< HEAD
  setwd(here("src","TMB"))
  dyn.load(dynlib("Stock_recruit_LVM"))

LL_exclude<-rep(NA,196)
for (i in 1:nrow(prior_combs)){
for(j in 85:196){
  if(is.na(LL_exclude[j])){
fit_mod_result<-fit_mod_iter(rep(c(1,1,1,1),times=3),streams=0:2,LHs=1:4,n_f=1,no_rand_FF =0, fit_env =1, fit_attempts=5,additional_attempts = 4,rate=c(5,5,5,5,1,log(5000)),fold_exclude=(j-1))
                               
        gc()                       # rand_ord[fold[j]:(fold[j+1]-1)])   
try(LL_exclude[j]<-fit_mod_result$report$ll_exclude)
        print(j)
        print(LL_exclude[j])
   }
}
}
return(LL_exclude)
}#end of loop
sum(is.na(results))
end<-Sys.time()
rowSums(results) %>% sort

# write.csv(results,here("results","LL_exclude_loo.csv"))
results<-read.csv(here("results","LL_exclude_loo.csv"))[,-1]
rowSums(results) %>% `names<-`(1:27) %>%  sort()
prior_combs2<- prior_combs %>% mutate(LL_exc=rowSums(results))

prior_combs2 %>% arrange(LL_exc)
write.csv(prior_combs2,here("results","prior_combs2.csv"))


stopCluster(cl) #shut down parallel computing clusters
unregister <- function() {
  env <- foreach:::.foreachGlobals
  rm(list=ls(name=env), pos=env)
}
unregister()


results2<-array(unlist(results),dim=c(dim(results[[1]]),Niter))
return(results2)
}

 for(j in 1:nrow(prior_combs)){
 
 }


thing2<- sum(ll_fold_exclude2)
sum(ll_fold_exclude2)
sum(ll_fold_exclude3)
sum(ll_fold_exclude4)

test_sim<-rmvnorm_prec(mu=fit_mod_result$mod$env$last.par.best ,
                       prec=fit_mod_result$fit$SD$jointPrecision ,
                       n=500,random_seed=500)


test_x<-apply(test_sim,2,function(x){fit_mod_result$mod$report(x)[["juv_like"]]})

loo::loo(t(test_x))


fit_mod_result<-fit_mod_iter(rep(c(1,1,1,1),times=3),streams=0:2,LHs=1:4,n_f=1,no_rand_FF =0, fit_env =1, fit_attempts=5,additional_attempts = 2,rate=c(5,5,5,5,1,log(100000)),fold_exclude=integer(0)) 

lower<-rep(-Inf,length(fit_mod_result$mod$env$last.par.best))
lower[which(names(fit_mod_result$mod$env$last.par.best)=="Loadings_vec")[5]]<-0
stand_test<-tmbstan::tmbstan(fit_mod_result$mod, cores=4,lower = lower,upper=rep(Inf,length(lower)),
                             init=fit_mod_result$mod$env$last.par.best)
                             
shinystan::launch_shinystan(stand_test)


thing<-as.data.frame(stand_test)
colnames(thing)

sd(exp(thing$`beta_alpha[1]`+thing$`eps_alpha[2]`*exp(thing$`log_sigma_alpha[1]`)))
=======
>>>>>>> 7a07d9ad79c9bab3c05112cc0965b9ad8574f2b9

  #parametric bootstrap from posterior of fitted model
  sim_post<-rmvnorm_prec(fit_mod_result$mod$env$last.par.best, 
               fit_mod_result$fit$SD$jointPrecision, 50000, 623 )
    

  #reporting
  
  ##calculate and store functional form parameters (alpha, gamma, Jmax) for each posterior samples
  FFparams<-array(dim=c(12,50000,3))
  for (i in 1:50000){
    out<-fit_mod_result$mod$report(par=sim_post[,i])
    FFparams[,i,1]<-out$alpha
    FFparams[,i,2]<-out$gamma
    FFparams[,i,3]<-out$Jmax
  }
  ##calculate quantiles of FF parameters 
  quant_FF<-apply(FFparams,c(1,3),quantile,probs=c(0.025,.5,0.975))
  
 ## table of  functional relationship fit parameters
   FF_params<- tibble(stream=rep(c("Chiwawa","Nason","White"),each=4),
  LH=factor(rep(c("Spr-0","Sum-0","Fall-0", "Spr-1"),times=3),levels=c("Spr-0","Sum-0","Fall-0", "Spr-1")),           #alpha parameters
  alpha=fit_mod_result$report$alpha, #mean
  alpha_lcl=quant_FF[1,,1], #lower 95% confidence limit
  alpha_ucl=quant_FF[3,,1], #upper 95% confidence limie
  #gamma
  gamma=fit_mod_result$report$gamma,
  gamma_lcl=quant_FF[1,,2],
  gamma_ucl=quant_FF[3,,2],
  #Jmax
  Jmax=fit_mod_result$report$Jmax,
  Jmax_lcl=quant_FF[1,,3],
  Jmax_ucl=quant_FF[3,,3],) %>% 

    arrange(LH,stream) %>% 
    mutate(across(3:11,round,2))
  
  View(FF_params) # View table

 write.csv(FF_params,here("results","FF_params.csv")) #write table CSV file
 
 
 #hyper means of parameters
 ##gamma
 exp(par_out$beta_gamma)
 exp(
 sim_post[
 which(names(fit_mod_result$mod$env$last.par.best)=="beta_gamma"),]) %>% 
   apply(1,quantile,probs=c(.025,.5,.975))
 ##Jmax
 exp(par_out$beta_Jmax)
 exp(
   sim_post[
     which(names(fit_mod_result$mod$env$last.par.best)=="beta_Jmax"),]) %>% 
   apply(1,quantile,probs=c(.025,.5,.975))
 
 
  # plot of latent and expected juveniles vs spawners 
  ##png(file=here("results","plots","spawn_em_11112021.png"),units="in",height=5,width=6.5,res=300)
  spawn_em<-ggplot_spawner_juveniles(mod_fit = fit_mod_result$fit,mod_dat =fit_mod_result$dat,mod_rep=fit_mod_result$report)
  ##dev.off()

  
#spawners and juvenile by year
# png(here("results","plots","spaw_em.png"),units="in",res=300,height=5,width=7)
   plot_spawn_em_year(mod_dat=fit_mod_result$dat,sum_out = spawn_em$sum_out)
   # dev.off() 


##expected emigrants vs. spawners
# png(here("results","plots","juv_LH.png"),units="in",res=300,height=4,width=5)
   expec_spawn_em_ploft_func(preds=spawn_em$preds)
  # dev.off()
  # ggsave(here("results","plots","juv_LH.png"))


# Probability environmental covariate coefficients are not equal to zero
pnorm(0,abs(fit_mod_result$fit$SD$par.fixed),sqrt(diag(fit_mod_result$fit$SD$cov.fixed)),lower.tail = TRUE)[names(fit_mod_result$fit$SD$par.fixed)=="beta_e"]*2


# plot environmental covariate coefficients
# png(here("results","plots","coef_plot_all.png"),units="in",res=300,height=4,width=5)
bootstrap_env_cov(fit_mod_result$dat,last_best=fit_mod_result$fit$par , precis=fit_mod_result$fit$SD$cov.fixed )
# dev.off()  

# quantiles of paramateric bootstrap distribution of environmental covariates
fit_mod_result$mod$env$last.par.best[which(names(fit_mod_result$mod$env$last.par.best) =="beta_e")] 
sim_post[which(names(fit_mod_result$mod$env$last.par.best) =="beta_e"),] %>% apply(1,quantile,probs=c(.025,.5,.975))


#Refit model with no environmental covariates to look at process erros covariance (&correlation) based on the loadings onto the latent variable


#sometimes have to run this a few times to get a model to converge
fit_mod_result_no_env<-fit_mod_iter(streams=0:2,LHs=1:4,n_f=1,fit_env =0, fit_attempts=20,additional_attempts = 0,log_J_max_prior=log(15000))   

#BIC value from each iteration running the model with different initial parameters
##There should ideally be multiple converged model with the same BIC = 606.946
fit_mod_result_no_env$BIC_vec                      
# AIC of best fit
fit_mod_result_no_env$fit$AIC

save(fit_mod_result_no_env,file=here("results","fit_mod_result_no_env_12_21_21.Rdata")) #save fitted model object

# plot process error correlation (takes a minute for bootstrapping p-values)
## png(here("results","plots","correlation.png"),units="in",res=300,height=10,width=10)
corr_plot<-plot_cor(mod_rep = fit_mod_result_no_env$report, mod_fit =  fit_mod_result_no_env$fit, mod_dat = fit_mod_result_no_env$dat)
## dev.off()  

#end of analysis