diff --git a/phyto-final/Aulacoseira.BV.Analyses.R b/phyto-final/Aulacoseira.BV.Analyses.R deleted file mode 100644 index f4e9b98..0000000 --- a/phyto-final/Aulacoseira.BV.Analyses.R +++ /dev/null @@ -1,80 +0,0 @@ -# Load Libraries and Data Files ------------------------------------------------ -## Script just to look at Aulacoseira - -library("tidyverse");packageVersion("tidyverse") - -# Set working directory -setwd("./phyto_code/") -getwd() - -# Set visual theme in ggplot -theme_set(theme_bw()) - -# Clean workspace -rm(list=ls()) - -# Set folder name to output graphs into -output <- "plots" - -# Import combined EMP & AEU data files -load("RData/df_phyto.RData") - -# Filter out non-Aulacoseira taxa and years outside of FASTR study -df_Aul <- df_phyto %>% - filter(Genus == "Aulacoseira") %>% - filter(Year %in% c(2014,2015,2016,2017,2018,2019)) - -# Plot GALD by year -plot_GALD <- ggplot(df_Aul, aes(x = Year, y = GALD)) + - geom_jitter(width = 0.1) - -plot_GALD + - facet_wrap(Study ~ ., ncol = 1) - -# Plot Cell Biovolume by Year -plot_Cell_BV <- ggplot(df_Aul, aes(x = Year, y = BV.Avg)) + - geom_jitter(width = 0.1) - -plot_Cell_BV + - facet_wrap(Study ~ ., ncol = 1) - -# Plot EMP and FASTR samples by year, facet by study -ggplot(df_Aul, aes(x = Year, y = BV.um3.per.L)) + - geom_jitter(width = 0.1, - size = 2, - pch = 21, - color = "black", - fill = "darkgreen") + - labs(x = "Year", - y = "Biovolume (um^3 per L)") + - facet_wrap(Study ~ ., ncol = 1) - -ggsave(path = output, - filename = "Fig 4.13 - Aulacoseira_abundance_FASTR_EMP_yearly_all.pdf", - device = "pdf", - scale=1.0, - units="in", - height=4, - width=4, - dpi="print") - -## Plot EMP and FASTR samples by month, facet by study -ggplot(df_Aul, aes(x = Month, y = BV.um3.per.L)) + - geom_jitter(data = subset(df_Aul, Year == "2016"), - width = 0.1, - size = 2, - pch = 21, - color = "black", - fill = "darkgreen") + - labs(x = "Year", - y = "Biovolume (um^3 per L)") + - facet_wrap(Study ~ ., ncol = 1) - -ggsave(path = output, - filename = "Fig 4.14 - Aulacoseira_abundance_FASTR_EMP_month_2016.pdf", - device = "pdf", - scale=1.0, - units="in", - height=4, - width=4, - dpi="print") diff --git a/phyto-final/GAMs/df_rtm_chla_c1.RData b/phyto-final/GAMs/df_rtm_chla_c1.RData deleted file mode 100644 index 700e149..0000000 Binary files a/phyto-final/GAMs/df_rtm_chla_c1.RData and /dev/null differ diff --git a/phyto-final/GAMs/phyto.sum.RData b/phyto-final/GAMs/phyto.sum.RData deleted file mode 100644 index 2caeafc..0000000 Binary files a/phyto-final/GAMs/phyto.sum.RData and /dev/null differ diff --git a/phyto-final/GAMs/phyto_gam_analysis.Rmd b/phyto-final/GAMs/phyto_gam_analysis.Rmd deleted file mode 100644 index db5caf3..0000000 --- a/phyto-final/GAMs/phyto_gam_analysis.Rmd +++ /dev/null @@ -1,645 +0,0 @@ ---- -title: "NDFS Synthesis Manuscript: Chlorophyll GAM analysis" -author: "Dave Bosworth" -date: '`r format(Sys.Date(), "%B %d, %Y")`' -output: - html_document: - code_folding: show - toc: true - toc_float: - collapsed: false -editor_options: - chunk_output_type: console -knit: (function(input, ...) { - rmarkdown::render( - input, - output_dir = here::here("docs"), - envir = globalenv() - ) - }) ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -options(knitr.kable.NA = "") -``` - -# Purpose - -Explore and analyze the continuous chlorophyll data to be included in the NDFS synthesis manuscript. We will attempt to fit a generalized additive model (GAM) to the data set to help account for seasonality in the data. We will also include interaction terms in the model which wasn't explored in the analysis for the NDFS Synthesis report. - -# Global code and functions - -```{r load packages, message = FALSE, warning = FALSE} -# Load packages -library(tidyverse) -library(lubridate) -library(scales) -library(knitr) -library(mgcv) -library(lme4) -library(car) -library(emmeans) -library(gratia) -library(here) -``` - -```{r load functions, message = FALSE, warning = FALSE} -# Source functions -source(here("global_ndfa_funcs.R")) -source(here("Water_Quality/global_wq_funcs.R")) -``` - -Display current versions of R and packages used for this analysis: - -```{r print session info} -devtools::session_info() -``` - -# Import Data - -```{r define rtm file paths} -# Define relative file path for file containing all QA'ed and cleaned continuous WQ data -fp_rel_rtm_data <- - "2011-2019 Synthesis Study-FASTR/WQ_Subteam/Processed_Data/Continuous/RTM_INPUT_all_2021-04-20.csv" - -# Define relative file path for file containing region assignments for the continuous stations -fp_rel_rtm_region <- - "2011-2019 Synthesis Study-FASTR/WQ_Subteam/Processed_Data/Continuous/NDFA_Cont_WQ_Stations.csv" - -# Define relative file path for file containing dates of flow action periods -fp_rel_fa_dates <- - "2011-2019 Synthesis Study-FASTR/Data Management/FlowDatesDesignations_45days.csv" - -# Define absolute file paths -fp_abs_rtm_data <- ndfa_abs_sp_path(fp_rel_rtm_data) -fp_abs_rtm_region <- ndfa_abs_sp_path(fp_rel_rtm_region) -fp_abs_fa_dates <- ndfa_abs_sp_path(fp_rel_fa_dates) -``` - -```{r import rtm data, message = FALSE} -# Import continuous WQ data -df_rtm_orig <- import_rtm_data(fp_abs_rtm_data, 10) - -# Import region assignments for the continuous stations -df_rtm_region <- read_csv(fp_abs_rtm_region) %>% select(StationCode, Region = BroadRegion) - -# Import dates of flow action periods -df_fa_dates_orig <- read_csv(fp_abs_fa_dates) -``` - -# Prepare Data - -```{r prepare rtm chla data, message = FALSE} -# Create a vector for the factor order of StationCode -sta_order <- c( - "RMB", - "RCS", - "RD22", - "I80", - "LIS", - "TOE", - "STTD", - "LIB", - "RYI", - "RVB" -) - -# Prepare continuous chlorophyll for exploration and analysis -df_rtm_chla <- df_rtm_orig %>% - transmute( - StationCode, - # Parse DateTime variable and create Date and Year variables - DateTime = ymd_hms(DateTime), - Date = date(DateTime), - Year = year(DateTime), - Chla - ) %>% - # Remove all NA Chlorophyll values - drop_na(Chla) %>% - # Calculate daily average Chlorophyll values - group_by(StationCode, Year, Date) %>% - summarize(Chla = mean(Chla)) %>% - ungroup() %>% - # Scale and log transform chlorophyll values - mutate(Chla_log = log(Chla * 1000)) %>% - # Add Flow Action Periods, Region assignments, and DOY - ndfa_action_periods() %>% - left_join(df_rtm_region) %>% - mutate(DOY = yday(Date)) %>% - # Remove stations in Middle Sac River region since we only want to include - # stations within the Yolo Bypass when comparing Flow Action Periods - # Also remove SDI since it's further downstream in the Sacramento River - filter( - Region != "Middle Sac River", - StationCode != "SDI" - ) %>% - arrange(StationCode, Date) %>% - mutate( - # Apply factor orders to FlowActionPeriod, Region, and StationCode - FlowActionPeriod = factor(FlowActionPeriod, levels = c("Before", "During", "After")), - Region = factor(Region, levels = c("Upstream", "Downstream")), - StationCode = factor(StationCode, levels = sta_order), - # Add a column for Year as a factor for the model - Year_fct = factor(Year) - ) -``` - -```{r prepare flow action period data} -# Prepare dates of flow action periods to highlight the flow action periods for - # each year in the plots -df_fa_dates <- df_fa_dates_orig %>% - transmute( - Year, - across(c(PreFlowEnd, PostFlowStart), mdy), - # add 1 day to PreFlowEnd so that the highlight for the flow action periods - # aligns correctly - PreFlowEnd = PreFlowEnd + days(1), - # Add DOY for PreFlowEnd and PostFlowStart - across(where(is.Date), yday, .names = "{.col}_DOY") - ) -``` - -# Explore sample counts by Station - -```{r rtm chla sample counts station} -df_rtm_chla %>% - count(Year, FlowActionPeriod, StationCode) %>% - arrange(StationCode) %>% - pivot_wider(names_from = StationCode, values_from = n) %>% - arrange(Year, FlowActionPeriod) %>% - kable() -``` - -Remove under-sampled years (2011 and 2012) and stations (RMB, TOE) - -```{r rtm chla remove under sampled} -df_rtm_chla_c1 <- df_rtm_chla %>% - filter( - Year > 2012, - !StationCode %in% c("RMB", "TOE") - ) %>% - mutate(StationCode = fct_drop(StationCode)) - -df_fa_dates_c1 <- df_fa_dates %>% filter(Year > 2012) -``` - -Read in the phyto data -```{Ted Data} -load("C:/R/ND-FASTR/phyto_code/GAMs/phyto.sum.RData") - -df_phyto <- phyto.sum %>% - mutate(Date = date(DateTime)) %>% - select(Year,Date,StationCode,Region,ActionPhase,Total.BV.per.L) %>% - mutate(DOY = yday(Date)) %>% - mutate(Year_fct = factor(Year)) %>% - rename("BV_Density" = "Total.BV.per.L") %>% - rename("FlowActionPeriod" = "ActionPhase") %>% - mutate(BV_log = log10(BV_Density)) - -df_phyto <- df_phyto %>% - filter(StationCode != "RMB") %>% - filter(Year >= 2015) - -# Check headers to see if they match Dave's -sort(colnames(df_phyto)) -sort(colnames(df_rtm_chla_c1)) - -``` -Now, let's look at the sample counts by station after removing under-sampled years and stations. - -```{r rtm chla sample counts station 2} -df_phyto %>% - count(Year, FlowActionPeriod, StationCode) %>% - arrange(StationCode) %>% - pivot_wider(names_from = StationCode, values_from = n) %>% - arrange(Year, FlowActionPeriod) %>% - kable() -``` - -RCS and RYI have some gaps, but we'll keep them in for now. - -# Explore sample counts by Region - -We would like to include interaction terms in the model, so we need to look at sample sizes and visuals of the data. - -```{r rtm chla sample counts region} -df_phyto %>% distinct(Region, StationCode) %>% arrange(Region, StationCode) - -df_phyto %>% - count(Year, FlowActionPeriod, Region) %>% - pivot_wider(names_from = Region, values_from = n) %>% - kable() -``` - -It looks like there is an adequate number of samples in each group. - -# Plots - -## Boxplots by Year and Region - -Let's explore the data with some plots. First lets plot the data in boxplots facetted by Year and Region using a log10 scale to see the results better. - -```{r rtm chla boxplot region, fig.height = 9} -df_phyto %>% - ggplot(aes(x = FlowActionPeriod, y = BV_Density, fill = FlowActionPeriod)) + - geom_boxplot() + - facet_grid(rows = vars(Year), cols = vars(Region)) + - scale_y_log10(labels = trans_format("log10", math_format(10^.x))) + - annotation_logticks(sides = "l") -``` - -There may be some interaction between Flow Period, Year, and Region, but it's difficult to see clearly. Its obvious that Upstream is higher than Downstream. Also 2018 and 2019 stand out in the Upstream region - During appears lower than Before and After. - -## Boxplots by Station and Region {#boxplots-station} - -Now let's look at the same boxplots but grouped by Station. First, the stations in the Upstream region: - -```{r rtm chla boxplot station us, fig.width = 8.5, fig.height = 9} -df_phyto %>% - filter(Region == "Upstream") %>% - ggplot(aes(x = FlowActionPeriod, y = BV_Density, fill = FlowActionPeriod)) + - geom_boxplot() + - facet_grid(rows = vars(Year), cols = vars(StationCode)) + - scale_y_log10(labels = trans_format("log10", math_format(10^.x))) + - annotation_logticks(sides = "l") -``` - -The patterns differ by Station, however we'll keep these all in the same region for consistency purposes. - -Next, let's look at the stations in the Downstream region: - -```{r rtm chla boxplot station ds, fig.width = 8, fig.height = 9} -df_phyto %>% - filter(Region == "Downstream") %>% - ggplot(aes(x = FlowActionPeriod, y = BV_Density, fill = FlowActionPeriod)) + - geom_boxplot() + - facet_grid(rows = vars(Year), cols = vars(StationCode)) + - scale_y_log10(labels = trans_format("log10", math_format(10^.x))) + - annotation_logticks(sides = "l") -``` - -The patterns appear to differ by Station, but not as obviously as with the stations in the upstream region. - -## Time-series Plots by Year and Region - -Let's look at time-series plots based on day of year for each Region facetted by Year. The brown shaded areas represent the flow pulse periods for each year. - -```{r rtm chla time series plot point, fig.width = 8, fig.height = 6} -df_phyto %>% - ggplot(aes(x = DOY, y = BV_Density, color = Region)) + - geom_point(size = 1, alpha = 0.2) + - scale_color_viridis_d(option = "plasma", end = 0.8) + - facet_wrap(vars(Year), scales = "free_y") + - geom_rect( - data = df_fa_dates_c1, - aes( - xmin = PreFlowEnd_DOY, - xmax = PostFlowStart_DOY, - ymin = -Inf, - ymax = Inf - ), - inherit.aes = FALSE, - alpha = 0.2, - fill = "brown" - ) + - theme_bw() -``` - -## GAM smooth plots - -Now, let's fit some GAM models to these time-series plots. - -```{r rtm chla time series plot gam, warning = FALSE, fig.width = 8, fig.height = 6} -df_phyto %>% - ggplot(aes(x = DOY, y = BV_Density, color = Region)) + - geom_smooth(method = "gam", formula = y ~ s(x, bs = "tp")) + - scale_color_viridis_d(option = "plasma", end = 0.8) + - facet_wrap(vars(Year), scales = "free_y") + - geom_rect( - data = df_fa_dates_c1, - aes( - xmin = PreFlowEnd_DOY, - xmax = PostFlowStart_DOY, - ymin = -Inf, - ymax = Inf - ), - inherit.aes = FALSE, - alpha = 0.2, - fill = "brown" - ) + - theme_bw() -``` - -These GAMs do a nice job of displaying the general trends for each Region. Overall, the Downstream region didn't vary much through time except for in 2016. There is an obvious decrease in chlorophyll in the Upstream region during flow pulses in 2015 and 2017-2019. - -Since the model will fit the smooth to all the data across years, regions and flow action periods, lets take a look at what that looks like using the log-transformed data. - -```{r rtm chla time series plot gam all data, warning = FALSE} -df_phyto %>% - ggplot(aes(x = DOY, y = BV_log)) + - geom_point(size = 1, alpha = 0.2) + - geom_smooth(method = "gam", formula = y ~ s(x, bs = "tp")) + - theme_bw() -``` - -Finally, let's see how these smooths look if we group by Region. - -```{r rtm chla time series plot gam grp region, warning = FALSE} -df_phyto %>% - ggplot(aes(x = DOY, y = BV_log, color = Region)) + - geom_point(size = 1, alpha = 0.2) + - geom_smooth(method = "gam", formula = y ~ s(x, bs = "tp")) + - scale_color_viridis_d(option = "plasma", end = 0.8) + - theme_bw() -``` - -# GAM Model - -We'll try running a GAM including all two-way interactions between Year, Flow Action Period, and Region, a smooth term for day of year to account for seasonality, and Station as a random effect. First we'll run the GAM without accounting for serial autocorrelation. - -## Initial Model - -```{r gam no autocorr, warning = FALSE} -m_phyto_gam <- gam( - BV_log ~ (Year_fct + FlowActionPeriod + Region)^2 + s(DOY) + s(StationCode, bs = "re"), - data = df_phyto, - method = "REML" -) -``` - -Lets look at the model summary and diagnostics: - -```{r gam no autocorr diag, warning = FALSE} -summary(m_phyto_gam) -appraise(m_phyto_gam) -k.check(m_phyto_gam) -plot(m_phyto_gam, pages = 1, all.terms = TRUE) -acf(residuals(m_phyto_gam)) -Box.test(residuals(m_phyto_gam), lag = 20, type = 'Ljung-Box') -``` - -## Model with k=5 - -The model definitely has residual autocorrelation as indicated by the ACF plot and the Box-Ljung test. The smooth term for day of year may also be overfitted. Let's try a smaller k-value for the smooth first, then lets try to address the residual autocorrelation. - -```{r gam no autocorr k5, warning = FALSE} -m_phyto_gam_k5 <- gam( - BV_log ~ (Year_fct + FlowActionPeriod + Region)^2 + s(DOY, k = 5) + s(StationCode, bs = "re"), - data = df_phyto, - method = "REML" -) - -summary(m_phyto_gam_k5) -k.check(m_phyto_gam_k5) -anova(m_phyto_gam_k5) -plot(m_phyto_gam_k5, pages = 1, all.terms = TRUE) -``` - -## Model with lag1 term - -Changing the k-value to 5 seems to help reduce the "wiggliness" of the smooth term for DOY. Now, lets add a lag1 term to the model to see if that helps with the residual autocorrelation. - -```{r gam lag1, warning = FALSE} -df_phyto_lag1 <- df_phyto %>% - # Fill in missing days for each StationCode-Year combination - group_by(StationCode, Year) %>% - complete(Date = seq.Date(min(Date), max(Date), by = "1 day")) %>% - # Create lag1 of scaled log transformed chlorophyll values - mutate(lag1 = lag(BV_log)) %>% - ungroup() - -m_phyto_gam_lag1 <- gam( - BV_log ~ (Year_fct + FlowActionPeriod + Region)^2 + s(DOY, k = 5) + s(StationCode, bs = "re") + lag1, - data = df_phyto_lag1, - na.action = "na.omit", - method = "REML" -) - -summary(m_chla_gam_lag1) -appraise(m_chla_gam_lag1) -k.check(m_chla_gam_lag1) -plot(m_chla_gam_lag1, pages = 1, all.terms = TRUE) -acf(residuals(m_chla_gam_lag1), na.action = na.pass) -Box.test(residuals(m_chla_gam_lag1), lag = 20, type = 'Ljung-Box') -``` - -Well, adding a lag1 term to the model helped with the residual autocorrelation, but it basically turned the smooth term of day of year into a straight line with a lot of uncertainty. This isn't what we are looking for. After a brief internet search, I found a [blog post](https://fromthebottomoftheheap.net/2014/05/09/modelling-seasonal-data-with-gam/) that suggests using an AR(p) model to account for the correlated residuals. We can give that a try. We'll run AR(1), AR(3), and AR(4) models and compare them using AIC. It wasn't possible running an AR(2) model. - -## Model with AR() correlation structure - -```{r gam with AR comparison, warning = FALSE} -# Define model formula as an object -f_phyto_gam_k5 <- as.formula("BV_log ~ (Year_fct + FlowActionPeriod + Region)^2 + s(DOY, k = 5)") - -# Fit original model with k = 5 and two successive AR(p) models -m_phyto_gamm_k5 <- gamm( - f_phyto_gam_k5, - data = df_phyto, - random = list(StationCode = ~ 1), - method = "REML" -) - -m_phyto_gamm_k5_ar1 <- gamm( - f_phyto_gam_k5, - data = df_phyto, - random = list(StationCode = ~ 1), - correlation = corARMA(form = ~ 1|Year_fct, p = 1), # grouped by Year_fct - method = "REML" -) - -m_phyto_gamm_k5_ar3 <- gamm( - f_phyto_gam_k5, - data = df_phyto, - random = list(StationCode = ~ 1), - correlation = corARMA(form = ~ 1|Year_fct, p = 3), - method = "REML" -) - -m_phyto_gamm_k5_ar4 <- gamm( - f_phyto_gam_k5, - data = df_phyto, - random = list(StationCode = ~ 1), - correlation = corARMA(form = ~ 1|Year_fct, p = 4), - method = "REML" -) - -# Compare models -anova( - m_phyto_gamm_k5$lme, - m_phyto_gamm_k5_ar1$lme, - m_phyto_gamm_k5_ar3$lme, - m_phyto_gamm_k5_ar4$lme -) -``` - -It looks like the AR(3) model has the best fit according to the AIC values. Let's take a closer look at that one. - -```{r gam ar3 diag, warning = FALSE} -# Pull out the residuals and the GAM model -resid_ar3 <- residuals(m_phyto_gamm_k5_ar3$lme, type = "normalized") -m_phyto_gamm_k5_ar3_gam <- m_phyto_gamm_k5_ar3$gam - -summary(m_phyto_gamm_k5_ar3_gam) -appraise(m_phyto_gamm_k5_ar3_gam) -k.check(m_phyto_gamm_k5_ar3_gam) -plot(m_phyto_gamm_k5_ar3_gam, pages = 1, all.terms = TRUE) -acf(resid_ar3) -Box.test(resid_ar3, lag = 20, type = 'Ljung-Box') -``` - -The AR(3) model has much less residual autocorrelation, and the diagnostics plots look pretty good. What does the ANOVA table look like? - -```{r gam ar3 anova, warning = FALSE} -# the anova.gam function is similar to a type 3 ANOVA -anova(m_phyto_gamm_k5_ar3_gam) -``` - -## AR(3) Model - -Only Region and the Year:Region interaction was significant among the parametric terms of the model. Let's re-run the AR(3) GAM model dropping the two interactions that weren't significant. - -```{r gam ar3 less interaction, warning = FALSE} -m_phyto_gamm_k5_ar3b <- gamm( - BV_log ~ Year_fct * Region + FlowActionPeriod + s(DOY, k = 5), - data = df_phyto, - random = list(StationCode = ~ 1), - correlation = corARMA(form = ~ 1|Year_fct, p = 3), - method = "REML" -) -``` - -```{r gam ar3 less interaction diag, warning = FALSE} -# Pull out the residuals and the GAM model -resid_ar3b <- residuals(m_phyto_gamm_k5_ar3b$lme, type = "normalized") -m_phyto_gamm_k5_ar3b_gam <- m_phyto_gamm_k5_ar3b$gam - -summary(m_phyto_gamm_k5_ar3b_gam) -appraise(m_phyto_gamm_k5_ar3b_gam) -k.check(m_phyto_gamm_k5_ar3b_gam) -plot(m_phyto_gamm_k5_ar3b_gam, pages = 1, all.terms = TRUE) -acf(resid_ar3b) -Box.test(resid_ar3b, lag = 20, type = 'Ljung-Box') -anova(m_phyto_gamm_k5_ar3b_gam) -``` - -Only the main effect of Region and the Year:Region interaction is significant in the model. Let's take a closer look at the pairwise contrasts. - -```{r gam ar3 contrasts, warning = FALSE} -# Contrasts in Region main effect -emmeans(m_phyto_gamm_k5_ar3b, specs = pairwise ~ Region, data = df_phyto) - -# Contrasts in Region for each Year -emmeans(m_phyto_gamm_k5_ar3b, specs = pairwise ~ Region | Year_fct, data = df_phyto) - -# Contrasts in Year for each Region -em_gamm_yr_reg <- emmeans(m_phyto_gamm_k5_ar3b, specs = pairwise ~ Year_fct | Region, data = df_phyto) -multcomp::cld(em_gamm_yr_reg$emmeans, sort = FALSE, Letters = letters) -``` - -Upstream was always higher than downstream in all years. None of the years differed significantly in the upstream region. In the downstream region, 2018 was lower than 2013-2017, and 2019 was lower than 2013-2016. - -# Compare to linear mixed effects model - -Let's compare the results of the GAM model to a linear mixed effects model like we used in the FASTR report. We'll start by adding a lag1 term to account for residual autocorrelation. - -```{r lme lag1} -m_phyto_lmer_lag1 <- - lmer( - BV_log ~ (Year_fct + FlowActionPeriod + Region)^2 + lag1 + (1|StationCode), - data = df_phyto_lag1 - ) - -# Pull out residuals and look at autocorrelation -resid_lmer_lag1 <- residuals(m_phyto_lag1) -acf(resid_lmer_lag1) -Box.test(resid_lmer_lag1, lag = 20, type = 'Ljung-Box') -``` - -There is still some residual autocorrelation so lets add a second lag term, lag2. - -```{r lme lag2} -df_rtm_chla_lag2 <- df_rtm_chla_c1 %>% - # Fill in missing days for each StationCode-Year combination - group_by(StationCode, Year) %>% - complete(Date = seq.Date(min(Date), max(Date), by = "1 day")) %>% - # Create lag1 of scaled log transformed chlorophyll values - mutate( - lag1 = lag(Chla_log), - lag2 = lag(Chla_log, 2) - ) %>% - ungroup() - -m_chla_lmer_lag2 <- - lmer( - Chla_log ~ (Year_fct + FlowActionPeriod + Region)^2 + lag1 + lag2 + (1|StationCode), - data = df_rtm_chla_lag2 - ) - -# Pull out residuals and look at autocorrelation -resid_lmer_lag2 <- residuals(m_chla_lmer_lag2) -acf(resid_lmer_lag2) -Box.test(resid_lmer_lag2, lag = 20, type = 'Ljung-Box') -``` - -There is still some residual autocorrelation, but less than with the lag1 model. Let's go with this one for now. Let's look at its diagnostic plots. - -```{r lme lag2 diag} -plot(resid_lmer_lag2) -qqnorm(resid_lmer_lag2) -hist(resid_lmer_lag2) -``` - -The residuals look a little funky. Let's take a look at the ANOVA table - -```{r lme lag2 anova} -Anova(m_chla_lmer_lag2, type = 3, contrasts=list(topic=contr.sum, sys=contr.sum)) -``` - -As with the GAM model, only the Region main effect and the Year:Region interaction terms are significant. Let's re-run the model dropping the two interactions that weren't significant. - -```{r lme lag2 less interaction} -m_chla_lmer_lag2b <- - lmer( - Chla_log ~ Year_fct * Region + FlowActionPeriod + lag1 + lag2 + (1|StationCode), - data = df_rtm_chla_lag2 - ) -``` - -```{r lme lag2 less interaction diag} -# Pull out residuals and look at autocorrelation and diagnostic plots -resid_lmer_lag2b <- residuals(m_chla_lmer_lag2b) -acf(resid_lmer_lag2b) -Box.test(resid_lmer_lag2b, lag = 20, type = 'Ljung-Box') -plot(resid_lmer_lag2b) -qqnorm(resid_lmer_lag2b) -hist(resid_lmer_lag2b) - -``` - -The residuals still look a little strange. Let's look at the ANOVA table. - -```{r lme lag2 less interaction anova} -Anova(m_chla_lmer_lag2b, type = 3, contrasts=list(topic=contr.sum, sys=contr.sum)) -``` - -Again, only the main effect of Region and the Year:Region interaction is significant in the model. Let's take a closer look at the pairwise contrasts. - -```{r lme lag2 contrasts, warning = FALSE, message = FALSE} -# Contrasts in Region main effect -emmeans(m_chla_lmer_lag2b, specs = pairwise ~ Region, adjust = "sidak") - -# Contrasts in Region for each Year -emmeans(m_chla_lmer_lag2b, specs = pairwise ~ Region | Year_fct, adjust = "sidak") - -# Contrasts in Year for each Region -em_lmer_yr_reg <- emmeans(m_chla_lmer_lag2b, specs = pairwise ~ Year_fct | Region, adjust = "sidak") -multcomp::cld(em_lmer_yr_reg$emmeans, sort = FALSE, Letters = letters) -``` - -These results are very similar to the GAM model results. Like with the GAM model, upstream was always higher than downstream in all years, and none of the years differed significantly in the upstream region. In the downstream region, 2018 was lower than 2013-2016, and 2019 was lower than 2016. - -# Next Steps - -It's encouraging that the GAM and LMER models had similar results. Unfortunately, neither found a significant effect of flow action period. Looking at the [station level plots](#boxplots-station), it does appear that there is a chlorophyll response to flow action period at some of the stations. I wonder if it makes sense to run the models at the station level, choosing a few representative stations in our analysis. - diff --git a/phyto-final/NMDS 2014 .RData b/phyto-final/NMDS 2014 .RData deleted file mode 100644 index 77df211..0000000 Binary files a/phyto-final/NMDS 2014 .RData and /dev/null differ diff --git a/phyto-final/analyses/ANOSIM - Year - ActionPhase - Region.xlsx b/phyto-final/analyses/ANOSIM - Year - ActionPhase - Region.xlsx deleted file mode 100644 index f3449b7..0000000 Binary files a/phyto-final/analyses/ANOSIM - Year - ActionPhase - Region.xlsx and /dev/null differ diff --git a/phyto-final/analyses/FASTR.emmeans.xlsx b/phyto-final/analyses/FASTR.emmeans.xlsx deleted file mode 100644 index 95e57c7..0000000 Binary files a/phyto-final/analyses/FASTR.emmeans.xlsx and /dev/null differ diff --git a/phyto-final/analyses/NMDS_stress.csv b/phyto-final/analyses/NMDS_stress.csv deleted file mode 100644 index 9fa1ffb..0000000 --- a/phyto-final/analyses/NMDS_stress.csv +++ /dev/null @@ -1,7 +0,0 @@ -Year,stress -2014,0.17163402568543384 -2015,0.2130854869008491 -2016,0.19441416194649594 -2017,0.1880684251075979 -2018,0.1938806232120082 -2019,0.17471702358438293 diff --git a/phyto-final/analyses/phyto.grp.error.csv b/phyto-final/analyses/phyto.grp.error.csv deleted file mode 100644 index b6c3963..0000000 --- a/phyto-final/analyses/phyto.grp.error.csv +++ /dev/null @@ -1,289 +0,0 @@ -"","Year","ActionPhase","Region","Group","N","BV.um3.per.L","sd","se","ci" -"1",2014,"Before","Upstream","Ciliates",11,0,0,0,0 -"2",2014,"Before","Upstream","Cryptophytes",11,0,0,0,0 -"3",2014,"Before","Upstream","Cyanobacteria",11,126572846.490909,95470577.8314279,28785462.2895699,64138006.8997764 -"4",2014,"Before","Upstream","Diatoms",11,1480688415.24545,3256214297.61782,981785551.108087,2187554530.74268 -"5",2014,"Before","Upstream","Dinoflagellates",11,0,0,0,0 -"6",2014,"Before","Upstream","Golden Algae",11,0,0,0,0 -"7",2014,"Before","Upstream","Green Algae",11,553499583.4,1818580450.14417,548322636.745802,1221738970.35688 -"8",2014,"Before","Upstream","Other",11,0,0,0,0 -"9",2014,"Before","Downstream","Ciliates",3,0,0,0,0 -"10",2014,"Before","Downstream","Cryptophytes",3,0,0,0,0 -"11",2014,"Before","Downstream","Cyanobacteria",3,137840356.133333,147653482.196936,85247777.6931865,366791583.396664 -"12",2014,"Before","Downstream","Diatoms",3,532962058,500651264.342248,289051142.238123,1243686686.18806 -"13",2014,"Before","Downstream","Dinoflagellates",3,0,0,0,0 -"14",2014,"Before","Downstream","Golden Algae",3,3068065,5314044.46092381,3068065,13200818.2472988 -"15",2014,"Before","Downstream","Green Algae",3,15232040.2,13662818.229829,7888231.78288068,33940322.013508 -"16",2014,"Before","Downstream","Other",3,0,0,0,0 -"17",2014,"During","Upstream","Ciliates",3,0,0,0,0 -"18",2014,"During","Upstream","Cryptophytes",3,0,0,0,0 -"19",2014,"During","Upstream","Cyanobacteria",3,50829914.0666667,26914146.3526698,15538889.6417229,66858445.9342347 -"20",2014,"During","Upstream","Diatoms",3,558965326.5,481352641.876218,277909077.362372,1195746250.33536 -"21",2014,"During","Upstream","Dinoflagellates",3,0,0,0,0 -"22",2014,"During","Upstream","Golden Algae",3,0,0,0,0 -"23",2014,"During","Upstream","Green Algae",3,7492896.63333333,12303842.5134836,7103626.78722654,30564439.1871817 -"24",2014,"During","Upstream","Other",3,0,0,0,0 -"25",2014,"During","Downstream","Ciliates",2,0,0,0,0 -"26",2014,"During","Downstream","Cryptophytes",2,22560671.4,14046856.3790786,9932627.4,126205997.312539 -"27",2014,"During","Downstream","Cyanobacteria",2,383050983.35,271530391.814114,192000981.35,2439603778.57956 -"28",2014,"During","Downstream","Diatoms",2,78350238.45,99367719.7269915,70263588.45,892783540.344019 -"29",2014,"During","Downstream","Dinoflagellates",2,0,0,0,0 -"30",2014,"During","Downstream","Golden Algae",2,0,0,0,0 -"31",2014,"During","Downstream","Green Algae",2,0,0,0,0 -"32",2014,"During","Downstream","Other",2,0,0,0,0 -"33",2014,"After","Upstream","Ciliates",16,0,0,0,0 -"34",2014,"After","Upstream","Cryptophytes",16,21586860.4625,63607175.6034255,15901793.9008564,33893871.3835655 -"35",2014,"After","Upstream","Cyanobacteria",16,248179717.634375,451185833.721614,112796458.430403,240419960.062235 -"36",2014,"After","Upstream","Diatoms",16,1512188262.83125,2103896076.29147,525974019.072867,1121087083.92911 -"37",2014,"After","Upstream","Dinoflagellates",16,0,0,0,0 -"38",2014,"After","Upstream","Golden Algae",16,16890661.525,36040294.1945079,9010073.54862697,19204517.1706811 -"39",2014,"After","Upstream","Green Algae",16,90917122.1875,301209030.490539,75302257.6226349,160502962.78939 -"40",2014,"After","Upstream","Other",16,6239842.6,15549202.4318082,3887300.60795206,8285585.11427365 -"41",2014,"After","Downstream","Ciliates",4,0,0,0,0 -"42",2014,"After","Downstream","Cryptophytes",4,32653982.6,65307965.2,32653982.6,103919546.278168 -"43",2014,"After","Downstream","Cyanobacteria",4,105817916.525,83985542.1746223,41992771.0873111,133639739.195438 -"44",2014,"After","Downstream","Diatoms",4,317758449.25,421347838.041111,210673919.020555,670458435.206605 -"45",2014,"After","Downstream","Dinoflagellates",4,0,0,0,0 -"46",2014,"After","Downstream","Golden Algae",4,0,0,0,0 -"47",2014,"After","Downstream","Green Algae",4,36067392.9,41336454.856242,20668227.428121,65775524.0153871 -"48",2014,"After","Downstream","Other",4,0,0,0,0 -"49",2015,"Before","Upstream","Ciliates",19,0,0,0,0 -"50",2015,"Before","Upstream","Cryptophytes",19,58647247.3473684,109142578.545482,25039024.7535372,52605038.9708471 -"51",2015,"Before","Upstream","Cyanobacteria",19,517643782.505263,552134316.233461,126668299.35382,266120221.912291 -"52",2015,"Before","Upstream","Diatoms",19,1146435400.79474,2094903352.31489,480603789.95922,1009711094.9487 -"53",2015,"Before","Upstream","Dinoflagellates",19,367441735.373684,1156885702.09858,265407782.351306,557601059.593356 -"54",2015,"Before","Upstream","Golden Algae",19,2107096.36842105,9184620.13424891,2107096.36842105,4426845.20132764 -"55",2015,"Before","Upstream","Green Algae",19,185675503.073684,253835064.373691,58233757.5753716,122344584.776152 -"56",2015,"Before","Upstream","Other",19,184177920.126316,423635018.987824,97188538.7743611,204185543.169876 -"57",2015,"Before","Downstream","Ciliates",12,0,0,0,0 -"58",2015,"Before","Downstream","Cryptophytes",12,19837275.6083333,45998396.7374571,13278593.369331,29225986.9527888 -"59",2015,"Before","Downstream","Cyanobacteria",12,956465867.716667,1433901151.89993,413931608.02037,911057326.545704 -"60",2015,"Before","Downstream","Diatoms",12,246578245.408333,194227624.126755,56068685.5368217,123406344.812389 -"61",2015,"Before","Downstream","Dinoflagellates",12,0,0,0,0 -"62",2015,"Before","Downstream","Golden Algae",12,510465.8,1768305.40225259,510465.8,1123527.65053431 -"63",2015,"Before","Downstream","Green Algae",12,62613665.3916667,78096539.0028358,22544528.9080327,49620173.5678369 -"64",2015,"Before","Downstream","Other",12,1033332.3,3579568.08940401,1033332.3,2274349.05774336 -"65",2015,"During","Upstream","Ciliates",30,0,0,0,0 -"66",2015,"During","Upstream","Cryptophytes",30,107585306.81,248153037.419129,45306338.7692936,92661867.0274653 -"67",2015,"During","Upstream","Cyanobacteria",30,112198744.266667,117864310.503042,21518980.529103,44011256.846598 -"68",2015,"During","Upstream","Diatoms",30,3384149676.80333,4868342423.74424,888833654.381365,1817868936.8659 -"69",2015,"During","Upstream","Dinoflagellates",30,148812750.343333,519687491.920942,94881520.7261278,194054498.679705 -"70",2015,"During","Upstream","Golden Algae",30,15359994.3533333,29513274.5452757,5388362.07143017,11020437.8310326 -"71",2015,"During","Upstream","Green Algae",30,142623218.99,161966310.308372,29570867.2372589,60479214.2172127 -"72",2015,"During","Upstream","Other",30,252028085.046667,795212444.763106,145185264.668528,296937206.900955 -"73",2015,"During","Downstream","Ciliates",24,426638.7125,2090094.30028594,426638.7125,882569.419512509 -"74",2015,"During","Downstream","Cryptophytes",24,34887576.0083333,55868740.2413495,11404158.8469503,23591299.9891714 -"75",2015,"During","Downstream","Cyanobacteria",24,182894585.258333,148756345.61618,30364761.8967286,62814295.7862299 -"76",2015,"During","Downstream","Diatoms",24,270206428.529167,524526778.678087,107068580.348923,221488233.575563 -"77",2015,"During","Downstream","Dinoflagellates",24,0,0,0,0 -"78",2015,"During","Downstream","Golden Algae",24,1750523.3,4926869.30249539,1005692.98504132,2080434.44725077 -"79",2015,"During","Downstream","Green Algae",24,47153721.9458333,45052753.388154,9196354.77570195,19024109.2948694 -"80",2015,"During","Downstream","Other",24,0,0,0,0 -"81",2015,"After","Upstream","Ciliates",19,124916260.605263,381612933.067396,87548011.0941541,183931546.086975 -"82",2015,"After","Upstream","Cryptophytes",19,22107452.6210526,34896218.7077072,8005741.62413627,16819439.026623 -"83",2015,"After","Upstream","Cyanobacteria",19,132866548.963158,373937276.484327,85787094.7062109,180231998.036524 -"84",2015,"After","Upstream","Diatoms",19,699501746.210526,1008069294.72825,231266957.04246,485873847.229982 -"85",2015,"After","Upstream","Dinoflagellates",19,57197811.2894737,249319479.202526,57197811.2894737,120168142.391603 -"86",2015,"After","Upstream","Golden Algae",19,103383344.826316,165673034.983795,38008000.9033707,79851846.8033928 -"87",2015,"After","Upstream","Green Algae",19,1165667147.77368,2148161096.379,492821954.397979,1035380505.90938 -"88",2015,"After","Upstream","Other",19,262665349.2,755377869.768018,173295568.342411,364080479.006669 -"89",2015,"After","Downstream","Ciliates",16,0,0,0,0 -"90",2015,"After","Downstream","Cryptophytes",16,67021131.125,64639676.4166498,16159919.1041624,34444052.2308498 -"91",2015,"After","Downstream","Cyanobacteria",16,77358909.96375,157442528.434734,39360632.1086835,83895201.4209989 -"92",2015,"After","Downstream","Diatoms",16,363064027.45,714976584.059836,178744146.014959,380984128.795054 -"93",2015,"After","Downstream","Dinoflagellates",16,0,0,0,0 -"94",2015,"After","Downstream","Golden Algae",16,521968.45625,1472382.90572416,368095.726431041,784577.468823937 -"95",2015,"After","Downstream","Green Algae",16,27626283.075,24230075.0670361,6057518.76675902,12911295.6226283 -"96",2015,"After","Downstream","Other",16,6740233.5,26960934,6740233.5,14366467.6305418 -"97",2016,"Before","Upstream","Ciliates",15,0,0,0,0 -"98",2016,"Before","Upstream","Cryptophytes",15,1872784969.62,3682301336.95885,950766116.917262,2039190510.88745 -"99",2016,"Before","Upstream","Cyanobacteria",15,358359150.013333,226448162.848461,58468664.2327568,125402812.706753 -"100",2016,"Before","Upstream","Diatoms",15,4465710409.71333,8344999864.55897,2154669699.96934,4621306889.35408 -"101",2016,"Before","Upstream","Dinoflagellates",15,75954151.44,294169163.602436,75954151.44,162905452.900605 -"102",2016,"Before","Upstream","Golden Algae",15,78653116.0733333,146182024.033829,37744036.3065274,80952906.6185263 -"103",2016,"Before","Upstream","Green Algae",15,868601644.733333,1233659380.51378,318529482.374829,683177793.506881 -"104",2016,"Before","Upstream","Other",15,273229932.093333,732868381.440822,189225802.418819,405848982.038448 -"105",2016,"Before","Downstream","Ciliates",8,0,0,0,0 -"106",2016,"Before","Downstream","Cryptophytes",8,1289229958.225,1987279532.2626,702609416.688057,1661407266.09804 -"107",2016,"Before","Downstream","Cyanobacteria",8,146622314.3,164594196.743205,58192836.3305365,137604192.056156 -"108",2016,"Before","Downstream","Diatoms",8,4404194307.7375,7611364393.52437,2691023688.37146,6363259875.13381 -"109",2016,"Before","Downstream","Dinoflagellates",8,0,0,0,0 -"110",2016,"Before","Downstream","Golden Algae",8,0,0,0,0 -"111",2016,"Before","Downstream","Green Algae",8,272311830.4875,530342748.16159,187504476.789085,443377633.097686 -"112",2016,"Before","Downstream","Other",8,0,0,0,0 -"113",2016,"During","Upstream","Ciliates",15,0,0,0,0 -"114",2016,"During","Upstream","Cryptophytes",15,781348128.946667,2399848614.75154,619638247.89011,1328991865.39942 -"115",2016,"During","Upstream","Cyanobacteria",15,261104158.62,434385989.001604,112157980.081937,240554942.623488 -"116",2016,"During","Upstream","Diatoms",15,3066358682.81333,5364416885.36862,1385086483.9431,2970715052.37604 -"117",2016,"During","Upstream","Dinoflagellates",15,0,0,0,0 -"118",2016,"During","Upstream","Golden Algae",15,7122164.05333333,27584022.7675171,7122164.05333333,15275522.650756 -"119",2016,"During","Upstream","Green Algae",15,450610309.486667,1005427740.93467,259600326.430328,556787324.306883 -"120",2016,"During","Upstream","Other",15,212697996.386667,380996218.181717,98372800.5323866,210988673.035056 -"121",2016,"During","Downstream","Ciliates",12,0,0,0,0 -"122",2016,"During","Downstream","Cryptophytes",12,332959545.516667,798579508.396396,230530047.070989,507393212.558474 -"123",2016,"During","Downstream","Cyanobacteria",12,175517126.291667,281840687.136039,81360398.2932905,179073029.262678 -"124",2016,"During","Downstream","Diatoms",12,3745470874.8,4756372569.2944,1373046491.62414,3022054952.18061 -"125",2016,"During","Downstream","Dinoflagellates",12,0,0,0,0 -"126",2016,"During","Downstream","Golden Algae",12,0,0,0,0 -"127",2016,"During","Downstream","Green Algae",12,278007959.383333,776475218.12809,224149088.102663,493348816.562034 -"128",2016,"During","Downstream","Other",12,0,0,0,0 -"129",2016,"After","Upstream","Ciliates",27,0,0,0,0 -"130",2016,"After","Upstream","Cryptophytes",27,476482163.007407,1075797270.93334,207037281.322276,425571226.654524 -"131",2016,"After","Upstream","Cyanobacteria",27,260881880.803704,305122650.780183,58720881.5212636,120702500.630018 -"132",2016,"After","Upstream","Diatoms",27,1934492455.50741,1774347381.06809,341473312.698523,701908446.762718 -"133",2016,"After","Upstream","Dinoflagellates",27,0,0,0,0 -"134",2016,"After","Upstream","Golden Algae",27,8108407.3037037,42132520.2554322,8108407.3037037,16667069.9132698 -"135",2016,"After","Upstream","Green Algae",27,701580200.414815,1158521995.87642,222957662.06045,458296037.936245 -"136",2016,"After","Upstream","Other",27,452867376.255556,2057041475.62649,395877816.562392,813738506.04966 -"137",2016,"After","Downstream","Ciliates",26,0,0,0,0 -"138",2016,"After","Downstream","Cryptophytes",26,114655484.265385,320903202.90976,62934295.9081265,129615608.713171 -"139",2016,"After","Downstream","Cyanobacteria",26,91497927.2115385,143462044.531598,28135221.712559,57945573.8072769 -"140",2016,"After","Downstream","Diatoms",26,3288557295.15769,5228265390.38606,1025347201.83924,2111742092.14564 -"141",2016,"After","Downstream","Dinoflagellates",26,0,0,0,0 -"142",2016,"After","Downstream","Golden Algae",26,717524.5,3658671.42698091,717524.5,1477769.37029503 -"143",2016,"After","Downstream","Green Algae",26,62350827.3192308,121711506.961942,23869590.3472202,49160341.558528 -"144",2016,"After","Downstream","Other",26,4073511.68076923,17681556.9613927,3467638.61449399,7141735.41356641 -"145",2017,"Before","Upstream","Ciliates",30,0,0,0,0 -"146",2017,"Before","Upstream","Cryptophytes",30,747491720.57,1211510756.41277,221190589.982474,452385551.192977 -"147",2017,"Before","Upstream","Cyanobacteria",30,458911357.913333,432736227.177552,79006464.3582751,161586362.825645 -"148",2017,"Before","Upstream","Diatoms",30,2069541903.06,3509816100.62667,640801817.002683,1310586870.86638 -"149",2017,"Before","Upstream","Dinoflagellates",30,713572418.65,2740045373.91217,500261553.293124,1023149757.61445 -"150",2017,"Before","Upstream","Golden Algae",30,167959341.266667,535652595.138635,97796336.4478708,200015966.195168 -"151",2017,"Before","Upstream","Green Algae",30,1411622707.6,2059065539.05548,375932214.374073,768867708.27044 -"152",2017,"Before","Upstream","Other",30,1466292661.33333,5756564521.94298,1051000080.8007,2149536519.13747 -"153",2017,"Before","Downstream","Ciliates",24,0,0,0,0 -"154",2017,"Before","Downstream","Cryptophytes",24,93162964.85,116050103.981896,23688628.2793813,49003661.1705299 -"155",2017,"Before","Downstream","Cyanobacteria",24,50282976.0083333,37610295.8824237,7677169.49891978,15881435.1104174 -"156",2017,"Before","Downstream","Diatoms",24,628433631.0875,1587900066.01001,324128743.688021,670511392.38579 -"157",2017,"Before","Downstream","Dinoflagellates",24,0,0,0,0 -"158",2017,"Before","Downstream","Golden Algae",24,1939045.08333333,5213235.63938149,1064147.26878139,2201356.34617127 -"159",2017,"Before","Downstream","Green Algae",24,91755450.2958333,110474406.882394,22550493.8748734,46649250.7729649 -"160",2017,"Before","Downstream","Other",24,2119161.2,9710512.17242758,1982149.99696105,4100389.67620556 -"161",2017,"During","Upstream","Ciliates",15,0,0,0,0 -"162",2017,"During","Upstream","Cryptophytes",15,750797054.8,902863019.229422,233118229.158807,499988874.61078 -"163",2017,"During","Upstream","Cyanobacteria",15,396564092.76,292407966.96884,75499412.4245789,161930134.713853 -"164",2017,"During","Upstream","Diatoms",15,1274223463.08667,1189676212.25898,307173077.163878,658820726.787833 -"165",2017,"During","Upstream","Dinoflagellates",15,0,0,0,0 -"166",2017,"During","Upstream","Golden Algae",15,0,0,0,0 -"167",2017,"During","Upstream","Green Algae",15,1205276607.64,1680535335.76047,433912357.874227,930649448.891667 -"168",2017,"During","Upstream","Other",15,206816023.22,778242637.894125,200941385.058165,430976407.724518 -"169",2017,"During","Downstream","Ciliates",13,0,0,0,0 -"170",2017,"During","Downstream","Cryptophytes",13,154002771.023077,214774684.846013,59567779.9141476,129787043.111739 -"171",2017,"During","Downstream","Cyanobacteria",13,32172463.1153846,9830331.81113735,2726443.4922985,5940410.06038269 -"172",2017,"During","Downstream","Diatoms",13,490368109.023077,548291023.215598,152068569.083112,331328949.307421 -"173",2017,"During","Downstream","Dinoflagellates",13,0,0,0,0 -"174",2017,"During","Downstream","Golden Algae",13,39381613.0076923,134342067.460745,37259785.5908892,81182098.8760794 -"175",2017,"During","Downstream","Green Algae",13,138497286.738462,171319414.119732,47515456.3147032,103527285.825968 -"176",2017,"During","Downstream","Other",13,348336.4,1255944.75131053,348336.4,758959.817360096 -"177",2017,"After","Upstream","Ciliates",8,0,0,0,0 -"178",2017,"After","Upstream","Cryptophytes",8,376957541.95,545100732.753335,192722212.27982,455715616.977474 -"179",2017,"After","Upstream","Cyanobacteria",8,166568500.5775,153829147.340014,54386816.6141343,128604385.532711 -"180",2017,"After","Upstream","Diatoms",8,427610468.325,488789326.457759,172813123.654943,408638103.187981 -"181",2017,"After","Upstream","Dinoflagellates",8,0,0,0,0 -"182",2017,"After","Upstream","Golden Algae",8,0,0,0,0 -"183",2017,"After","Upstream","Green Algae",8,726173347.8125,1270283209.43208,449112935.708416,1061983339.48015 -"184",2017,"After","Upstream","Other",8,0,0,0,0 -"185",2017,"After","Downstream","Ciliates",9,0,0,0,0 -"186",2017,"After","Downstream","Cryptophytes",9,267661755.922222,228480092.541336,76160030.8471119,175625346.070717 -"187",2017,"After","Downstream","Cyanobacteria",9,49936047.4877778,22114531.9800603,7371510.66002009,16998734.0647079 -"188",2017,"After","Downstream","Diatoms",9,369048806.122222,266414581.7049,88804860.5682999,204784375.696729 -"189",2017,"After","Downstream","Dinoflagellates",9,534432621.988889,1063470531.92867,354490177.309555,817455814.765092 -"190",2017,"After","Downstream","Golden Algae",9,23994731.2444444,41841041.8047678,13947013.9349226,32161871.8076817 -"191",2017,"After","Downstream","Green Algae",9,127074587.966667,75124162.2928684,25041387.4309561,57745542.9670345 -"192",2017,"After","Downstream","Other",9,623396.8,1302316.79644911,434105.598816371,1001049.30598583 -"193",2018,"Before","Upstream","Ciliates",25,853862095.46,4269310477.3,853862095.46,1762284750.64859 -"194",2018,"Before","Upstream","Cryptophytes",25,299540094.58,739752792.214211,147950558.442842,305354944.762245 -"195",2018,"Before","Upstream","Cyanobacteria",25,606597931.7248,1308910097.7883,261782019.557659,540291533.625133 -"196",2018,"Before","Upstream","Diatoms",25,3239486400.676,13103351389.9174,2620670277.98349,5408797617.23144 -"197",2018,"Before","Upstream","Dinoflagellates",25,323524559.24,1617622796.2,323524559.24,667721872.466777 -"198",2018,"Before","Upstream","Golden Algae",25,69473128.612,180114297.834358,36022859.5668715,74347528.0457945 -"199",2018,"Before","Upstream","Green Algae",25,1106355233.66,1164172254.9777,232834450.99554,480546688.507147 -"200",2018,"Before","Upstream","Other",25,5515053.608,20912957.5334716,4182591.50669431,8632444.59454399 -"201",2018,"Before","Downstream","Ciliates",20,0,0,0,0 -"202",2018,"Before","Downstream","Cryptophytes",20,53986673.25,118112772.995804,26410818.942962,55278479.3442421 -"203",2018,"Before","Downstream","Cyanobacteria",20,71551811.18,34339778.0270151,7678607.80006594,16071510.8299053 -"204",2018,"Before","Downstream","Diatoms",20,289496045.095,435713519.70463,97428504.8775246,203920204.293696 -"205",2018,"Before","Downstream","Dinoflagellates",20,0,0,0,0 -"206",2018,"Before","Downstream","Golden Algae",20,0,0,0,0 -"207",2018,"Before","Downstream","Green Algae",20,118252154.565,176512181.191371,39469323.6000665,82610243.7061647 -"208",2018,"Before","Downstream","Other",20,801683.93,3109774.59876716,695366.73975455,1455419.31294176 -"209",2018,"During","Upstream","Ciliates",20,0,0,0,0 -"210",2018,"During","Upstream","Cryptophytes",20,150657749.58,421454321.614872,94240051.2541912,197246694.163694 -"211",2018,"During","Upstream","Cyanobacteria",20,99278710.765,80774710.4413716,18061774.3409769,37803728.1609594 -"212",2018,"During","Upstream","Diatoms",20,673197768.23,1141889635.02754,255334284.672401,534420799.734475 -"213",2018,"During","Upstream","Dinoflagellates",20,0,0,0,0 -"214",2018,"During","Upstream","Golden Algae",20,25326178.185,113262112.063864,25326178.185,53008300.147436 -"215",2018,"During","Upstream","Green Algae",20,94421128.36,140904214.833067,31507140.2682972,65945202.4671627 -"216",2018,"During","Upstream","Other",20,325729682.88,1456707426.41826,325729682.88,681760061.50263 -"217",2018,"During","Downstream","Ciliates",18,0,0,0,0 -"218",2018,"During","Downstream","Cryptophytes",18,32780638.6888889,51724340.3432808,12191543.9363783,25721909.3148101 -"219",2018,"During","Downstream","Cyanobacteria",18,31561128.5,18799202.4761933,4431014.51727174,9348623.45414548 -"220",2018,"During","Downstream","Diatoms",18,105071780.05,117523464.345637,27700546.1957785,58443043.878345 -"221",2018,"During","Downstream","Dinoflagellates",18,0,0,0,0 -"222",2018,"During","Downstream","Golden Algae",18,0,0,0,0 -"223",2018,"During","Downstream","Green Algae",18,48034900.7722222,58604446.4021336,13813200.4862107,29143305.5655422 -"224",2018,"During","Downstream","Other",18,215647.366666667,914914.291890131,215647.366666667,454976.173512066 -"225",2018,"After","Upstream","Ciliates",33,0,0,0,0 -"226",2018,"After","Upstream","Cryptophytes",33,154122351.9,441485859.72001,76852823.5976454,156544078.925101 -"227",2018,"After","Upstream","Cyanobacteria",33,227072311.339394,724665695.003838,126148105.537775,256955282.384217 -"228",2018,"After","Upstream","Diatoms",33,385194607.39697,395853251.92648,68909206.197802,140363459.77567 -"229",2018,"After","Upstream","Dinoflagellates",33,0,0,0,0 -"230",2018,"After","Upstream","Golden Algae",33,102194487.712121,255739693.616552,44518566.3995977,90681352.3023831 -"231",2018,"After","Upstream","Green Algae",33,165048778.709091,204416779.404694,35584393.7967822,72483038.2314804 -"232",2018,"After","Upstream","Other",33,76779760.6909091,267081734.821251,46492962.3462647,94703065.2393416 -"233",2018,"After","Downstream","Ciliates",30,0,0,0,0 -"234",2018,"After","Downstream","Cryptophytes",30,63114432.31,85808588.8724849,15666433.2510489,32041453.6715387 -"235",2018,"After","Downstream","Cyanobacteria",30,35968375.4266667,31607304.6689862,5770677.9163807,11802361.5297824 -"236",2018,"After","Downstream","Diatoms",30,158720570.806667,331182306.663734,60465340.0021069,123665505.693941 -"237",2018,"After","Downstream","Dinoflagellates",30,132262161.896667,724429695.752046,132262161.896667,270506494.043617 -"238",2018,"After","Downstream","Golden Algae",30,4029979.79,10900048.9377668,1990067.56037503,4070145.16432573 -"239",2018,"After","Downstream","Green Algae",30,42213898.5166667,67397591.6806153,12305060.4283318,25166674.3362584 -"240",2018,"After","Downstream","Other",30,88082.34,482446.845358396,88082.34,180148.612716411 -"241",2019,"Before","Upstream","Ciliates",14,0,0,0,0 -"242",2019,"Before","Upstream","Cryptophytes",14,91048447.2785714,232369263.672781,62103297.991463,134166018.443725 -"243",2019,"Before","Upstream","Cyanobacteria",14,335207377.985714,560088530.045171,149689956.120777,323385489.390616 -"244",2019,"Before","Upstream","Diatoms",14,869404232.221429,1917003655.15367,512340777.627026,1106844957.4132 -"245",2019,"Before","Upstream","Dinoflagellates",14,0,0,0,0 -"246",2019,"Before","Upstream","Golden Algae",14,18879148,70639303.5701985,18879148,40785919.5999222 -"247",2019,"Before","Upstream","Green Algae",14,362167499.614286,497498156.310112,132961975.104582,287246883.517325 -"248",2019,"Before","Upstream","Other",14,14963916.4857143,40231207.9170209,10752242.59154,23228807.8814472 -"249",2019,"Before","Downstream","Ciliates",8,0,0,0,0 -"250",2019,"Before","Downstream","Cryptophytes",8,0,0,0,0 -"251",2019,"Before","Downstream","Cyanobacteria",8,61069670.15,23129055.1764796,8177355.87886327,19336374.0250649 -"252",2019,"Before","Downstream","Diatoms",8,242670595.5125,278826306.620408,98579986.0922451,233104625.835402 -"253",2019,"Before","Downstream","Dinoflagellates",8,0,0,0,0 -"254",2019,"Before","Downstream","Golden Algae",8,0,0,0,0 -"255",2019,"Before","Downstream","Green Algae",8,18148168.15,15937633.9645585,5634804.52620417,13324195.4356472 -"256",2019,"Before","Downstream","Other",8,11443141.6,25756907.1397612,9106441.85045866,21533313.245314 -"257",2019,"During","Upstream","Ciliates",12,0,0,0,0 -"258",2019,"During","Upstream","Cryptophytes",12,24387712.5,84481514.2607652,24387712.5,53676993.3010814 -"259",2019,"During","Upstream","Cyanobacteria",12,145492360.883333,62440621.8187462,18025054.9077104,39672878.3617075 -"260",2019,"During","Upstream","Diatoms",12,1310578730.20833,3107374871.5766,897021859.288915,1974331800.57271 -"261",2019,"During","Upstream","Dinoflagellates",12,0,0,0,0 -"262",2019,"During","Upstream","Golden Algae",12,0,0,0,0 -"263",2019,"During","Upstream","Green Algae",12,53002668,41019086.6762963,11841190.3672361,26062284.2761068 -"264",2019,"During","Upstream","Other",12,529308.208333333,1833577.41939317,529308.208333333,1164999.51165636 -"265",2019,"During","Downstream","Ciliates",8,0,0,0,0 -"266",2019,"During","Downstream","Cryptophytes",8,9849015,27857221.1780321,9849015,23289219.7233011 -"267",2019,"During","Downstream","Cyanobacteria",8,36976284.9,13905595.3242546,4916370.37510819,11625368.6187931 -"268",2019,"During","Downstream","Diatoms",8,211154836.325,137496239.417889,48612261.6400192,114949732.798763 -"269",2019,"During","Downstream","Dinoflagellates",8,0,0,0,0 -"270",2019,"During","Downstream","Golden Algae",8,0,0,0,0 -"271",2019,"During","Downstream","Green Algae",8,10486907.3875,6939839.57297801,2453603.81119975,5801851.07576342 -"272",2019,"During","Downstream","Other",8,0,0,0,0 -"273",2019,"After","Upstream","Ciliates",14,0,0,0,0 -"274",2019,"After","Upstream","Cryptophytes",14,279513481.871429,472391995.143699,126252071.291593,272751017.631863 -"275",2019,"After","Upstream","Cyanobacteria",14,162077785.228571,170794436.741708,45646733.2753218,98613771.8379223 -"276",2019,"After","Upstream","Diatoms",14,3936861446.39286,6833517157.4193,1826334282.12174,3945555339.31928 -"277",2019,"After","Upstream","Dinoflagellates",14,0,0,0,0 -"278",2019,"After","Upstream","Golden Algae",14,211248682.735714,259997947.730041,69487374.4050123,150118345.684483 -"279",2019,"After","Upstream","Green Algae",14,1608047032.11429,3642639581.11462,973536378.288046,2103197477.5798 -"280",2019,"After","Upstream","Other",14,0,0,0,0 -"281",2019,"After","Downstream","Ciliates",7,0,0,0,0 -"282",2019,"After","Downstream","Cryptophytes",7,98698011.2714286,98279236.0492488,37146059.6611037,90893133.6080928 -"283",2019,"After","Downstream","Cyanobacteria",7,31189573.6571429,17700379.2377472,6690114.51065857,16370120.4816474 -"284",2019,"After","Downstream","Diatoms",7,168251495.185714,131006865.341492,49515940.8193877,121161142.411553 -"285",2019,"After","Downstream","Dinoflagellates",7,0,0,0,0 -"286",2019,"After","Downstream","Golden Algae",7,4493368.77142857,11888336.3181038,4493368.77142857,10994877.2983733 -"287",2019,"After","Downstream","Green Algae",7,9531069.62857143,16336674.7902737,6174682.67782894,15108904.2214392 -"288",2019,"After","Downstream","Other",7,0,0,0,0 diff --git a/phyto-final/analyses/phyto.grp.error.xlsx b/phyto-final/analyses/phyto.grp.error.xlsx deleted file mode 100644 index d5dc793..0000000 Binary files a/phyto-final/analyses/phyto.grp.error.xlsx and /dev/null differ diff --git a/phyto-final/analyses/phyto.yearly.mean.grp.xlsx b/phyto-final/analyses/phyto.yearly.mean.grp.xlsx deleted file mode 100644 index d2df71c..0000000 Binary files a/phyto-final/analyses/phyto.yearly.mean.grp.xlsx and /dev/null differ diff --git a/phyto-final/phyto-data-processing-FASTR.Rmd b/phyto-final/phyto-data-processing-FASTR.Rmd deleted file mode 100644 index 11e968a..0000000 --- a/phyto-final/phyto-data-processing-FASTR.Rmd +++ /dev/null @@ -1,957 +0,0 @@ ---- -title: "Phytoplankton Data Processing for FASTR" -author: "Ted Flynn" -date: "2024-03-18" -output: html_document ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -suppressWarnings(suppressMessages(library(tidyverse))) -suppressWarnings(suppressMessages(library(lubridate))) -suppressWarnings(suppressMessages(library(RColorBrewer))) -suppressWarnings(suppressMessages(library(vegan))) -suppressWarnings(suppressMessages(library(janitor))) -suppressWarnings(suppressMessages(library(here))) - -# Set visual theme in ggplot -theme_set(theme_bw()) - -``` - -## Import EMP Data - -Import phytoplankton data collected by the Environmental Monitoring Program. - -```{r import EMP, echo=FALSE} - -# Import EMP data files -phyto_files_EMP <- list.files(path = here("phyto-final","data","EMP","csv"), - pattern = "\\.csv", - full.names = T) - -df_phyto_EMP <- map(phyto_files_EMP, ~read_csv(.x, show_col_types = FALSE)) %>% - list_rbind() - -# Read in files with non-standard headers individually -df_Dec2021 <- read_csv(here("phyto-final","data","EMP","oddballs","December 2021.csv")) -df_Nov2021 <- read_csv(here("phyto-final","data","EMP","oddballs","November 2021.csv")) -df_Sep2013 <- read_csv(here("phyto-final","data","EMP","oddballs","September 2013.csv")) -df_Nov2013 <- read_csv(here("phyto-final","data","EMP","oddballs","November 2013.csv")) - -# Combine like oddball dfs -df_phyto2013 <- bind_rows(df_Sep2013, df_Nov2013) -df_phyto2021 <- bind_rows(df_Dec2021, df_Nov2021) - -# Remove individual dfs -rm(df_Dec2021) -rm(df_Nov2021) -rm(df_Nov2013) -rm(df_Sep2013) - -# Rename headers to match standard BSA headers Oddballs actually have the -# "correct" name of Total Cells rather than the incorrect "Number of cells per -# unit" - -df_phyto2013 <- df_phyto2013 %>% - rename("Number of cells per unit" = "Total Cells Counted") - -df_phyto2021 <- df_phyto2021 %>% - rename("Number of cells per unit" = "Total Number of Cells") %>% - rename("Unit Abundance" = "Unit Abundance (# of Natural Units)") - -# Combine oddball files with others -df_phyto_EMP <- bind_rows(df_phyto_EMP, df_phyto2013) -df_phyto_EMP <- bind_rows(df_phyto_EMP, df_phyto2021) - -# Remove unneeded dfs -rm(df_phyto2013) -rm(df_phyto2021) - -# Remove empty rows -df_phyto_EMP <- df_phyto_EMP %>% filter_all(any_vars(!is.na(.))) - -# Correct GALD, which is imported into two separate columns -# Test to see if NAs are 'either/or' and that there aren't some rows with a -# value in both GALD and GALD 1 - -sum(is.na(df_phyto_EMP$GALD)) # Total is 5880 -sum(is.na(df_phyto_EMP$`GALD 1`)) # Total is 8072 - -# Sum of NAs is 13952 which is the same as the number of rows in the df. -# This shows that there aren't any rows with two values so that we can -# Combine them without any issues. - -# Move GALD header -df_phyto_EMP <- df_phyto_EMP %>% relocate(`GALD 1`, .after = GALD) - -# Combine both GALD columns -df_phyto_EMP <- df_phyto_EMP %>% - rowwise() %>% - mutate(GALD.Tot = sum(c_across(GALD:`GALD 1`), na.rm = TRUE)) - -# Remove old GALD columns and rename GALD.Tot -df_phyto_EMP <- df_phyto_EMP %>% - select(!(GALD:`GALD 1`)) %>% - rename("GALD" = "GALD.Tot") - -# Clean up column names -df_phyto_EMP <- df_phyto_EMP %>% clean_names(case = "big_camel") - -df_phyto_EMP <- df_phyto_EMP %>% rename("GALD" = "Gald") - -# Remove blank columns -df_phyto_EMP <- df_phyto_EMP %>% select_if(~ !all(is.na(.))) - -# Remove columns that just have the method code "Phyto" as well as -# pre-calculated organisms per mL -df_phyto_EMP <- df_phyto_EMP %>% select(SampleDate:Biovolume10,GALD) - -# Add column to indicate what study it came from -df_phyto_EMP <- df_phyto_EMP %>% mutate(Study = "EMP", .after = StationCode) - -``` - -## Import phyto data collected for FASTR project - -```{r import FASTR} -# Import AEU data files (comment out when finished) - -phyto_files_AEU <- list.files(path = here("phyto-final","data","csv"), - pattern = "\\.csv", - full.names = T) - -df_phyto_FASTR <- map(phyto_files_AEU, ~read_csv(.x, show_col_types = FALSE)) %>% - list_rbind() - -# Remove empty rows -df_phyto_FASTR <- df_phyto_FASTR %>% filter_all(any_vars(!is.na(.))) - -# Remove weird row with only a single zero in biovolume -df_phyto_FASTR <- df_phyto_FASTR %>% drop_na(MethodCode) - -# Clean up column names -df_phyto_FASTR <- df_phyto_FASTR %>% clean_names(case = "big_camel") - -# Filter out samples from other projects. Read in station names with flags and -# merge together. -df_keepers <- read_csv(here("phyto-final","CSVs", "station_names_flagged.csv")) -df_phyto_FASTR <- left_join(df_phyto_FASTR, df_keepers) -rm(df_keepers) - -# Remove data unrelated to project -df_phyto_FASTR <- df_phyto_FASTR %>% filter(Flag == "Keep") - -# Remove flag column -df_phyto_FASTR <- df_phyto_FASTR %>% select(!(Flag)) - -# Confirm station IDs -unique(df_phyto_FASTR$StationCode) # Shows only 10 FASTR stations -table(df_phyto_FASTR$StationCode) - -# Remove blank columns -df_phyto_FASTR <- df_phyto_FASTR %>% select_if(~ !all(is.na(.))) - -# Remove individual measurement columns as they are only present in a small -# number of samples. -df_phyto_FASTR <- df_phyto_FASTR %>% select(!(Dimension:DepthM)) - -# Remove secondary and tertiary GALD measurements as they don't vary much and -# aren't present in EMP data -df_phyto_FASTR <- df_phyto_FASTR %>% select(!(Gald2:Gald3)) - -# Correct GALD, which is imported into two separate columns. Test to see if NAs -# are 'either/or' and that there aren't some rows with a value in both GALD and -# GALD 1 - -sum(is.na(df_phyto_FASTR$Gald)) # Total is 1791 -sum(is.na(df_phyto_FASTR$Gald1)) # Total is 4535 - -# Sum of NAs is 6326 which is the same as the number of rows in the df. This -# shows that there aren't any rows with two values so that we can Combine them -# without any issues. - -# Move Gald1 column -df_phyto_FASTR <- df_phyto_FASTR %>% relocate(Gald1, .after = Gald) - -# Combine both GALD columns -df_phyto_FASTR <- df_phyto_FASTR %>% - rowwise() %>% - mutate(GALD.Tot = sum(c_across(Gald:Gald1), na.rm = TRUE)) - -# Check if rows have two NAs or no NAs in the GALD columns -# test <- df_phyto_FASTR %>% -# mutate(GALD.Value = case_when(is.na(Gald) & is.na(Gald1) ~ "Fix", -# TRUE ~ "Okay")) - -# Remove old GALD columns and rename GALD.Tot -df_phyto_FASTR <- df_phyto_FASTR %>% - select(!(Gald:Gald1)) %>% - rename("GALD" = "GALD.Tot") - -# Remove MethodCode column that just says "Phyto" -df_phyto_FASTR <- df_phyto_FASTR %>% select(!(MethodCode)) - -# Add column to indicate what study it came from -df_phyto_FASTR <- df_phyto_FASTR %>% mutate(Study = "FASTR", .after = StationCode) -``` - -## Combine EMP and AEU data - -```{r combine data} -df_phyto <- bind_rows(df_phyto_EMP, df_phyto_FASTR) - -# Average all 10 biovolume measurements for each taxon -df_phyto <- df_phyto %>% - rowwise() %>% - mutate(BV.Avg = mean(c_across(Biovolume1:Biovolume10), na.rm = T)) %>% - select(!(Biovolume1:Biovolume10)) # Remove Individual Biovolume Columns - -# Remove unneeded columns -df_phyto <- df_phyto %>% select(!c("BsaTin","DiatomSoftBody")) -df_phyto <- df_phyto %>% select(!(ColonyFilamentIndividualGroupCode:Shape)) -df_phyto <- df_phyto %>% select(!(VolumeReceivedML:NumberOfFieldsCounted)) - -# Fix samples with missing times -# Just replace with 12:00:00 b/c aren't doing any time-based analyses -df_phyto <- df_phyto %>% replace_na(list(SampleTime = "12:00:00")) - -# Get dates in the right format. Some are 1/1/14 and others 1/1/2014. -df_phyto$SampleDate <- mdy(df_phyto$SampleDate) - -# Combine date and time column -df_phyto <- df_phyto %>% unite(DateTime, c("SampleDate","SampleTime"), sep = " ") #, remove = FALSE, na.rm = FALSE) - -df_phyto$DateTime <- as_datetime(df_phyto$DateTime, - tz = "US/Pacific", - format = c("%Y-%m-%d %H:%M:%OS")) - -# Check for missing dates -df_phyto %>% filter(is.na(DateTime)) # No missing dates - -# Correct BSA header -df_phyto <- df_phyto %>% rename("TotalCells" = "NumberOfCellsPerUnit") - -# Calculate Unit Density & Biovolume Density -df_phyto <- df_phyto %>% - mutate(Units.per.mL = UnitAbundance * Factor) %>% - mutate(BV.um3.per.mL= TotalCells * BV.Avg * Factor) %>% - mutate(Cells.per.mL = TotalCells * Factor) - -# Remove columns used for density calcs -# Remove Biovolume Columns -df_phyto <- df_phyto %>% select(!(c(Factor,UnitAbundance:TotalCells))) - -# Add column for year and month for highlighting data -df_phyto <- df_phyto %>% - mutate(Year = year(df_phyto$DateTime)) %>% - mutate(Month = month(df_phyto$DateTime, label = T)) - -# Order month in calendar order rather than (default) alphabetical -df_phyto$Month = factor(df_phyto$Month, levels = month.abb) - -# Reorder date/time columns -df_phyto <- df_phyto %>% - relocate(Year, .after = DateTime) %>% - relocate(Month, .after = DateTime) - -# Convert years to factors for plotting -df_phyto$Year <- as.factor(df_phyto$Year) - -# - -``` - -## Clean Combined Data - -```{r data cleaning, echo = FALSE} -# Fix EMP site names -df_phyto$StationCode <- gsub("EZ6 SAC","EZ6",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ6SAC","EZ6",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ6SJR","EZ6-SJR",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ2SAC","EZ2",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ2 SAC","EZ2",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ2SJR","EZ2-SJR",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ2 SJR","EZ2-SJR",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ6 SJR","EZ6-SJR",df_phyto$StationCode) -df_phyto$StationCode <- gsub("D16-Twitchell","D16",df_phyto$StationCode) -df_phyto$StationCode <- gsub("D16-Twitchel","D16",df_phyto$StationCode) -df_phyto$StationCode <- gsub("D16 - Twitchell","D16",df_phyto$StationCode) -df_phyto$StationCode <- gsub("D16 Twitchell","D16",df_phyto$StationCode) -df_phyto$StationCode <- gsub("NZ328","NZ325",df_phyto$StationCode) # Typo in August 2019 -df_phyto$StationCode <- gsub("C3A-HOOD","C3A",df_phyto$StationCode) -df_phyto$StationCode <- gsub("C3A Hood","C3A",df_phyto$StationCode) -df_phyto$StationCode <- gsub("C3A- Hood","C3A",df_phyto$StationCode) -df_phyto$StationCode <- gsub("C3A-Hood","C3A",df_phyto$StationCode) -df_phyto$StationCode <- gsub("NZ542","NZS42",df_phyto$StationCode) -df_phyto$StationCode <- gsub("E26","EZ6",df_phyto$StationCode) -df_phyto$StationCode <- gsub("E22","EZ2",df_phyto$StationCode) # Typo in May 2018 - -# Fix AEU site names -df_phyto$StationCode <- gsub("SHER","SHR",df_phyto$StationCode) -df_phyto$StationCode <- gsub("BL-5","BL5",df_phyto$StationCode) - -# Remove extra Microcystis tows at D19 -df_phyto <- df_phyto %>% filter(StationCode != "D19 MC Tow") - -# In Fall 2016, taxonomists began classifying the species -# Chroococcus microscopicus as Eucapsis microscopica. This is one of the most -# dominant species in this samples, so all taxa previously classified as -# C. microscopicus will be re-named E. microscopica - -df_phyto <- df_phyto %>% - mutate(Taxon = case_when(Taxon == 'Chroococcus microscopicus' ~ 'Eucapsis microscopica', - TRUE ~ Taxon)) %>% - mutate(Genus = case_when(Taxon == 'Eucapsis microscopica' ~ 'Eucapsis', - TRUE ~ Genus)) - -# The taxon Plagioselmis lacustris is inconsistently named, appearing sometimes as -# Rhodomonas lacustris. Change to Rhodomonas lacustris to avoid confusion. - -df_phyto <- df_phyto %>% - mutate(Taxon = case_when(Taxon == 'Plagioselmis lacustris' ~ 'Rhodomonas lacustris', - TRUE ~ Taxon)) %>% - mutate(Genus = case_when(Taxon == 'Rhodomonas lacustris' ~ 'Rhodomonas', - TRUE ~ Genus)) - -# Correct the genus label for a Chlorella entry -df_phyto$Genus <- gsub("cf Chlorella","Chlorella",df_phyto$Genus) - -# Add Taxonomy Data------------------------------------------------------------- - -# Read in CSV with manually-added WoRMS classification -df_taxa <- read_csv(here("phyto-final","CSVs","phyto_group_classification.csv"), - show_col_types = FALSE) - -df_phyto <- left_join(df_phyto, df_taxa) - -# Check if any groups are missing -sum(is.na(df_phyto$Group)) # none missing -rm(df_taxa) - -# Reorder Group classification column -df_phyto <- df_phyto %>% - relocate(Group, .before = Taxon) - - - -``` - -## Analyze FASTR Biovolume Data for Outliers - -```{r outlier analysis} - -# Subset combined dataset to FASTR-only -df_phyto_FASTR <- df_phyto %>% - filter(Study == "FASTR") - -# Identify if outliers are present -boxplot(df_phyto_FASTR$BV.um3.per.mL) - -# Identify 0.1% and 99.9% percentiles of the data -quartiles <- quantile(df_phyto_FASTR$BV.um3.per.mL, - probs=c(0.001, 0.999), - na.rm = TRUE) - -# List upper cutoff (99.9%) -cutoff <- quartiles[2] - -# Filter FASTR dataset to show all taxa above this 99.9% cutoff -df_outliers <- df_phyto_FASTR %>% - filter(BV.um3.per.mL > cutoff) - -list(df_outliers) ## 4 of top 6 are Spirogyra. - -df_phyto_FASTR %>% filter(Genus == "Spirogyra")# Only 5 total samples w/ Spirogyra - -# Decision: Remove Spirogyra from datasets -# 4/5 are above 99.9% cutoff, 5th is also highly abundant -# These are benthic algae that clump, more likely to be "bullseye" samples -# that got a big blob or clump - -# Remove Spirogyra from FASTR and combined dataset -df_phyto_FASTR <- df_phyto_FASTR %>% - filter(Genus != "Spirogyra") - -df_phyto <- df_phyto %>% - filter(Genus != "Spirogyra") - -## Remove 2013 data from FASTR dataset (very limited) -df_phyto_FASTR <- df_phyto_FASTR %>% filter(Year != 2013) - -``` - -## Estimate Phytoplankton Biomass - -```{r biomass calcs} - -# Convert biovolume to biomass using relationships from Menden-Deuer and Lussard -# (2000) doi: 10.4319/lo.2000.45.3.0569 -# Only relevant to group-level data -# Units of BV.Density are um^3 per mL - -# Calculate Biomass (pg-C per cell) from Biovolume (um^3 per cell) -df_phyto_FASTR <- df_phyto_FASTR %>% - mutate(Biomass.pg.C = case_when(Group == "Diatoms" ~ 0.288 * (BV.Avg^0.811), - Group != "Diatoms" ~ 0.216 * (BV.Avg^0.939))) - -# Convert pg to ug (ug-C per L) -df_phyto_FASTR <- df_phyto_FASTR %>% - mutate(Biomass.ug.C = Biomass.pg.C / 10^6, .keep = "unused") - -# Calculate Biomass Density (pg-C per mL) -df_phyto_FASTR <- df_phyto_FASTR %>% - mutate(BM.ug.per.mL = Biomass.ug.C * Cells.per.mL) - - - -``` - -## Add FASTR-Specific Metadata - -```{r FASTR metadata} - -# Import Stations listed as Upstream and Downstream -df_region <- read_csv(here("phyto-final","CSVs","upstream_downstream_stations.csv"), - show_col_types = FALSE) - -df_region <- df_region %>% - rename("Region" = "UpDown") %>% - rename("StationCode" = "Site") - -df_region$Region <- factor(df_region$Region, - levels = c("Upstream","Downstream")) - -## Add Region to data frames -df_phyto_FASTR <- left_join(df_phyto_FASTR, df_region) - -df_phyto_FASTR <- df_phyto_FASTR %>% - relocate(Region, .after = StationCode) - -# Cat Pien's Code for Merging Flow Designations ##### -# -# FlowDesignation <- read_csv("FlowDatesDesignations.csv") -# Update with 45 days limit on either end -# Update 5/27/22 TF added >= to two of the mutate commands to avoid removing -# samples falling on the PreFlowEnd and PostFlowStart dates. - -# Upload flow dates and categories -df_flow_designation <- read_csv(here("phyto-final","CSVs","FlowDatesDesignations_45days.csv"), - show_col_types = FALSE) - -df_flow_designation$Year <- as.factor(df_flow_designation$Year) - -# Format dates as dates -df_flow_designation$PreFlowStart <- mdy(df_flow_designation$PreFlowStart) -df_flow_designation$PreFlowEnd <- mdy(df_flow_designation$PreFlowEnd) -df_flow_designation$PostFlowStart <- mdy(df_flow_designation$PostFlowStart) -df_flow_designation$PostFlowEnd <- mdy(df_flow_designation$PostFlowEnd) - -# Remove column with net flow days -df_flow_designation <- df_flow_designation %>% - select(!(NetFlowDays)) - -# Save file for use in making plots -# save(FlowDesignation, file = "RData/FlowDesignation.RData") - -# Merge data from FlowDesignation Table (Water Year Type, Flow days and type) -df_phyto_FASTR <- inner_join(df_phyto_FASTR, df_flow_designation, - by = "Year") - -# Filter only Pre-During-Post Flow Action Data. -df_phyto_FASTR <- df_phyto_FASTR %>% - mutate(ActionPhase = ifelse(DateTime > PreFlowStart & DateTime < PreFlowEnd, "Before", NA)) %>% - mutate(ActionPhase = replace(ActionPhase, DateTime >= PreFlowEnd & DateTime < PostFlowStart, "During")) %>% - mutate(ActionPhase = replace(ActionPhase, DateTime >= PostFlowStart & DateTime < PostFlowEnd, "After")) %>% - filter(!is.na(ActionPhase)) %>% - select(-c(PreFlowStart:PostFlowEnd)) - -# Order the new ActionPhase so that it plots in the order Pre < During < Post -phase.order <- c("Before","During","After") -df_phyto_FASTR$ActionPhase <- factor(as.character(df_phyto_FASTR$ActionPhase), - levels = phase.order) - -## Add in Flow Pulse Category Data -df_pulse_category <- read_csv(here("phyto-final","CSVs","FlowPulseType.csv"), - show_col_types = FALSE) - -df_pulse_category <- df_pulse_category %>% - rename("FlowPulseCategory" = "FlowPulseType") - -df_pulse_category$Year <- as.factor(df_pulse_category$Year) - -# Add Flow Pulse Category to data frames to be exported -df_phyto_FASTR <- left_join(df_phyto_FASTR, df_pulse_category, - by = "Year") - -# List of stations used for FASTR -stations <- c("RMB","RCS","RD22","I80","LIS","STTD","BL5","LIB","RYI","RVB") - -# Re-order stations from North to South -df_phyto_FASTR$StationCode <- factor(as.character(df_phyto_FASTR$StationCode), - levels = stations) - -## Relocate metadata columns -df_phyto_FASTR <- df_phyto_FASTR %>% - relocate(FlowPulseCategory, .before = ActionPhase) %>% - relocate(WYType:ActionPhase, .before = GALD) - -# Save df to use for making plots -# save(df_phyto, file = "RData/df_phyto.RData") - -``` - -## Create New Data Frames summarizing by Genus and Algal Group - -```{r summarize phyto data} -# Summarize by algal group -df_phyto_FASTR_grp <- df_phyto_FASTR %>% - group_by(Year, Month, Region, ActionPhase, DateTime, StationCode, Group) %>% - summarize(across(Units.per.mL:BM.ug.per.mL, ~sum(.x, na.rm = TRUE))) %>% - ungroup - -## Summarize by genus -df_phyto_FASTR_gen <- df_phyto_FASTR %>% - group_by(Year, Month, Region, ActionPhase, DateTime, StationCode, Genus) %>% - summarize(across(Units.per.mL:BM.ug.per.mL, ~sum(.x, na.rm = TRUE))) %>% - ungroup - -## Set group display order -group.order <- c("Diatoms", - "Cyanobacteria", - "Green Algae", - "Cryptophytes", - "Ciliates", - "Dinoflagellates", - "Golden Algae", - "Other") - -df_phyto_FASTR_grp$Group <- factor(as.character(df_phyto_FASTR_grp$Group), - levels = group.order) - -## Summarize totals -df_phyto_FASTR_sum <- df_phyto_FASTR %>% - group_by(Year, Month, DateTime, StationCode, Region, WYType, FlowPulseType, FlowPulseCategory, ActionPhase) %>% - summarize(across(Units.per.mL:BM.ug.per.mL, ~sum(.x, na.rm = TRUE))) %>% - rename("Total.Units.per.mL" = "Units.per.mL") %>% - rename("Total.Cells.per.mL" = "Cells.per.mL") %>% - rename("Total.BV.per.mL" = "BV.um3.per.mL") %>% - rename("Total.BM.per.mL" = "BM.ug.per.mL") %>% - ungroup - -# Separate out data frames by density type and add zeros -# Biovolume Density + Group -df_phyto_FASTR_grp_BV <- df_phyto_FASTR_grp %>% - select(Year:Group,BV.um3.per.mL) - -# Add zeros for taxa that weren't detected -temp <- pivot_wider(df_phyto_FASTR_grp_BV, - names_from = Group, - values_from = BV.um3.per.mL, - values_fill = 0) - -df_phyto_FASTR_grp_BV <- pivot_longer(temp, - cols = Cyanobacteria:last_col(), - names_to = "Group", - values_to = "BV.um3.per.mL") - - - - -``` - -## Calculate LCEFA Abundance in FASTR Data - -```{r LCEFA calcs} -## Calculate LCEFA composition based on method in Galloway & Winder (2015) -## doi: 10.1371/journal.pone.0130053 - -df_phyto_FASTR_grp <- df_phyto_FASTR_grp %>% - mutate(LCEFA.per.mL = case_when(Group == 'Diatoms' ~ BM.ug.per.mL*2.77/100, - Group == 'Cyanobacteria' ~ BM.ug.per.mL*0.02/100, - Group == 'Green Algae' ~ BM.ug.per.mL*0.52/100, - Group == 'Cryptophytes' ~ BM.ug.per.mL*2.13/100, - Group == 'Dinoflagellates' ~ BM.ug.per.mL*3.82/100, - TRUE ~ 0)) - - -``` - -## Create Plots for Paper -```{r Total Biovolume Plots} - -# Set plot output directory -plots <- here("phyto-final","plots") - -# Plot Upstream and Downstream Total BV by ActionPhase for each year -fig1 <- ggplot(data = df_phyto_FASTR_sum, - aes(y = log10(Total.BV.per.mL), - x = Region, - fill = ActionPhase)) + - geom_boxplot() - -fig1 + - labs(title = "Total Phytoplankton Abundance by Year", - y = bquote(Log[10]~'Biovolume Density'~(um^3~mL^-1)), - x = "Sampling Region", - fill = "Pulse Period") + - facet_wrap(Year ~ ., ncol = 2) + - scale_fill_manual(values = c("#4DAF4A", - "#984EA3", - "#A3D0D4"), - labels=c("Before", "During", "After")) - -ggsave(path = plots, - filename = "fig1_log_phyto_biovolume_by_year_and_AP.png", - device = "png", - scale=1.0, - units="in", - height=4, - width=6, - dpi="print") - -# Plot Total BV by Flow Pulse Type at Each Station - -fig2 <- ggplot(data = df_phyto_FASTR_sum, - aes(y= log10(Total.BV.per.mL), - x = FlowPulseCategory, - color = ActionPhase)) + - geom_boxplot() - -fig2 + - labs(title = "Total Phytoplankton Abundance by Flow Pulse Type", - y = bquote(Log[10]~'Biovolume Density'~(um^3~mL^-1)), - x = "Flow Pulse Category") + - facet_wrap(StationCode ~ ., ncol = 2) - -ggsave(path = plots, - filename = "fig2_log_phyto_biovolume_by_site_and_pulse_type.png", - device = "png", - scale=1.0, - units="in", - height=8, - width=6.5, - dpi="print") - -# Stacked Bar Charts for Biovolume --------------------------------------------- - -# Upstream/Downstream -fig4 <- ggplot(df_phyto_FASTR_grp_BV, aes(x = ActionPhase, - y = BV.um3.per.mL, - fill = Group)) + - geom_bar(position = "stack", - width = 0.6, - stat = "summary", - fun = "mean") + - theme(axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0)) - -fig4 + - labs(x = NULL, - y = bquote('Average Biovolume Density'~(um^3~mL^-1)), - title = paste0("Abundance of Phytoplankton Groups During Flow Pulses")) + - theme(panel.background = element_rect(fill = "white", linetype = 0)) + - theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + - scale_fill_manual(values = c("#E41A1C", - "#377EB8", - "#4DAF4A", - "#984EA3", - "#FF7F00", - "#FFFF33", - "#A65628", - "#F781BF")) + - theme(axis.text.x = element_text(angle = 45, vjust = 0.7)) + - facet_grid(Region~Year) # dir = v makes order of station go "north to south" - -ggsave(path = plots, - filename = paste0("fig4_phyto_group_biovolume_by_year.png"), - device = "png", - scale=1.0, - units="in", - height=5, - width=7.5, - dpi="print") - -``` - -## Plot Yearly Abundance of Phytoplankton - -```{r plot yearly abundance} - -## Create biovolume plots for each year -years <- unique(df_phyto_FASTR_grp_BV$Year) -years <- sort(years, decreasing = F, na.last = T) - -for (year in years) { - df_temp <- df_phyto_FASTR_grp_BV %>% - filter(Year == year) - - biovolume.plot <- ggplot(df_phyto_FASTR_grp_BV, - aes(x = ActionPhase, - y = BV.um3.per.mL, - fill = Group)) + - geom_bar(data = subset(df_phyto_FASTR_grp_BV, Year == year), - position = "stack", - width = 0.6, - stat = "summary", - fun = "mean") + - theme(panel.background = element_rect(fill = "white", linetype = 0)) + - theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + - labs(x = "Pulse Period", - y = bquote('Average Biovolume Density'~(um^3~mL^-1)), - title = paste0("Abundance of Phytoplankton Groups During Flow Pulses - ",year)) - - biovolume.plot + - scale_fill_manual(values = c("#E41A1C", - "#377EB8", - "#4DAF4A", - "#984EA3", - "#FF7F00", - "#FFFF33", - "#A65628", - "#F781BF")) + - theme(axis.text.x = element_text(angle = 90, vjust = 0.7)) + - facet_wrap(StationCode ~ ., ncol = 5, dir = "h") - - ggsave(path = plots, - filename = paste0("phyto_avg_BV_by_station_",year,".png"), - device = "png", - scale=1.0, - units="in", - height=3.5, - width=7, - dpi="print") - - rm(df_temp) - -} - -``` - -## Multivariate Calculations - -```{r multivariate calcs} - -## Calculate NMDS axes -## Create Biovolume-only data frame at genus level -df_phyto_FASTR_gen_BV <- df_phyto_FASTR_gen %>% select(Year:Genus,BV.um3.per.mL) - -## Generate NMDS data with metaMDS by each year separately - -#df_phyto_FASTR_gen_BV$Year <- as.integer(df_phyto_FASTR_gen_BV$Year) - -years <- unique(df_phyto_FASTR_gen_BV$Year) -years <- sort(years, decreasing = F, na.last = T) - -## Create blank data frame to fill stresses in -stresses <- data.frame(Year = years, stress = NA) -ls_dfs <- list() - -for (i in 1:length(years)) { - genw <- pivot_wider(df_phyto_FASTR_gen_BV, - names_from = "Genus", - values_from = "BV.um3.per.mL", - values_fill = 0) - - genw <- genw %>% filter(Year == years[i]) - - #look at number of observations per station - #table(genw$StationCode) - - # Calculate the nMDS using vegan - # A good rule of thumb: stress < 0.05 provides an excellent representation in reduced dimensions, - # < 0.1 is great, < 0.2 is good/ok, and stress < 0.3 provides a poor representation. - phyto.NMDS <- metaMDS( - comm = genw[c(7:139)], - distance = "bray", - k = 3, - trymax = 500 - #trace = F, - #autotransform = F - ) - - stresses$stress[which(stresses$Year == years[i])] <- phyto.NMDS$stress - - #look at Shepard plot which shows scatter around the regression between the interpoint distances - #in the final configuration (i.e., the distances between each pair of communities) against their - #original dissimilarities. - stressplot(phyto.NMDS) - - # Using the scores function from vegan to extract the site scores and convert to a data.frame - data.scores <- as_tibble(scores(phyto.NMDS, display = "sites")) - - # Combine metadata with NMDS data scores to plot in ggplot - meta <- genw %>% select(1:6) - meta <- cbind(meta, data.scores) - - # Read in years as a character otherwise it shows up as a number and gets displayed as a gradient - meta$Year <- as.character(meta$Year) - - ls_dfs[[i]] <- meta - -} - -df_phyto_NMDS <- do.call(rbind, ls_dfs) - -# Plot NMDS Results - -# Create a ggplot2 theme for the NMDS plots -theme_update( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.text = element_blank(), - axis.ticks = element_blank(), - plot.background = element_rect(fill = "white", colour = NA), - panel.background = element_rect(fill = "white", colour = NA), - panel.border = element_rect(fill = NA, colour = "black"), - strip.background = element_rect(fill = "gray", colour = "black"), - legend.position = "bottom", - legend.key = element_rect(fill = "white", colour = NA) - ) - -fig.NMDS.Reg <- ggplot(df_phyto_NMDS, - aes(x = NMDS1, y = NMDS2, color = Region)) + - geom_point(size = 3) + - stat_ellipse() + - labs(color = "Region", - x = NULL, - y = NULL) - -fig.NMDS.Reg + - facet_wrap(Year ~ ., ncol = 3, dir = "h") + - scale_color_brewer(palette = "Set1") - -ggsave(path = plots, - filename = paste0("phyto_NMDS_biovolume_by_reg.png"), - device = "png", - scale=1.0, - units="in", - height=4.5, - width=5.5, - dpi="print") - -# Create plot colored by ActionPhase -fig.NMDS.AP <- ggplot(df_phyto_NMDS, - aes(x = NMDS1, y = NMDS2, color = ActionPhase)) + - geom_point(size = 3) + - stat_ellipse() + - labs(color = "Flow Pulse Period", - x = NULL, - y = NULL) - -fig.NMDS.AP + - facet_wrap(Year ~ ., ncol = 3, dir = "h") + - scale_color_brewer(palette = "Set1") - -ggsave(path = plots, - filename = paste0("phyto_NMDS_biovolume_by_AP.png"), - device = "png", - scale=1.0, - units="in", - height=4.5, - width=5.5, - dpi="print") - -``` - -## Calculate ANOVAs - -```{r anovas} - -suppressWarnings(suppressMessages(library(ggpubr))) -suppressWarnings(suppressMessages(library(car))) -suppressWarnings(suppressMessages(library(rstatix))) -suppressWarnings(suppressMessages(library(visreg))) -suppressWarnings(suppressMessages(library(emmeans))) - -## Change Year category to factor -df_phyto_FASTR_sum$Year <- as.factor(df_phyto_FASTR_sum$Year) -df_phyto_FASTR_sum$FlowPulseType <- as.factor(df_phyto_FASTR_sum$FlowPulseType) - -## Create QQ Plot to check for normality -ggqqplot(log10(df_phyto_FASTR_sum$Total.BV.per.mL)) - -## View histogram to check for normality -hist(log10(df_phyto_FASTR_sum$Total.BV.per.mL)) - -## Run Shapiro-Wilks test to check whether data is normally distributed -shapiro.test(log10(df_phyto_FASTR_sum$Total.BV.per.mL)) - -## Run Levene test to compare variance -df_phyto_FASTR_sum %>% levene_test(log10(Total.BV.per.mL)~FlowPulseType) # p = 0.901 -df_phyto_FASTR_sum %>% levene_test(log10(Total.BV.per.mL)~Year) # p = 0.980 -df_phyto_FASTR_sum %>% levene_test(log10(Total.BV.per.mL)~Region) # p = 0.0230 - -## Run 3-way ANOVA -phyto.sum.aov <- df_phyto_FASTR_sum %>% anova_test(log10(Total.BV.per.mL) ~ Region*Year*ActionPhase) -phyto.sum.aov - -## Rosie Code -L1 <- lm(log10(Total.BV.per.mL)~ Region*Year + Region*ActionPhase + ActionPhase*Year, - data = df_phyto_FASTR_sum) - -summary(L1) - -Anova(L1, type = 2) - -visreg(L1) - -visreg.AP.Region <- visreg(L1, xvar = "ActionPhase", by = "Year", gg = T) - -visreg.AP.Region + - facet_wrap(Year ~ ., ncol = 3, dir = "h") - -ggsave(path = plots, - filename = "visreg.AP.Region.png", - device = "png", - scale=1.0, - units="in", - height=3.5, - width=7, - dpi="print") - -visreg(L1, xvar = "Region", by = "Year") -visreg(L1, xvar = "Region", by = "ActionPhase") -visreg(L1, xvar = "ActionPhase", by = "Region") - -emmeans(L1, pairwise ~ Region:ActionPhase) -emmeans(L1, pairwise ~ ActionPhase) -emmeans(L1, pairwise ~ Year:ActionPhase) - -detach("package:ggpubr", unload=TRUE) -detach("package:car", unload=TRUE) -detach("package:rstatix", unload=TRUE) -detach("package:visreg", unload=TRUE) -detach("package:emmeans", unload=TRUE) - -``` - -## PERMANOVA Comparisons - -```{r PERMANOVA} - -# Calculate PERMANOVA comparisons for phytoplankton communities by Region and Year -genw <- pivot_wider(df_phyto_FASTR_gen_BV, - names_from = "Genus", - values_from = "BV.um3.per.mL", - values_fill = 0) - -adon.results <- adonis2(genw[c(7:139)] ~ genw$Region + genw$Year, - strata = genw$StationCode, - method = "bray", - perm = 999) - -adon.results <- adonis2(genw[c(7:139)] ~ genw$Region*genw$ActionPhase + genw$Year*genw$ActionPhase + genw$Region*genw$Year, - strata = genw$StationCode, - method = "bray", - perm = 999) - -dm_phyto_gen <- vegdist(genw[c(7:139)], method = "bray") - -bd <- betadisper(dm_phyto_gen, genw$Region) - -anova(bd) -permutest(bd) - -``` - diff --git a/phyto-final/phyto.ANOVA.FASTR.R b/phyto-final/phyto.ANOVA.FASTR.R deleted file mode 100644 index 3115f7c..0000000 --- a/phyto-final/phyto.ANOVA.FASTR.R +++ /dev/null @@ -1,74 +0,0 @@ -## Calculate ANOVAs for FASTR -## - -library("tidyverse");packageVersion("tidyverse") -library("ggpubr");packageVersion("ggpubr") -library("rstatix");packageVersion("rstatix") -library("car");packageVersion("car") -library("visreg");packageVersion("visreg") -library("emmeans");packageVersion("emmeans") - -rm(list=ls()) #cleans workspace -theme_set(theme_bw()) - -# Set working directory -setwd("./FASTR.phyto.final/") -getwd() - -## Load biovolume density data at group and genus level -load("RData/phyto.gen.RData") -load("RData/phyto.sum.RData") - -## Change Year category to factor -phyto.sum$Year <- as.factor(phyto.sum$Year) -phyto.sum$FlowPulseType <- as.factor(phyto.sum$FlowPulseType) - -## Create QQ Plot to check for normality -ggqqplot(log10(phyto.sum$Total.BV.per.L)) - -## View histogram to check for normality -hist(log10(phyto.sum$Total.BV.per.L)) - -## Run Shapiro-Wilks test to check whether data is normally distributed -shapiro.test(log10(phyto.sum$Total.BV.per.L)) - -## Run Levene test to compare variance -phyto.sum %>% levene_test(log10(Total.BV.per.L)~FlowPulseType) # p = 0.901 -phyto.sum %>% levene_test(log10(Total.BV.per.L)~Year) # p = 0.980 -phyto.sum %>% levene_test(log10(Total.BV.per.L)~Region) # p = 0.0230 - -## Run 3-way ANOVA -phyto.sum.aov <- phyto.sum %>% anova_test(log10(Total.BV.per.L) ~ Region*Year*ActionPhase) -phyto.sum.aov - -## Rosie Code -L1 <- lm(log10(Total.BV.per.L)~ Region*Year + Region*ActionPhase + ActionPhase*Year, data = phyto.sum) - -summary(L1) - -Anova(L1, type = 2) - -visreg(L1) - -visreg.AP.Region <- visreg(L1, xvar = "ActionPhase", by = "Year", gg = T) - -visreg.AP.Region + - facet_wrap(Year ~ ., ncol = 3, dir = "h") - -ggsave(path="plots", - filename = "visreg.AP.Region.png", - device = "png", - scale=1.0, - units="in", - height=3.5, - width=7, - dpi="print") - -visreg(L1, xvar = "Region", by = "Year") -visreg(L1, xvar = "Region", by = "ActionPhase") -visreg(L1, xvar = "ActionPhase", by = "Region") - - -emmeans(L1, pairwise ~ Region:ActionPhase) -emmeans(L1, pairwise ~ ActionPhase) -emmeans(L1, pairwise ~ Year:ActionPhase) \ No newline at end of file diff --git a/phyto-final/phyto.data.processing.EMP.R b/phyto-final/phyto.data.processing.EMP.R deleted file mode 100644 index 28c8c11..0000000 --- a/phyto-final/phyto.data.processing.EMP.R +++ /dev/null @@ -1,222 +0,0 @@ -## Extracting FASTR data from -## AEU's Yolo Bypass Phyto datasheets -## 2/3/2022 - -library("tidyverse");packageVersion("tidyverse") -library("lubridate");packageVersion("lubridate") -library("janitor");packageVersion("janitor") - -# Set working directory -setwd("./phyto_code/") -getwd() - -# Set visual theme in ggplot -theme_set(theme_bw()) - -# Clean workspace -rm(list=ls()) - -# Import EMP data files -phyto_files <- dir(path = "data/EMP/csv", pattern = "\\.csv", full.names = T) - -phyto_all <- map_dfr(phyto_files, ~read_csv(.x)) - -## Read in files with non-standard headers individually -Dec2021 <- read_csv("data/EMP/oddballs/December 2021.csv") -Nov2021 <- read_csv("data/EMP/oddballs/November 2021.csv") -Sep2013 <- read_csv("data/EMP/oddballs/September 2013.csv") -Nov2013 <- read_csv("data/EMP/oddballs/November 2013.csv") - -## Combine like oddball dfs -phyto2013 <- bind_rows(Sep2013, Nov2013) -phyto2021 <- bind_rows(Dec2021, Nov2021) - -## Remove individual dfs -rm(Dec2021) -rm(Nov2021) -rm(Nov2013) -rm(Sep2013) - -## Rename headers to match standard BSA headers -## Oddballs actually have the "correct" name of Total Cells -## rather than the incorrect "Number of cells per unit" - -phyto2013 <- phyto2013 %>% - rename("Number of cells per unit" = "Total Cells Counted") - -phyto2021 <- phyto2021 %>% - rename("Number of cells per unit" = "Total Number of Cells") %>% - rename("Unit Abundance" = "Unit Abundance (# of Natural Units)") - -## Remove GALD measurements -phyto_all$GALD <- NULL -phyto_all$`GALD 1` <- NULL -phyto2021$`GALD 1` <- NULL -phyto2013$GALD <- NULL - -## Combine oddball files with others -phyto_all <- bind_rows(phyto_all, phyto2013) -phyto_all <- bind_rows(phyto_all, phyto2021) - -# Clean up column names -phyto <- phyto_all %>% clean_names(case = "big_camel") - -## Remove pre-calculated Unit Density and blank columns -phyto <- phyto %>% select(MethodCode:Biovolume10) - -## Remove empty rows -phyto <- phyto %>% filter_all(any_vars(!is.na(.))) - -## Average all 10 biovolume measurements for each taxon -phyto <- phyto %>% rowwise() %>% mutate(BV.Avg = mean(c_across(Biovolume1:Biovolume10), na.rm = T)) - -# Remove Individual Biovolume Columns -phyto <- phyto %>% select(!(Biovolume1:Biovolume10)) - -# Remove unneeded columns -phyto <- phyto %>% select(!c("MethodCode","BsaTin","DiatomSoftBody","Synonym")) -phyto <- phyto %>% select(!(ColonyFilamentIndividualGroupCode:Shape)) -phyto <- phyto %>% select(!(VolumeReceivedML:NumberOfFieldsCounted)) - -## Fix samples with missing times -## Just replace with 12:00:00 b/c aren't doing any time-based analyses -phyto <- phyto %>% replace_na(list(SampleTime = "12:00:00")) - -## Get dates in the right format -## Some are 1/1/14 and others 1/1/2014 -phyto$SampleDate <- mdy(phyto$SampleDate) - -## Combine date and time column -phyto <- phyto %>% unite(DateTime, c("SampleDate","SampleTime"), sep = " ") #, remove = FALSE, na.rm = FALSE) - -phyto$DateTime <- as_datetime(phyto$DateTime, - tz = "US/Pacific", - format = c("%Y-%m-%d %H:%M:%OS")) - -## Check for missing dates -phyto %>% filter(is.na(DateTime)) ## No missing dates - -## Correct BSA header -phyto <- phyto %>% rename("TotalCells" = "NumberOfCellsPerUnit") - -## Calculate Unit Density & Biovolume Density -phyto <- phyto %>% - mutate(Units.per.mL = UnitAbundance * Factor) %>% - mutate(BV.um3.per.mL= TotalCells * BV.Avg * Factor) - -## Add column for year and month for highlighting data -phyto <- phyto %>% mutate(Year = year(phyto$DateTime)) - -phyto <- phyto %>% mutate(Month = month(phyto$DateTime, label = T)) - -phyto$Year <- as.factor(phyto$Year) - -## Remove columns no longer needed -phyto <- phyto %>% - select(!(Species:BV.Avg)) %>% - select(!(Factor)) - -## List of stations - -#list(unique(phyto$StationCode)) - -## Fix site names -phyto$StationCode <- gsub("EZ6 SAC","EZ6",phyto$StationCode) -phyto$StationCode <- gsub("EZ6SAC","EZ6",phyto$StationCode) -phyto$StationCode <- gsub("EZ6SJR","EZ6-SJR",phyto$StationCode) -phyto$StationCode <- gsub("EZ2SAC","EZ2",phyto$StationCode) -phyto$StationCode <- gsub("EZ2 SAC","EZ2",phyto$StationCode) -phyto$StationCode <- gsub("EZ2SJR","EZ2-SJR",phyto$StationCode) -phyto$StationCode <- gsub("EZ2 SJR","EZ2-SJR",phyto$StationCode) -phyto$StationCode <- gsub("EZ6 SJR","EZ6-SJR",phyto$StationCode) -phyto$StationCode <- gsub("D16-Twitchell","D16",phyto$StationCode) -phyto$StationCode <- gsub("D16-Twitchel","D16",phyto$StationCode) -phyto$StationCode <- gsub("D16 - Twitchell","D16",phyto$StationCode) -phyto$StationCode <- gsub("D16 Twitchell","D16",phyto$StationCode) -phyto$StationCode <- gsub("NZ328","NZ325",phyto$StationCode) ## Typo in August 2019 -phyto$StationCode <- gsub("C3A-HOOD","C3A",phyto$StationCode) -phyto$StationCode <- gsub("C3A Hood","C3A",phyto$StationCode) -phyto$StationCode <- gsub("C3A- Hood","C3A",phyto$StationCode) -phyto$StationCode <- gsub("C3A-Hood","C3A",phyto$StationCode) -phyto$StationCode <- gsub("NZ542","NZS42",phyto$StationCode) -phyto$StationCode <- gsub("E26","EZ6",phyto$StationCode) -phyto$StationCode <- gsub("E22","EZ2",phyto$StationCode) # Typo in May 2018 - -## Remove extra Microcystis tows at D19 -phyto <- phyto %>% filter(StationCode != "D19 MC Tow") - -## Print out all station IDs to flag which ones to remove -#all.stations <- sort(unique(phyto$StationCode)) -#write(all.stations, file = "station_names.txt") - -## Confirm station IDs -unique(phyto$StationCode) -table(phyto$StationCode) - -## In Fall 2016, taxonomists began classifying the species -## Chroococcus microscopicus as Eucapsis microscopica. This is one of the most -## dominant species in this samples, so all taxa previously classified as -## C. microscopicus will be re-named E. microscopica - -phyto <- phyto %>% - mutate(Taxon = case_when(Taxon == 'Chroococcus microscopicus' ~ 'Eucapsis microscopica', - TRUE ~ Taxon)) %>% - mutate(Genus = case_when(Taxon == 'Eucapsis microscopica' ~ 'Eucapsis', - TRUE ~ Genus)) - -## The taxon Plagioselmis lacustris is inconsistently named, appearing sometimes as -## Rhodomonas lacustris. Change to Rhodomonas lacustris to avoid confusion. - -phyto <- phyto %>% - mutate(Taxon = case_when(Taxon == 'Plagioselmis lacustris' ~ 'Rhodomonas lacustris', - TRUE ~ Taxon)) %>% - mutate(Genus = case_when(Taxon == 'Rhodomonas lacustris' ~ 'Rhodomonas', - TRUE ~ Genus)) - -## Correct the genus label for a Chlorella entry -phyto$Genus <- gsub("cf Chlorella","Chlorella",phyto$Genus) - -sort(unique(phyto$Genus)) ## 238 unique genera - -## Add column for year and month for highlighting data -phyto <- phyto %>% mutate(Year = year(phyto$DateTime)) - -phyto <- phyto %>% mutate(Month = month(phyto$DateTime, label = T)) - -## Order month in calendar order rather than (default) alphabetical -phyto$Month = factor(phyto$Month, levels = month.abb) - -## Units for Density (unit, biovolume) are in per mL, will convert to per L -phyto <- phyto %>% mutate(across(Units.per.mL:BV.um3.per.mL, ~ .x * 1000,.keep = "unused")) - -## Rename headers b/c units are now in L -phyto <- phyto %>% - rename("Units.per.L" = "Units.per.mL") %>% - rename("BV.um3.per.L" = "BV.um3.per.mL") - -## Reorder columns -phyto <- phyto %>% - #relocate(Group, .after = Genus) %>% - relocate(Year, .after = DateTime) %>% - relocate(Month, .after = DateTime) - -## Summarize by genus -phyto_gen_EMP <- phyto %>% - group_by(Year, Month, DateTime, StationCode, Genus) %>% - summarize(across(Units.per.L:BV.um3.per.L, ~sum(.x, na.rm = TRUE))) %>% - ungroup - -## Read in Region data -regions <- read_csv("CSVs/station_regions_EMP.csv") - -# Combine region data and phyto data -phyto_gen_EMP <- left_join(phyto_gen_EMP, regions) - -# Reorder column -phyto_gen_EMP <- phyto_gen_EMP %>% relocate(Region, .after = StationCode) - -# check if there are any NAs in Region after the join -table(is.na(phyto_gen_EMP$Region)) # no NAs - -## Save data file -save(phyto_gen_EMP, file = "RData/phyto_gen_EMP.RData") diff --git a/phyto-final/phyto.data.processing.FASTR.R b/phyto-final/phyto.data.processing.FASTR.R deleted file mode 100644 index 175e969..0000000 --- a/phyto-final/phyto.data.processing.FASTR.R +++ /dev/null @@ -1,553 +0,0 @@ -## Extracting FASTR data from -## AEU's Yolo Bypass Phyto datasheets and formatting it to make graphs -## Converting biovolume data into biomass and LCEFA data -## 2/3/2022 - Current - -library("tidyverse");packageVersion("tidyverse") -library("lubridate");packageVersion("lubridate") -library("janitor");packageVersion("janitor") -library("vegan");packageVersion("vegan") -library("RColorBrewer");packageVersion("RColorBrewer") - -# Set working directory -setwd("./phyto_code/") -getwd() - -# Set visual theme in ggplot -theme_set(theme_bw()) - -# Clean workspace -rm(list=ls()) - -# Import AEU data files (comment out when finished) -#phyto_files <- dir(path = "data/csv", pattern = "\\.csv", full.names = T) -#phyto.all <- map_dfr(phyto_files, ~read_csv(.x)) - -## Save imported data files (takes a while) -#save(phyto.all, file = "RData/phyto_raw.RData") -load("RData/phyto_raw.RData") - -# Clean up column names -phyto <- phyto.all %>% clean_names(case = "big_camel") - -## Remove GALD measurements -phyto <- phyto %>% select(MethodCode:Biovolume10) - -## Remove empty rows -phyto <- phyto %>% filter_all(any_vars(!is.na(.))) - -## Remove weird row with only a single zero in biovolume -phyto <- phyto %>% drop_na(MethodCode) - -## Average all 10 biovolume measurements for each taxon -phyto <- phyto %>% rowwise() %>% mutate(BV.Avg = mean(c_across(Biovolume1:Biovolume10), na.rm = T)) - -# Remove Biovolume Columns -phyto <- phyto %>% select(!(Biovolume1:Biovolume10)) - -# Remove other unneeded columns -phyto <- phyto %>% select(!c("MethodCode","BsaTin","DiatomSoftBody","Synonym")) -phyto <- phyto %>% select(!(Gald:Shape)) -phyto <- phyto %>% select(!(VolumeReceivedML:NumberOfFieldsCounted)) - -## Fix samples with missing times -## Just replace with 12:00:00 b/c aren't doing any time-based analyses -phyto <- phyto %>% replace_na(list(SampleTime = "12:00:00")) - -## Get dates in the right format -## Some are 1/1/14 and others 1/1/2014 -phyto$SampleDate <- parse_date_time(phyto$SampleDate, c("%m/%d/%Y", "%m/%d/%y")) - -## Combine date and time column -phyto <- phyto %>% unite(DateTime, c("SampleDate","SampleTime"), sep = " ") - -phyto$DateTime <- as_datetime(phyto$DateTime, - tz = "US/Pacific", - format = c("%Y-%m-%d %H:%M:%OS")) - -## Check for missing dates -phyto %>% filter(is.na(DateTime)) ## No missing dates - -## Correct BSA header -phyto <- phyto %>% rename("TotalCells" = "NumberOfCellsPerUnit") - -## List of stations used for FASTR -stations <- c("RMB","RCS","RD22","I80","LIS","STTD","BL5","LIB","RYI","RVB") - -## Fix site names -phyto$StationCode <- gsub("SHER","SHR",phyto$StationCode) -phyto$StationCode <- gsub("BL-5","BL5",phyto$StationCode) - -## Print out all station IDs to flag which ones to remove -#all.stations <- sort(unique(phyto$StationCode)) -#write(all.stations, file = "station_names.txt") - -## Read in station names with flags and merge -keepers <- read_csv("CSVs/station_names_flagged.csv") -phyto <- left_join(phyto, keepers) - -## Remove data unrelated to project -phyto <- phyto %>% filter(Flag == "Keep") - -## Remove flag column -phyto <- phyto %>% select(!(Flag)) - -## Confirm station IDs -unique(phyto$StationCode) -table(phyto$StationCode) - -## Re-order stations from North to South -phyto$StationCode <- factor(as.character(phyto$StationCode), levels = stations) - -## Import Stations listed as Upstream and Downstream -region <- read_csv("CSVs/upstream_downstream_stations.csv") -region <- region %>% - rename("Region" = "UpDown") %>% - rename("StationCode" = "Site") - -region$Region <- factor(region$Region, levels = c("Upstream","Downstream")) - -## In Fall 2016, taxonomists began classifying the species -## Chroococcus microscopicus as Eucapsis microscopica. This is one of the most -## dominant species in this samples, so all taxa previously classified as -## C. microscopicus will be re-named E. microscopica - -phyto <- phyto %>% - mutate(Taxon = case_when(Taxon == 'Chroococcus microscopicus' ~ 'Eucapsis microscopica', - TRUE ~ Taxon)) %>% - mutate(Genus = case_when(Taxon == 'Eucapsis microscopica' ~ 'Eucapsis', - TRUE ~ Genus)) - -## The taxon Plagioselmis lacustris is inconsistently named, appearing sometimes as -## Rhodomonas lacustris. Change to Rhodomonas lacustris to avoid confusion. - -phyto <- phyto %>% - mutate(Taxon = case_when(Taxon == 'Plagioselmis lacustris' ~ 'Rhodomonas lacustris', - TRUE ~ Taxon)) %>% - mutate(Genus = case_when(Taxon == 'Rhodomonas lacustris' ~ 'Rhodomonas', - TRUE ~ Genus)) - -## Correct the genus label for a Chlorella entry -phyto$Genus <- gsub("cf Chlorella","Chlorella",phyto$Genus) - -sort(unique(phyto$Genus)) ## 153 unique genera - -## Add column for year and month for highlighting data -phyto <- phyto %>% mutate(Year = year(phyto$DateTime)) -phyto <- phyto %>% mutate(Month = month(phyto$DateTime, label = T)) - -## Order month in calendar order rather than (default) alphabetical -phyto$Month = factor(phyto$Month, levels = month.abb) - -## Add higher-level taxonomy names -taxa <- read_csv("CSVs/phyto_group_taxonomy.csv") -phyto <- left_join(phyto, taxa) - -## Check for NAs in group -unique(phyto$Group) ## no NAs - -## Look to see which ones are NAs -##phyto.NA <- phyto %>% filter(is.na(Group)) - -## Reorder columns -phyto <- phyto %>% - relocate(Group, .after = Genus) %>% - relocate(Year, .after = DateTime) %>% - relocate(Month, .after = DateTime) - -## Remove columns no longer needed -phyto <- phyto %>% select(!(Species)) - -## Convert biovolume to biomass using relationships from Menden-Deuer and Lussard -## (2000) doi: 10.4319/lo.2000.45.3.0569 -## Only relevant to group-level data -## Units of BV.Density are um^3 per L - -## Calculate Biomass (pg-C per cell) from Biovolume (um^3 per cell) -phyto <- phyto %>% - mutate(Biomass.pg.C = case_when(Group == "Diatoms" ~ 0.288 * (BV.Avg^0.811), - Group != "Diatoms" ~ 0.216 * (BV.Avg^0.939))) - -## Convert pg to ug (ug-C per L) -phyto <- phyto %>% mutate(Biomass.ug.C = Biomass.pg.C / 10^6, .keep = "unused") - -## Calculate Unit Density, Cell Density, and Biovolume Density -phyto <- phyto %>% - mutate(Units.per.mL = UnitAbundance * Factor) %>% - mutate(BV.um3.per.mL = TotalCells * BV.Avg * Factor) %>% - mutate(Cells.per.mL = TotalCells * Factor) - -## Calculate Biomass Density (pg-C per mL) -phyto <- phyto %>% - mutate(BM.ug.per.mL = Biomass.ug.C * Cells.per.mL) - -## Remove columns no longer needed -phyto <- phyto %>% select(DateTime:StationCode,Taxon:Group,Units.per.mL:BM.ug.per.mL) - -## Units for Density (unit, cell, biomass, and biovolume) are in per mL, will convert to per L because -## final units of biomass and LCEFA will be per liter. -phyto <- phyto %>% mutate(across(8:11, ~ .x * 1000,.keep = "unused")) - -## Rename headers b/c units are now in L -phyto <- phyto %>% rename("Units.per.L" = "Units.per.mL") -phyto <- phyto %>% rename("Cells.per.L" = "Cells.per.mL") -phyto <- phyto %>% rename("BV.um3.per.L" = "BV.um3.per.mL") -phyto <- phyto %>% rename("BM.ug.per.L" = "BM.ug.per.mL") - -################################################################################ -############## Analyze Biovolume Data for Outliers ############################# -################################################################################ - -## Subset to biovolume data only -phyto.BV <- phyto %>% select(DateTime:Group,BV.um3.per.L) - -## Identify outliers to remove -boxplot(phyto.BV$BV.um3.per.L) - -# Identify 0.1% and 99.9% percentiles of the data -quartiles <- quantile(phyto.BV$BV.um3.per.L, probs=c(0.001, 0.999), na.rm = TRUE) - -# List upper cutoff (99.9%) -cutoff <- quartiles[2] - -# Filter dataset to show all taxa above this 99.9% cutoff -outliers <- phyto.BV %>% filter(BV.um3.per.L > cutoff) - -list(outliers) ## 4 of top 6 are Spirogyra. -phyto.BV %>% filter(Genus == "Spirogyra")# Only 5 total samples w/ Spirogyra - -# Remove Spirogyra from datasets -# 4/5 are above 99.9% cutoff, 5 is also very very abundant -# These are benthic algae that clump, more likely to be "bullseye" samples -# that got a big blob or clumb -phyto <- phyto %>% filter(Genus != "Spirogyra") - -## Remove 2013 data (very limited) -phyto <- phyto %>% filter(Year != 2013) - -rm(phyto.BV) - -## Summarize by algal group -phyto.grp <- phyto %>% - group_by(Year, Month, DateTime, StationCode, Group) %>% - summarize(across(Units.per.L:BM.ug.per.L, ~sum(.x, na.rm = TRUE))) %>% - ungroup - -## Summarize by genus -phyto.gen <- phyto %>% - group_by(Year, Month, DateTime, StationCode, Genus) %>% - summarize(across(Units.per.L:BM.ug.per.L, ~sum(.x, na.rm = TRUE))) %>% - ungroup - -## Calculate LCEFA composition based on method in Galloway & Winder (2015) -## doi: 10.1371/journal.pone.0130053 - -phyto.grp <- phyto.grp %>% - mutate(LCEFA.per.L = case_when(Group == 'Diatoms' ~ BM.ug.per.L*2.77/100, - Group == 'Cyanobacteria' ~ BM.ug.per.L*0.02/100, - Group == 'Green Algae' ~ BM.ug.per.L*0.52/100, - Group == 'Cryptophytes' ~ BM.ug.per.L*2.13/100, - Group == 'Dinoflagellates' ~ BM.ug.per.L*3.82/100, - TRUE ~ 0)) - -##### Cat Pien's Code for Merging Flow Designations ##### -# -# FlowDesignation <- read_csv("FlowDatesDesignations.csv") -# Update with 45 days limit on either end -### Update 5/27/22 TF added >= to two of the mutate commands to avoid removing -### samples falling on the PreFlowEnd and PostFlowStart dates. - -## Upload flow dates and categories -FlowDesignation <- read_csv("CSVs/FlowDatesDesignations_45days.csv") - -# Format dates as dates -FlowDesignation$PreFlowStart <- mdy(FlowDesignation$PreFlowStart) -FlowDesignation$PreFlowEnd <- mdy(FlowDesignation$PreFlowEnd) -FlowDesignation$PostFlowStart <- mdy(FlowDesignation$PostFlowStart) -FlowDesignation$PostFlowEnd <- mdy(FlowDesignation$PostFlowEnd) - -# Remove column with net flow days -FlowDesignation <- FlowDesignation %>% select(!(NetFlowDays)) - -# Save file for use in making plots -save(FlowDesignation, file = "RData/FlowDesignation.RData") - -# Merge data from FlowDesignation Table (Water Year Type, Flow days and type) -# Filter only Pre-During-Post Flow Action Data. -phyto.grp <- inner_join(phyto.grp,FlowDesignation, by = "Year") -phyto.grp <- phyto.grp %>% - mutate(ActionPhase = ifelse(DateTime > PreFlowStart & DateTime < PreFlowEnd, "Before", NA)) %>% - mutate(ActionPhase = replace(ActionPhase, DateTime >= PreFlowEnd & DateTime < PostFlowStart, "During")) %>% ## added >= to avoid removing samples that fall on PreFlowEnd date - mutate(ActionPhase = replace(ActionPhase, DateTime >= PostFlowStart & DateTime < PostFlowEnd, "After")) %>% ## added >= to avoid removing samples that fall on PostFlowStart date - filter(!is.na(ActionPhase)) %>% - select(-c(PreFlowStart:PostFlowEnd)) - -phyto.gen <- inner_join(phyto.gen,FlowDesignation, by = "Year") -phyto.gen <- phyto.gen %>% - mutate(ActionPhase = ifelse(DateTime > PreFlowStart & DateTime < PreFlowEnd, "Before", NA)) %>% - mutate(ActionPhase = replace(ActionPhase, DateTime >= PreFlowEnd & DateTime < PostFlowStart, "During")) %>% ## added >= to avoid removing samples that fall on PreFlowEnd date - mutate(ActionPhase = replace(ActionPhase, DateTime >= PostFlowStart & DateTime < PostFlowEnd, "After")) %>% ## added >= to avoid removing samples that fall on PostFlowStart date - filter(!is.na(ActionPhase)) %>% - select(-c(PreFlowStart:PostFlowEnd)) - -# Order the new ActionPhase so that it plots in the order Pre < During < Post -phase.order <- c("Before","During","After") -phyto.grp$ActionPhase <- factor(as.character(phyto.grp$ActionPhase), levels = phase.order) -phyto.gen$ActionPhase <- factor(as.character(phyto.gen$ActionPhase), levels = phase.order) - -## Set group display order -group.order <- c("Diatoms","Cyanobacteria","Green Algae","Cryptophytes","Ciliates","Dinoflagellates","Golden Algae","Other") -phyto.grp$Group <- factor(as.character(phyto.grp$Group), levels = group.order) - -## Add Region to data frames -phyto.grp <- left_join(phyto.grp, region) -phyto.gen <- left_join(phyto.gen, region) - -phyto.grp <- phyto.grp %>% relocate(Region, .after = StationCode) -phyto.gen <- phyto.gen %>% relocate(Region, .after = StationCode) - -## Add in Flow Pulse Category Data -FlowPulseCategory <- read_csv("CSVs/FlowPulseType.csv") -FlowPulseCategory <- FlowPulseCategory %>% rename("FlowPulseCategory" = "FlowPulseType") - -## Add Flow Pulse Category to data frames to be exported -phyto.grp <- left_join(phyto.grp, FlowPulseCategory) -phyto.gen <- left_join(phyto.gen, FlowPulseCategory) - -## Relocate metadata columns -phyto.grp <- phyto.grp %>% - relocate(FlowPulseCategory, .before = ActionPhase) %>% - relocate(WYType:ActionPhase, .before = Units.per.L) - -phyto.gen <- phyto.gen %>% - relocate(FlowPulseCategory, .before = ActionPhase) %>% - relocate(WYType:ActionPhase, .before = Units.per.L) - -## Set order for stations to be displayed -phyto.grp$StationCode <- factor(as.character(phyto.grp$StationCode), levels = stations) -phyto.gen$StationCode <- factor(as.character(phyto.gen$StationCode), levels = stations) - -## Separate out data frames by density type and add zeros -# Biovolume Density + Group -phyto.grp.BV <- phyto.grp %>% select(Year:ActionPhase,BV.um3.per.L) - -# Add zeros for taxa that weren't detected -temp <- pivot_wider(phyto.grp.BV, - names_from = Group, - values_from = BV.um3.per.L, - values_fill = 0) - -phyto.grp.BV <- pivot_longer(temp, - cols = Cyanobacteria:last_col(), - names_to = "Group", - values_to = "BV.um3.per.L") - -# Biomass Density + Group -phyto.grp.BM <- phyto.grp %>% select(Year:ActionPhase,BM.ug.per.L) - -# Add zeros for taxa that weren't detected -temp <- pivot_wider(phyto.grp.BM, - names_from = Group, - values_from = BM.ug.per.L, - values_fill = 0) - -phyto.grp.BM <- pivot_longer(temp, - cols = Cyanobacteria:last_col(), - names_to = "Group", - values_to = "BM.ug.per.L") - -# LCEFA Density + Group -phyto.grp.LCEFA <- phyto.grp %>% select(Year:ActionPhase,LCEFA.per.L) - -# Add zeros for taxa that weren't detected -temp <- pivot_wider(phyto.grp.LCEFA, - names_from = Group, - values_from = LCEFA.per.L, - values_fill = 0) - -#Remove taxa that aren't used for LCEFA calculations -temp <- temp %>% select(!c("Ciliates","Other","Golden Algae")) - -phyto.grp.LCEFA <- pivot_longer(temp, - cols = Cyanobacteria:last_col(), - names_to = "Group", - values_to = "LCEFA.per.L") - -rm(temp) - -# Biovolume Density + Genus -#phyto.gen.BV <- phyto.gen %>% select(1:10,BV.um3.per.L) -# -# Add zeros for taxa that weren't detected -# temp <- pivot_wider(phyto.gen.BV, -# names_from = Genus, -# values_from = BV.um3.per.L, -# values_fill = 0) -# -# phyto.gen.BV <- pivot_longer(temp, -# cols = 10:last_col(), -# names_to = "Genus", -# values_to = "BV.um3.per.L") -# - - -## Summarize totals -phyto.sum <- phyto.grp %>% - group_by(Year, Month, DateTime, StationCode, Region, WYType, FlowPulseType, FlowPulseCategory, ActionPhase) %>% - summarize(across(Units.per.L:LCEFA.per.L, ~sum(.x, na.rm = TRUE))) %>% - rename("Total.Units.per.L" = "Units.per.L") %>% - rename("Total.Cells.per.L" = "Cells.per.L") %>% - rename("Total.BV.per.L" = "BV.um3.per.L") %>% - rename("Total.BM.per.L" = "BM.ug.per.L") %>% - rename("Total.LCEFA.per.L" = "LCEFA.per.L") %>% - ungroup - -## Add zeros to genus data frame -temp <- phyto.gen %>% select(Year:ActionPhase,BV.um3.per.L) - -temp <- pivot_wider(temp, - names_from = "Genus", - values_from = "BV.um3.per.L", - values_fill = 0) - -phyto.grp.gen.BV <- pivot_longer(temp, - cols = Chlorella:last_col(), - names_to = "Genus", - values_to = "BV.um3.per.L") - -## Re-add group data to genus table -phyto.grp.gen.BV <- left_join(phyto.grp.gen.BV, taxa) - -## Rearrange location of Group column -phyto.grp.gen.BV <- phyto.grp.gen.BV %>% relocate(Group, .before = Genus) - -## Calculate relative abundance of biovolume by group -phyto.grp.BV.RA <- phyto.grp.BV %>% - group_by(Year, Region, ActionPhase, Group) %>% - summarize(Mean.BV.per.L = mean(BV.um3.per.L)) %>% - ungroup() - -phyto.grp.BV.RA <- phyto.grp.BV.RA %>% - group_by(Year, Region, ActionPhase) %>% - mutate(MeanRelAbund = Mean.BV.per.L/sum(Mean.BV.per.L)) %>% - ungroup - -## Calculate relative abundance of each genus within a group -phyto.grp.gen.BV.RA <- phyto.grp.gen.BV %>% - group_by(Group, Genus) %>% - summarize(Mean.BV.per.L = mean(BV.um3.per.L)) %>% - mutate(MeanRelAbund = Mean.BV.per.L/sum(Mean.BV.per.L)) %>% - ungroup() - -# Highlight most abundant genera -phyto.grp.gen.BV.RA <- phyto.grp.gen.BV.RA %>% - mutate(Type = case_when(MeanRelAbund > 0.05 ~ Genus, - TRUE ~ 'Other')) - -# lump together all "other" taxa -phyto.grp.gen.BV.RA.tot <- phyto.grp.gen.BV.RA %>% - group_by(Group, Type) %>% - summarize(MeanRelAbund = sum(MeanRelAbund)) %>% - ungroup() - -## Create cross-walk table for what taxa are lumped into the Other category -phyto.types <- phyto.grp.gen.BV.RA %>% select(Genus, Type) - -## Calculate NMDS axes -## Create Biovolume-only data frame at genus level -phyto.gen.BV <- phyto.gen %>% select(Year:ActionPhase,BV.um3.per.L) - -## Generate NMDS data with metaMDS by each year separately - -years <- unique(phyto.gen.BV$Year) -years <- sort(years, decreasing = F, na.last = T) - -## Create blank data frame to fill stresses in -stresses <- data.frame(Year = years, stress = NA) -ls_dfs <- list() - -for (i in 1:length(years)) { - genw <- pivot_wider(phyto.gen.BV, - names_from = "Genus", - values_from = "BV.um3.per.L", - values_fill = 0) - - genw <- genw %>% filter(Year == years[i]) - - #look at number of observations per station - #table(genw$StationCode) - - # Calculate the nMDS using vegan - # A good rule of thumb: stress < 0.05 provides an excellent representation in reduced dimensions, - # < 0.1 is great, < 0.2 is good/ok, and stress < 0.3 provides a poor representation. - phyto.NMDS <- metaMDS( - comm = genw[c(10:142)], - distance = "bray", - k = 3, - trymax = 10000 - #trace = F, - #autotransform = F - ) - - #paste0("phyto.NMDS.",year) <- phyto.NMDS - - #save(phyto.NMDS, file = paste("NMDS", year,".RData")) - - stresses$stress[which(stresses$Year == years[i])] <- phyto.NMDS$stress - - #look at Shepard plot which shows scatter around the regression between the interpoint distances - #in the final configuration (i.e., the distances between each pair of communities) against their - #original dissimilarities. - stressplot(phyto.NMDS) - - # Using the scores function from vegan to extract the site scores and convert to a data.frame - data.scores <- as_tibble(scores(phyto.NMDS, display = "sites")) - - # Combine metadata with NMDS data scores to plot in ggplot - meta <- genw %>% select(1:9) - meta <- cbind(meta, data.scores) - - # Read in years as a character otherwise it shows up as a number and gets displayed as a gradient - meta$Year <- as.character(meta$Year) - - ls_dfs[[i]] <- meta - -} - -phyto.gen.NMDS <- do.call(rbind, ls_dfs) - -# Calculate PERMANOVA comparisons for phytoplankton communities by Region and Year -genw <- pivot_wider(phyto.gen.BV, - names_from = "Genus", - values_from = "BV.um3.per.L", - values_fill = 0) - -adon.results <- adonis2(genw[c(10:142)] ~ genw$Region + genw$Year, - strata = genw$StationCode, - method = "bray", - perm = 999) - -adon.results <- adonis2(genw[c(10:142)] ~ genw$Region*genw$ActionPhase + genw$Year*genw$ActionPhase + genw$Region*genw$Year, - strata = genw$StationCode, - method = "bray", - perm = 999) - -dm_phyto_gen <- vegdist(genw[c(10:142)], method = "bray") - -bd <- betadisper(dm_phyto_gen, genw$Region) - -anova(bd) -permutest(bd) - -## Save data files -save(phyto.sum, file = "RData/phyto.sum.RData") -save(phyto.gen, file = "RData/phyto.gen.RData") -save(phyto.grp.gen.BV, file = "RData/phyto.grp.gen.BV.RData") -save(phyto.grp.BV, file = "RData/phyto.grp.BV.RData") -save(phyto.grp.BM, file = "RData/phyto.grp.BM.RData") -save(phyto.grp.LCEFA, file = "RData/phyto.grp.LCEFA.RData") -save(phyto.gen.NMDS, file = "RData/phyto.gen.NMDS.Rdata") -save(phyto.grp.gen.BV.RA.tot, file = "RData/phyto.grp.gen.BV.RA.tot.Rdata") -save(phyto.types, file = "RData/phyto.types.RData") - -write_csv(stresses, file = "analyses/NMDS_stress.csv") diff --git a/phyto-final/phyto.data.processing.combined.R b/phyto-final/phyto.data.processing.combined.R deleted file mode 100644 index e318802..0000000 --- a/phyto-final/phyto.data.processing.combined.R +++ /dev/null @@ -1,325 +0,0 @@ -# Extracting phyto data from EMP and AEU datasheets -# AEU's Yolo Bypass Phyto datasheets -# 9/7/2022 -# Goal is to combine these datasets while retaining all data -# including GALD and Biovolume - -library("tidyverse");packageVersion("tidyverse") -library("lubridate");packageVersion("lubridate") -library("janitor");packageVersion("janitor") - -# Set working directory -setwd("./phyto_code") -getwd() - -# Clean workspace -rm(list=ls()) - -# Importing Raw EMP Data ------------------------------------------------------- - -# Import EMP data files -phyto_files_EMP <- dir(path = "data/EMP/csv", pattern = "\\.csv", full.names = T) - -df_phyto_EMP <- map_dfr(phyto_files_EMP, ~read_csv(.x)) - -# Read in files with non-standard headers individually -df_Dec2021 <- read_csv("data/EMP/oddballs/December 2021.csv") -df_Nov2021 <- read_csv("data/EMP/oddballs/November 2021.csv") -df_Sep2013 <- read_csv("data/EMP/oddballs/September 2013.csv") -df_Nov2013 <- read_csv("data/EMP/oddballs/November 2013.csv") - -# Combine like oddball dfs -df_phyto2013 <- bind_rows(df_Sep2013, df_Nov2013) -df_phyto2021 <- bind_rows(df_Dec2021, df_Nov2021) - -# Remove individual dfs -rm(df_Dec2021) -rm(df_Nov2021) -rm(df_Nov2013) -rm(df_Sep2013) - -# Rename headers to match standard BSA headers Oddballs actually have the -#"correct" name of Total Cells rather than the incorrect "Number of cells per -# unit" - -df_phyto2013 <- df_phyto2013 %>% - rename("Number of cells per unit" = "Total Cells Counted") - -df_phyto2021 <- df_phyto2021 %>% - rename("Number of cells per unit" = "Total Number of Cells") %>% - rename("Unit Abundance" = "Unit Abundance (# of Natural Units)") - -# Combine oddball files with others -df_phyto_EMP <- bind_rows(df_phyto_EMP, df_phyto2013) -df_phyto_EMP <- bind_rows(df_phyto_EMP, df_phyto2021) - -# Remove unneeded dfs -rm(df_phyto2013) -rm(df_phyto2021) - -# Remove empty rows -df_phyto_EMP <- df_phyto_EMP %>% filter_all(any_vars(!is.na(.))) - -# Correct GALD, which is imported into two separate columns -# Test to see if NAs are 'either/or' and that there aren't some rows with a -# value in both GALD and GALD 1 - -sum(is.na(df_phyto_EMP$GALD)) # Total is 5880 -sum(is.na(df_phyto_EMP$`GALD 1`)) # Total is 8072 - -# Sum of NAs is 13952 which is the same as the number of rows in the df. -# This shows that there aren't any rows with two values so that we can -# Combine them without any issues. - -# Move -df_phyto_EMP <- df_phyto_EMP %>% relocate(`GALD 1`, .after = GALD) - -# Combine both GALD columns -df_phyto_EMP <- df_phyto_EMP %>% - rowwise() %>% - mutate(GALD.Tot = sum(c_across(GALD:`GALD 1`), na.rm = TRUE)) - -# Remove old GALD columns and rename GALD.Tot -df_phyto_EMP <- df_phyto_EMP %>% - select(!(GALD:`GALD 1`)) %>% - rename("GALD" = "GALD.Tot") - -# Clean up column names -df_phyto_EMP <- df_phyto_EMP %>% clean_names(case = "big_camel") - -df_phyto_EMP <- df_phyto_EMP %>% rename("GALD" = "Gald") - -# Remove blank columns -df_phyto_EMP <- df_phyto_EMP %>% select_if(~ !all(is.na(.))) - -# Remove columns that just have the method code "Phyto" as well as -# pre-calculated organisms per mL -df_phyto_EMP <- df_phyto_EMP %>% select(SampleDate:Biovolume10,GALD) - -# Add column to indicate what study it came from -df_phyto_EMP <- df_phyto_EMP %>% mutate(Study = "EMP", .after = StationCode) - -# Importing Raw AEU Data ------------------------------------------------------- - -# Import AEU data files (comment out when finished) -phyto_files_AEU <- dir(path = "data/csv", pattern = "\\.csv", full.names = T) -df_phyto_FASTR <- map_dfr(phyto_files_AEU, ~read_csv(.x)) - -# Remove empty rows -df_phyto_FASTR <- df_phyto_FASTR %>% filter_all(any_vars(!is.na(.))) - -# Remove weird row with only a single zero in biovolume -df_phyto_FASTR <- df_phyto_FASTR %>% drop_na(MethodCode) - -# Clean up column names -df_phyto_FASTR <- df_phyto_FASTR %>% clean_names(case = "big_camel") - -# Filter out samples from other projects. Read in station names with flags and -# merge together. -keepers <- read_csv("CSVs/station_names_flagged.csv") -df_phyto_FASTR <- left_join(df_phyto_FASTR, keepers) -rm(keepers) - -# Remove data unrelated to project -df_phyto_FASTR <- df_phyto_FASTR %>% filter(Flag == "Keep") - -# Remove flag column -df_phyto_FASTR <- df_phyto_FASTR %>% select(!(Flag)) - -# Confirm station IDs -unique(df_phyto_FASTR$StationCode) # Shows only 10 FASTR stations -table(df_phyto_FASTR$StationCode) - -# Remove blank columns -df_phyto_FASTR <- df_phyto_FASTR %>% select_if(~ !all(is.na(.))) - -# Remove individual measurment columns as they are only present in a small -# number of samples. -df_phyto_FASTR <- df_phyto_FASTR %>% select(!(Dimension:DepthM)) - -# Remove secondary and tertiary GALD measurements as they don't vary much and -# aren't present in EMP data -df_phyto_FASTR <- df_phyto_FASTR %>% select(!(Gald2:Gald3)) - -# Correct GALD, which is imported into two separate columns. Test to see if NAs -# are 'either/or' and that there aren't some rows with a value in both GALD and -# GALD 1 - -sum(is.na(df_phyto_FASTR$Gald)) # Total is 1791 -sum(is.na(df_phyto_FASTR$Gald1)) # Total is 4535 - -# Sum of NAs is 6326 which is the same as the number of rows in the df. This -# shows that there aren't any rows with two values so that we can Combine them -# without any issues. - -# Move Gald1 column -df_phyto_FASTR <- df_phyto_FASTR %>% relocate(Gald1, .after = Gald) - -# Combine both GALD columns -df_phyto_FASTR <- df_phyto_FASTR %>% - rowwise() %>% - mutate(GALD.Tot = sum(c_across(Gald:Gald1), na.rm = TRUE)) - -# Check if rows have two NAs or no NAs in the GALD columns -# test <- df_phyto_FASTR %>% -# mutate(GALD.Value = case_when(is.na(Gald) & is.na(Gald1) ~ "Fix", -# TRUE ~ "Okay")) - -# Remove old GALD columns and rename GALD.Tot -df_phyto_FASTR <- df_phyto_FASTR %>% - select(!(Gald:Gald1)) %>% - rename("GALD" = "GALD.Tot") - -# Remove MethodCode column that just says "Phyto" -df_phyto_FASTR <- df_phyto_FASTR %>% select(!(MethodCode)) - -# Add column to indicate what study it came from -df_phyto_FASTR <- df_phyto_FASTR %>% mutate(Study = "FASTR", .after = StationCode) - -# Combine Data & Analyze ------------------------------------------------------- - -df_phyto <- bind_rows(df_phyto_EMP, df_phyto_FASTR) - -# Average all 10 biovolume measurements for each taxon -df_phyto <- df_phyto %>% rowwise() %>% - mutate(BV.Avg = mean(c_across(Biovolume1:Biovolume10), na.rm = T)) %>% - select(!(Biovolume1:Biovolume10)) # Remove Individual Biovolume Columns - -# Remove unneeded columns -df_phyto <- df_phyto %>% select(!c("BsaTin","DiatomSoftBody")) -df_phyto <- df_phyto %>% select(!(ColonyFilamentIndividualGroupCode:Shape)) -df_phyto <- df_phyto %>% select(!(VolumeReceivedML:NumberOfFieldsCounted)) - -# Fix samples with missing times -# Just replace with 12:00:00 b/c aren't doing any time-based analyses -df_phyto <- df_phyto %>% replace_na(list(SampleTime = "12:00:00")) - -# Get dates in the right format. Some are 1/1/14 and others 1/1/2014. -df_phyto$SampleDate <- mdy(df_phyto$SampleDate) - -# Combine date and time column -df_phyto <- df_phyto %>% unite(DateTime, c("SampleDate","SampleTime"), sep = " ") #, remove = FALSE, na.rm = FALSE) - -df_phyto$DateTime <- as_datetime(df_phyto$DateTime, - tz = "US/Pacific", - format = c("%Y-%m-%d %H:%M:%OS")) - -# Check for missing dates -df_phyto %>% filter(is.na(DateTime)) # No missing dates - -# Correct BSA header -df_phyto <- df_phyto %>% rename("TotalCells" = "NumberOfCellsPerUnit") - -# Calculate Unit Density & Biovolume Density -df_phyto <- df_phyto %>% - mutate(Units.per.mL = UnitAbundance * Factor) %>% - mutate(BV.um3.per.mL= TotalCells * BV.Avg * Factor) - -# Add column for year and month for highlighting data -df_phyto <- df_phyto %>% - mutate(Year = year(df_phyto$DateTime)) %>% - mutate(Month = month(df_phyto$DateTime, label = T)) - -# Order month in calendar order rather than (default) alphabetical -df_phyto$Month = factor(df_phyto$Month, levels = month.abb) - -# Reorder date/time columns -df_phyto <- df_phyto %>% - relocate(Year, .after = DateTime) %>% - relocate(Month, .after = DateTime) - -# Convert years to factors for plotting -df_phyto$Year <- as.factor(df_phyto$Year) - -# Data Cleaning ---------------------------------------------------------------- - -# Fix EMP site names -df_phyto$StationCode <- gsub("EZ6 SAC","EZ6",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ6SAC","EZ6",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ6SJR","EZ6-SJR",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ2SAC","EZ2",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ2 SAC","EZ2",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ2SJR","EZ2-SJR",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ2 SJR","EZ2-SJR",df_phyto$StationCode) -df_phyto$StationCode <- gsub("EZ6 SJR","EZ6-SJR",df_phyto$StationCode) -df_phyto$StationCode <- gsub("D16-Twitchell","D16",df_phyto$StationCode) -df_phyto$StationCode <- gsub("D16-Twitchel","D16",df_phyto$StationCode) -df_phyto$StationCode <- gsub("D16 - Twitchell","D16",df_phyto$StationCode) -df_phyto$StationCode <- gsub("D16 Twitchell","D16",df_phyto$StationCode) -df_phyto$StationCode <- gsub("NZ328","NZ325",df_phyto$StationCode) # Typo in August 2019 -df_phyto$StationCode <- gsub("C3A-HOOD","C3A",df_phyto$StationCode) -df_phyto$StationCode <- gsub("C3A Hood","C3A",df_phyto$StationCode) -df_phyto$StationCode <- gsub("C3A- Hood","C3A",df_phyto$StationCode) -df_phyto$StationCode <- gsub("C3A-Hood","C3A",df_phyto$StationCode) -df_phyto$StationCode <- gsub("NZ542","NZS42",df_phyto$StationCode) -df_phyto$StationCode <- gsub("E26","EZ6",df_phyto$StationCode) -df_phyto$StationCode <- gsub("E22","EZ2",df_phyto$StationCode) # Typo in May 2018 - -# Fix AEU site names -df_phyto$StationCode <- gsub("SHER","SHR",df_phyto$StationCode) -df_phyto$StationCode <- gsub("BL-5","BL5",df_phyto$StationCode) - -# Remove extra Microcystis tows at D19 -df_phyto <- df_phyto %>% filter(StationCode != "D19 MC Tow") - -# In Fall 2016, taxonomists began classifying the species -# Chroococcus microscopicus as Eucapsis microscopica. This is one of the most -# dominant species in this samples, so all taxa previously classified as -# C. microscopicus will be re-named E. microscopica - -df_phyto <- df_phyto %>% - mutate(Taxon = case_when(Taxon == 'Chroococcus microscopicus' ~ 'Eucapsis microscopica', - TRUE ~ Taxon)) %>% - mutate(Genus = case_when(Taxon == 'Eucapsis microscopica' ~ 'Eucapsis', - TRUE ~ Genus)) - -# The taxon Plagioselmis lacustris is inconsistently named, appearing sometimes as -# Rhodomonas lacustris. Change to Rhodomonas lacustris to avoid confusion. - -df_phyto <- df_phyto %>% - mutate(Taxon = case_when(Taxon == 'Plagioselmis lacustris' ~ 'Rhodomonas lacustris', - TRUE ~ Taxon)) %>% - mutate(Genus = case_when(Taxon == 'Rhodomonas lacustris' ~ 'Rhodomonas', - TRUE ~ Genus)) - -# Correct the genus label for a Chlorella entry -df_phyto$Genus <- gsub("cf Chlorella","Chlorella",df_phyto$Genus) - -# Units for Density (unit, biovolume) are in per mL, will convert to per L -df_phyto <- df_phyto %>% mutate(across(Units.per.mL:BV.um3.per.mL, ~ .x * 1000,.keep = "unused")) - -# Rename headers b/c units are now in L -df_phyto <- df_phyto %>% - rename("Units.per.L" = "Units.per.mL") %>% - rename("BV.um3.per.L" = "BV.um3.per.mL") - -# Add Taxonomy Data------------------------------------------------------------- - -# Create list of all unique genera in the combined dataset and comment out once finished -# genera <- as_tibble_col(sort(unique(df_phyto$Genus)), column_name = "Genus") - -# Read in previous list of taxa -# taxa <- read_csv("CSVs/phyto_group_taxonomy.csv") -# genera <- left_join(genera, taxa) -# -# write_csv(genera, file = "list_of_genera.csv") - -# Read in CSV with manually-added WoRMS classification -taxa <- read_csv("CSVs/phyto_group_classification.csv") - -df_phyto <- left_join(df_phyto, taxa) - -# Check if any groups are missing -sum(is.na(df_phyto$Group)) # none missing -rm(taxa) - -# Save df to use for making plots -save(df_phyto, file = "RData/df_phyto.RData") - -# Calculate Biomass Values------------------------------------------------------ -# Convert biovolume to biomass using relationships from Menden-Deuer and Lussard -# (2000) doi: 10.4319/lo.2000.45.3.0569 -# Only relevant to group-level data -# Units of BV.Density are um^3 per L - - diff --git a/phyto-final/phyto.error.calcs.R b/phyto-final/phyto.error.calcs.R deleted file mode 100644 index 66a0168..0000000 --- a/phyto-final/phyto.error.calcs.R +++ /dev/null @@ -1,33 +0,0 @@ -## Calculate Standard Error for Total Phytoplankton Biomass -## FASTR Synthesis Project -## 6/8/2022 TMF - -#library("tidyverse");packageVersion("tidyverse") -library("Rmisc");packageVersion("Rmisc") - -# Set working directory -setwd("./phyto_code/") -getwd() - -# Clean workspace -rm(list=ls()) - -## Load total biovolume data for FASTR project -load("RData/phyto.sum.RData") -load("RData/phyto.grp.BV.RData") - -## Calculate standard error for total phyto BV -phyto.grp.sum.error <- summarySE(phyto.sum, - measurevar="Total.BV.per.L", - groupvars=c("Year","StationCode","ActionPhase")) - -## Calculate standard error for group-level phyto BV -phyto.grp.error <- summarySE(phyto.grp.BV, - measurevar="BV.um3.per.L", - groupvars=c("Year","ActionPhase","Region","Group")) - -## Save RData File -save(phyto.grp.sum.error, file = "RData/phyto.grp.sum.error.RData") - -## Save as CSV file -#write.csv(phyto.grp.error, file = "phyto.grp.error.csv") diff --git a/phyto-final/phyto.plots.FASTR.R b/phyto-final/phyto.plots.FASTR.R deleted file mode 100644 index cc8f392..0000000 --- a/phyto-final/phyto.plots.FASTR.R +++ /dev/null @@ -1,617 +0,0 @@ -# Load Libraries and Data Files ------------------------------------------------ -# Create data plots for FASTR synthesis Report -# Using correct biovolume data (revised Feb 2022) -# 2/15/2022 - -library(tidyverse) -library(lubridate) -library(RColorBrewer) - -# Set working directory -setwd("./phyto_code/") -getwd() - -# Set visual theme in ggplot -theme_set(theme_bw()) - -# Clean workspace -rm(list=ls()) - -# Load biovolume density data at group and genus level -load("RData/phyto.sum.RData") -load("RData/phyto_gen_EMP.RData") -load("RData/phyto.gen.RData") -load("RData/phyto.gen.NMDS.RData") -load("RData/phyto.grp.gen.BV.RData") -load("RData/phyto.grp.BV.RData") -load("RData/phyto.grp.BM.RData") -load("RData/phyto.grp.LCEFA.RData") -load("RData/phyto.grp.sum.error.RData") -load("RData/FlowDesignation.RData") -load("RData/phyto.grp.gen.BV.RA.tot.Rdata") -load("RData/phyto.types") - -test <- phyto.grp.BV %>% - group_by(Year, StationCode, Region, ActionPhase, DateTime) %>% - summarize(Total.BV = sum(BV.um3.per.L, na.rm = TRUE)) %>% - ungroup - -plot <- ggplot(test, aes(x = Year, fill = ActionPhase)) + - geom_histogram() - -plot - -plot2 <- ggplot(test, aes(x = Year, fill = Region)) + - geom_histogram() - -plot2 - -plot3 <- ggplot(test, aes(x = Year, fill = StationCode)) + - geom_histogram() - -plot3 - -# Create Biovolume-only data frame at genus level -phyto.gen.BV <- phyto.gen %>% select(Year:Region,Genus:ActionPhase,BV.um3.per.L) - -# Create custom color palette -brewer.pal(n=8, name = "Set1") - -# Set folder name to output graphs into -output <- "plots" - -# Format Flow Designation data frame for graphing -FlowDesignation <- pivot_longer(FlowDesignation, - cols = PreFlowStart:PostFlowEnd, - names_to = "Phase", - values_to = "Date") - -FlowDesignation$Date <- as_datetime(FlowDesignation$Date,tz = "US/Pacific") - -# Pie Chart for Overall Taxa --------------------------------------------------- -# Plot general relative abundance for all data. -phyto.RA <- phyto.grp.BV %>% - group_by(Group) %>% - summarize(Mean.BV.per.L = mean(BV.um3.per.L)) %>% - mutate(MeanRelAbund = Mean.BV.per.L/sum(Mean.BV.per.L)) %>% - ungroup() - -# Set group display order -phyto.RA <- phyto.RA %>% - arrange(desc(MeanRelAbund)) %>% - mutate(Group = factor(Group, levels = Group)) - -#type.order <- c("Aulacoseira","Cyclotella","Ulnaria","Other") -#phyto.dia.BV$Type <- factor(as.character(phyto.dia.BV$Type), levels = type.order) - -phyto.BV.RA.pie <- ggplot(phyto.RA, aes(x = "", y = MeanRelAbund, fill = Group)) + - geom_bar(stat="identity", width=1, color = "white") + - coord_polar("y", start=0) + - theme_void() - -phyto.BV.RA.pie - -ggsave(path = output, - filename = "phyto_BV_RA_pie.pdf", - device = "pdf", - scale=1.0, - units="in", - height=3.5, - width=3, - dpi="print") - -# Relative Abundance Plots ----------------------------------------------------- -# Make individual bar plots to export to Illustrator -groups <- unique(phyto.grp.gen.BV.RA.tot$Group) - -for (group in groups) { - df_temp <- phyto.grp.gen.BV.RA.tot %>% - filter(Group == group) - - RA.plot <- ggplot(df_temp, - aes(x = Group, - y = MeanRelAbund, - fill = Type)) + - geom_bar(data = df_temp, - position = "stack", - width = 1, - stat = "summary", - fun = "mean") + - theme(panel.background = element_rect(fill = "white", linetype = 0)) + - theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + - labs(x = "Group", - y = "Relative Abundance (%)") - - RA.plot - - ggsave(path = output, - filename = paste0("phyto_RA_",group,".png"), - device = "png", - scale=1.0, - units="in", - height=3.5, - width=3, - dpi="print") - - rm(df_temp) - -} - -# Box plots for Total Phytoplankton Biovolume ---------------------------------- - -# Plot Upstream and Downstream Total BV by ActionPhase for each year -fig1 <- ggplot(data=phyto.sum, aes(y= log10(Total.BV.per.L), - x = Region, - fill = ActionPhase)) + - geom_boxplot() - -fig1 + - labs(title = "Total Phytoplankton Abundance by Year", - y = bquote(Log[10]~'Biovolume Density'~(um^3~L^-1)), - x = "Sampling Region", - fill = "Pulse Period") + - facet_wrap(Year ~ ., ncol = 2) + - scale_fill_manual(values = c("#4DAF4A", - "#984EA3", - "#A3D0D4"), - labels=c("Before", "During", "After")) - -ggsave(path = output, - filename = "fig1_log_phyto_biovolume_by_year_and_AP.png", - device = "png", - scale=1.0, - units="in", - height=4, - width=6, - dpi="print") - -# Plot Total BV by Flow Pulse Type at Each Station - -fig2 <- ggplot(data=phyto.sum, aes(y= log10(Total.BV.per.L), - x = FlowPulseCategory, - color = ActionPhase)) + - geom_boxplot() - -fig2 + - labs(title = "Total Phytoplankton Abundance by Flow Pulse Type", - y = bquote(Log[10]~'Biovolume Density'~(um^3~L^-1)), - x = "Flow Pulse Category") + - facet_wrap(StationCode ~ ., ncol = 2) - -ggsave(path = output, - filename = "fig2_log_phyto_biovolume_by_site_and_pulse_type.png", - device = "png", - scale=1.0, - units="in", - height=8, - width=6.5, - dpi="print") - -# Bar Charts with Error Bars --------------------------------------------------- - -fig3 <- ggplot(phyto.sum, aes(x = Year, y = log10(Total.BV.per.L), fill = ActionPhase)) + - geom_jitter(width = 0.1, - size = 2, - pch = 21, - color = "black") + - theme(panel.background = element_rect(fill = "white", linetype = 0)) + - theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + - #scale_fill_brewer(palette = "Dark2") + - labs(x = "Year", - y = bquote(Log[10]~'Biovolume Density'~(um^3~L^-1)), - title = "Yearly Abundance of Phytoplankton During Flow Pulses") - -fig3 + - facet_wrap(StationCode ~ ., ncol = 2, dir = "h") - -# fig3 <- ggplot(phyto.grp.sum.error, aes(x=Year, -# y=Total.BV.per.L, -# ymin=Total.BV.per.L-se, -# ymax=Total.BV.per.L+se, -# fill=ActionPhase)) + -# geom_bar(position=position_dodge(), -# aes(y=Total.BV.per.L), -# stat="identity", -# color = "black") + -# geom_errorbar(position=position_dodge(width=0.9), -# color="black", -# width = 0.2) -# -# fig3 + -# ylab(bquote('Biovolume Density'~(um^3~L^-1))) + -# xlab("Month") + -# labs(title = "Phytoplankton Abundance During Flow Actions") + -# facet_wrap(StationCode ~ ., ncol = 2, dir = "v") - -ggsave(path = output, - filename = "fig3_phyto_BV_jitter.png", - device = "png", - scale=1.0, - units="in", - height=7.5, - width=6, - dpi="print") - -# Stacked Bar Charts for Biovolume --------------------------------------------- - -# Upstream/Downstream -fig4 <- ggplot(phyto.grp.BV, aes(x = ActionPhase, - y = BV.um3.per.L, - fill = Group)) + - geom_bar(position = "stack", - width = 0.6, - stat = "summary", - fun = "mean") + - theme(axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0)) - -fig4 + - labs(x = NULL, - y = bquote('Average Biovolume Density'~(um^3~L^-1)), - title = paste0("Abundance of Phytoplankton Groups During Flow Pulses")) + - theme(panel.background = element_rect(fill = "white", linetype = 0)) + - theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + - scale_fill_manual(values = c("#E41A1C", - "#377EB8", - "#4DAF4A", - "#984EA3", - "#FF7F00", - "#FFFF33", - "#A65628", - "#F781BF")) + - theme(axis.text.x = element_text(angle = 45, vjust = 0.7)) + - facet_grid(Region~Year) # dir = v makes order of station go "north to south" - -ggsave(path = output, - filename = paste0("fig4_phyto_group_biovolume_by_year.png"), - device = "png", - scale=1.0, - units="in", - height=5, - width=7.5, - dpi="print") - -## Create biovolume plots for each year -years <- unique(phyto.grp.BV$Year) -years <- sort(years, decreasing = F, na.last = T) - -for (year in years) { - df_temp <- phyto.grp.BV %>% - filter(Year == year) - - biovolume.plot <- ggplot(phyto.grp.BV, - aes(x = ActionPhase, - y = BV.um3.per.L, - fill = Group)) + - geom_bar(data = subset(phyto.grp.BV, Year == year), - position = "stack", - width = 0.6, - stat = "summary", - fun = "mean") + - theme(panel.background = element_rect(fill = "white", linetype = 0)) + - theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + - labs(x = "Pulse Period", - y = bquote('Average Biovolume Density'~(um^3~L^-1)), - title = paste0("Abundance of Phytoplankton Groups During Flow Pulses - ",year)) - - biovolume.plot + - scale_fill_manual(values = c("#E41A1C", - "#377EB8", - "#4DAF4A", - "#984EA3", - "#FF7F00", - "#FFFF33", - "#A65628", - "#F781BF")) + - theme(axis.text.x = element_text(angle = 90, vjust = 0.7)) + - facet_wrap(StationCode ~ ., ncol = 5, dir = "h") - - ggsave(path = output, - filename = paste0("phyto_avg_BV_by_station_",year,".png"), - device = "png", - scale=1.0, - units="in", - height=3.5, - width=7, - dpi="print") - - rm(df_temp) - -} - -# Recreate Jared's Plots from 2016 --------------------------------------------- - -for (year in years) { - df_temp <- phyto.grp.BV %>% filter(Year == year) - - biovolume.plot <- ggplot(phyto.grp.BV, aes(x = StationCode, y = BV.um3.per.L, fill = Group)) + - geom_bar(data = subset(phyto.grp.BV, Year == year), - position = "stack", - width = 0.6, - stat = "summary", - fun = "mean") + - theme(panel.background = element_rect(fill = "white", linetype = 0)) + - theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + - labs(x = "Station", - y = "Biovolume Density (um^3 per mL)", - title = paste0("Abundance During Flow Pulses - ",year)) - - biovolume.plot + - scale_fill_manual(values = c("#E41A1C", - "#377EB8", - "#4DAF4A", - "#984EA3", - "#FF7F00", - "#FFFF33", - "#A65628", - "#F781BF")) + - theme(axis.text.x = element_text(angle = 90, vjust = 0.7)) + - facet_wrap(ActionPhase ~ ., ncol = 1, dir = "h") - - ggsave(path = output, - filename = paste0("phyto_avg_BV_by_AP_",year,".png"), - device = "png", - scale=1.0, - units="in", - height=3.5, - width=5.5, - dpi="print") - - rm(df_temp) - -} - -# Relative Abundance Bar Plots ------------------------------------------------- - -# Summarize Total Biovolumes to look at relative contributions by Year - -phyto.grp.BV.RA.plot <- ggplot(phyto.grp.BV.RA, aes(x = ActionPhase, y = MeanRelAbund, fill = Group)) + - geom_bar(position = "stack", - width = 0.6, - stat = "summary", - fun = "mean") + - theme(panel.background = element_rect(fill = "white", linetype = 0)) + - theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + - labs(x = "Station", - y = "Relative Biovolume Density (%)", - title = "Relative Abundance of Phytoplankton During Flow Pulses") - -phyto.grp.BV.RA.plot + - scale_fill_manual(values = c("#E41A1C", - "#377EB8", - "#4DAF4A", - "#984EA3", - "#FF7F00", - "#FFFF33", - "#A65628", - "#F781BF")) + - theme(axis.text.x = element_text(angle = 90, vjust = 0.7)) + - facet_grid(Region ~ Year) - -ggsave(path = output, - filename = "phyto_BV_relative_abund.png", - device = "png", - scale=1.0, - units="in", - height=3.5, - width=5.5, - dpi="print") - -## Make individual abundance jitter plots for each year biovolume -years <- unique(phyto.grp.gen.BV$Year) -years <- sort(years, decreasing = F, na.last = T) - -phyto.grp.gen.BV <- left_join(phyto.grp.gen.BV, phyto.types) - -for (year in years) { - - df_temp <- phyto.grp.gen.BV %>% - filter(Year==year) %>% - filter(Group == "Diatoms") - - jitter.plot <- ggplot(df_temp, aes(x = StationCode, y = BV.um3.per.L, color = Type, shape = Type)) + - geom_jitter(width = 0.1, size = 2) + - theme(panel.background = element_rect(fill = "white", linetype = 0)) + - theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + - labs(x = "Station", - y = "Biovolume (um^3 per L)", - title = paste0("Diatom Biovolume During Flow Pulses - ",year)) - #ylim(c(0,2e10)) - - jitter.plot + - scale_color_brewer(palette = "Dark2") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.7)) + - facet_wrap(ActionPhase ~ ., ncol = 1, dir = "h") - - ggsave(path = output, - filename = paste0("jitter_Diatom_BV_by_station_and_Region_",year,".pdf"), - device = "pdf", - scale=1.0, - units="in", - height=3.5, - width=6.5, - dpi="print") - - rm(df_temp) - -} - -## Make individual abundance jitter plots for each year LCEFA -years <- unique(phyto.grp.LCEFA$Year) -years <- sort(years, decreasing = F, na.last = T) - -for (year in years) { - - df_temp <- phyto.grp.LCEFA %>% - filter(Year==year) - - LCEFA.plot <- ggplot(df_temp, aes(x = StationCode, y = LCEFA.per.L, fill = Group, shape = Group)) + - geom_bar(data = subset(phyto.grp.LCEFA, Year == year), - position = "stack", - width = 0.6, - stat = "summary", - fun = "mean") + - theme(panel.background = element_rect(fill = "white", linetype = 0)) + - theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + - labs(x = "Station", - y = "LCEFA (ug per L)", - title = paste0("Est. Mass of LCEFA During Flow Pulses - ",year)) - #ylim(c(0,2e10)) - - LCEFA.plot + - scale_fill_brewer(palette = "Dark2") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.7)) + - facet_wrap(ActionPhase ~ ., ncol = 1, dir = "h") - - ggsave(path = output, - filename = paste0("barplot_LCEFA_by_station_and_Region_",year,".png"), - device = "png", - scale=1.0, - units="in", - height=3.5, - width=6.5, - dpi="print") - - rm(df_temp) - -} - -# Compare Cyclotella Abundance Upstream and Downstream ------------------------- - -cyc.plot <- ggplot(phyto.gen.BV, - aes(x = Region, y = log10(BV.um3.per.L+1))) + - geom_jitter(data = subset(phyto.gen.BV, Genus == "Cyclotella"), - width = 0.1) + - geom_boxplot(data = subset(phyto.gen.BV, Genus == "Cyclotella"), - width = 0.1, - outlier.shape = NA) - -cyc.plot + - labs(title = "Yearly Abundance of Cyclotella", - y = bquote(Log[10]~'Biovolume Density'~(um^3~L^-1)), - x = NULL) + - facet_wrap(Year ~ ., ncol = 3) - -ggsave(path = output, - filename = "cyclotella_abundance_plot.png", - device = "png", - scale=1.0, - units="in", - height=3.5, - width=6.5, - dpi="print") - -# NMDS Plots ------------------------------------------------------------------- - -# Create NMDS plots for each year by Region -NMDS.Region.plot <- ggplot(phyto.gen.NMDS, aes(x = NMDS1, y = NMDS2, color = ActionPhase)) + - geom_point(size = 3) + - stat_ellipse() + - theme_bw() - -NMDS.Region.plot + - facet_wrap(Year ~ ., ncol = 3, dir = "h") + - labs(x = NULL, - y = NULL, - color = "Region") + - theme(axis.text.x=element_blank(), - axis.ticks.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks.y=element_blank(), - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.background = element_blank()) + - scale_color_brewer(palette = "Set1") - -ggsave(path = output, - filename = paste0("phyto_NMDS_biovolume_by_AP.png"), - device = "png", - scale=1.0, - units="in", - height=5, - width=6.5, - dpi="print") - -# Create biovolume NMDS plots for each year by Region - -# Plot NMDS in ggplot2 - -NMDS.AP.plot <- ggplot(phyto.gen.NMDS, aes(x = NMDS1, y = NMDS2, color = ActionPhase)) + - geom_point(size = 3) + - stat_ellipse() + - labs(title = "Phytoplankton Community Composition") + - labs(color = "Region") + - theme_bw() - -NMDS.AP.plot + - facet_wrap(Year ~ ., ncol = 3, dir = "h") + - scale_color_brewer(palette = "Set1") - - -ggsave(path = output, - filename = paste0("phyto_NMDS_biovolume_by_AP.png"), - device = "png", - scale=1.0, - units="in", - height=3, - width=6.5, - dpi="print") - -# LCEFA Stacked Bar Plots ------------------------------------------------------ - -# Create stacked bar chart for yearly LCEFA abundance by Action Phase -# and Upstream/Downstream -fig12 <- ggplot(phyto.grp.LCEFA, aes(x = ActionPhase, - y = LCEFA.per.L, - fill = Group)) + - geom_bar(position = "stack", - width = 0.6, - stat = "summary", - fun = "mean") + - theme(axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0)) - -fig12 + - labs(x = NULL, - y = bquote('Average LCEFA'~(?g~L^-1)), - title = paste0("Estimated Mass of LCEFA for Phytoplankton Groups During Flow Pulses")) + - theme(panel.background = element_rect(fill = "white", linetype = 0)) + - theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + - scale_fill_manual(values = c("#377EB8", - "#4DAF4A", - "#984EA3", - "#FF7F00", - "#A65628")) + - theme(axis.text.x = element_text(angle = 45, vjust = 0.7)) + - facet_grid(Region~Year) # dir = v makes order of station go "north to south" - -ggsave(path = output, - filename = paste0("fig12_phyto_group_LCEFA_by_year.png"), - device = "png", - scale=1.0, - units="in", - height=5, - width=7.5, - dpi="print") - -# Summarize data to get raw numbers for paper ---------------------------------- - -## Summarize by group -phyto.LCEFA.RA <- phyto.LCEFA.grp %>% - group_by(Year, Region, ActionPhase, Group) %>% - summarize(LCEFA.Total = mean(LCEFA, na.rm = TRUE)) %>% - ungroup - -phyto.LCEFA.RA <- phyto.LCEFA.RA %>% - group_by(Year, Region, ActionPhase) %>% - mutate(LCEFA.RA = LCEFA.Total / sum(LCEFA.Total)) %>% - ungroup - -phyto.BV.RA <- phyto.grp %>% - group_by(Year, Region, ActionPhase, Group) %>% - summarize(BV.Total = mean(BV.Density, na.rm = TRUE)) %>% - ungroup - -phyto.BV.RA <- phyto.BV.RA %>% - group_by(Year, Region, ActionPhase) %>% - mutate(BV.RA = BV.Total / sum(BV.Total)) %>% - ungroup -