diff --git a/phyto-final/phyto-analyses-FASTR.Rmd b/phyto-final/phyto-analyses-FASTR.Rmd
new file mode 100644
index 0000000..11e968a
--- /dev/null
+++ b/phyto-final/phyto-analyses-FASTR.Rmd
@@ -0,0 +1,957 @@
+---
+title: "Phytoplankton Data Processing for FASTR"
+author: "Ted Flynn"
+date: "2024-03-18"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+
+suppressWarnings(suppressMessages(library(tidyverse)))
+suppressWarnings(suppressMessages(library(lubridate)))
+suppressWarnings(suppressMessages(library(RColorBrewer)))
+suppressWarnings(suppressMessages(library(vegan)))
+suppressWarnings(suppressMessages(library(janitor)))
+suppressWarnings(suppressMessages(library(here)))
+
+# Set visual theme in ggplot
+theme_set(theme_bw())
+
+```
+
+## Import EMP Data
+
+Import phytoplankton data collected by the Environmental Monitoring Program. 
+
+```{r import EMP, echo=FALSE}
+
+# Import EMP data files
+phyto_files_EMP <- list.files(path = here("phyto-final","data","EMP","csv"), 
+                   pattern = "\\.csv", 
+                   full.names = T)
+
+df_phyto_EMP <- map(phyto_files_EMP, ~read_csv(.x, show_col_types = FALSE)) %>% 
+  list_rbind()
+
+# Read in files with non-standard headers individually
+df_Dec2021 <- read_csv(here("phyto-final","data","EMP","oddballs","December 2021.csv"))
+df_Nov2021 <- read_csv(here("phyto-final","data","EMP","oddballs","November 2021.csv"))
+df_Sep2013 <- read_csv(here("phyto-final","data","EMP","oddballs","September 2013.csv"))
+df_Nov2013 <- read_csv(here("phyto-final","data","EMP","oddballs","November 2013.csv"))
+
+# Combine like oddball dfs
+df_phyto2013 <- bind_rows(df_Sep2013, df_Nov2013)
+df_phyto2021 <- bind_rows(df_Dec2021, df_Nov2021)
+
+# Remove individual dfs
+rm(df_Dec2021)
+rm(df_Nov2021)
+rm(df_Nov2013)
+rm(df_Sep2013)
+
+# Rename headers to match standard BSA headers Oddballs actually have the 
+# "correct" name of Total Cells rather than the incorrect "Number of cells per 
+# unit"
+
+df_phyto2013 <- df_phyto2013 %>%
+  rename("Number of cells per unit" = "Total Cells Counted")
+
+df_phyto2021 <- df_phyto2021 %>%
+  rename("Number of cells per unit" = "Total Number of Cells") %>%
+  rename("Unit Abundance" = "Unit Abundance (# of Natural Units)")
+
+# Combine oddball files with others
+df_phyto_EMP <- bind_rows(df_phyto_EMP, df_phyto2013)
+df_phyto_EMP <- bind_rows(df_phyto_EMP, df_phyto2021)
+
+# Remove unneeded dfs
+rm(df_phyto2013)
+rm(df_phyto2021)
+
+# Remove empty rows
+df_phyto_EMP <- df_phyto_EMP %>% filter_all(any_vars(!is.na(.)))
+
+# Correct GALD, which is imported into two separate columns
+# Test to see if NAs are 'either/or' and that there aren't some rows with a 
+# value in both GALD and GALD 1
+
+sum(is.na(df_phyto_EMP$GALD)) # Total is 5880
+sum(is.na(df_phyto_EMP$`GALD 1`)) # Total is 8072
+
+# Sum of NAs is 13952 which is the same as the number of rows in the df.
+# This shows that there aren't any rows with two values so that we can 
+# Combine them without any issues.
+
+# Move GALD header
+df_phyto_EMP <- df_phyto_EMP %>% relocate(`GALD 1`, .after = GALD)
+
+# Combine both GALD columns
+df_phyto_EMP <- df_phyto_EMP %>%
+  rowwise() %>%
+  mutate(GALD.Tot = sum(c_across(GALD:`GALD 1`), na.rm = TRUE))
+
+# Remove old GALD columns and rename GALD.Tot
+df_phyto_EMP <- df_phyto_EMP %>% 
+  select(!(GALD:`GALD 1`)) %>%
+  rename("GALD" = "GALD.Tot")
+
+# Clean up column names
+df_phyto_EMP <- df_phyto_EMP %>% clean_names(case = "big_camel")
+
+df_phyto_EMP <- df_phyto_EMP %>% rename("GALD" = "Gald")
+
+# Remove blank columns
+df_phyto_EMP <- df_phyto_EMP %>% select_if(~ !all(is.na(.)))
+
+# Remove columns that just have the method code "Phyto" as well as 
+# pre-calculated organisms per mL
+df_phyto_EMP <- df_phyto_EMP %>% select(SampleDate:Biovolume10,GALD)
+
+# Add column to indicate what study it came from
+df_phyto_EMP <- df_phyto_EMP %>% mutate(Study = "EMP", .after = StationCode)
+
+```
+
+## Import phyto data collected for FASTR project
+
+```{r import FASTR}
+# Import AEU data files (comment out when finished)
+
+phyto_files_AEU <- list.files(path = here("phyto-final","data","csv"), 
+                   pattern = "\\.csv", 
+                   full.names = T)
+
+df_phyto_FASTR <- map(phyto_files_AEU, ~read_csv(.x, show_col_types = FALSE)) %>% 
+  list_rbind()
+
+# Remove empty rows
+df_phyto_FASTR <- df_phyto_FASTR %>% filter_all(any_vars(!is.na(.)))
+
+# Remove weird row with only a single zero in biovolume
+df_phyto_FASTR <- df_phyto_FASTR %>% drop_na(MethodCode)
+
+# Clean up column names
+df_phyto_FASTR <- df_phyto_FASTR %>% clean_names(case = "big_camel")
+
+# Filter out samples from other projects. Read in station names with flags and 
+# merge together.
+df_keepers <- read_csv(here("phyto-final","CSVs", "station_names_flagged.csv"))
+df_phyto_FASTR <- left_join(df_phyto_FASTR, df_keepers)
+rm(df_keepers)
+
+# Remove data unrelated to project
+df_phyto_FASTR <- df_phyto_FASTR %>% filter(Flag == "Keep")
+
+# Remove flag column
+df_phyto_FASTR <- df_phyto_FASTR %>% select(!(Flag))
+
+# Confirm station IDs
+unique(df_phyto_FASTR$StationCode) # Shows only 10 FASTR stations
+table(df_phyto_FASTR$StationCode)
+
+# Remove blank columns
+df_phyto_FASTR <- df_phyto_FASTR %>% select_if(~ !all(is.na(.)))
+
+# Remove individual measurement columns as they are only present in a small 
+# number of samples.
+df_phyto_FASTR <- df_phyto_FASTR %>% select(!(Dimension:DepthM))
+
+# Remove secondary and tertiary GALD measurements as they don't vary much and 
+# aren't present in EMP data
+df_phyto_FASTR <- df_phyto_FASTR %>% select(!(Gald2:Gald3))
+
+# Correct GALD, which is imported into two separate columns. Test to see if NAs 
+# are 'either/or' and that there aren't some rows with a value in both GALD and 
+# GALD 1
+
+sum(is.na(df_phyto_FASTR$Gald)) # Total is 1791
+sum(is.na(df_phyto_FASTR$Gald1)) # Total is 4535
+
+# Sum of NAs is 6326 which is the same as the number of rows in the df. This
+# shows that there aren't any rows with two values so that we can Combine them 
+# without any issues.
+
+# Move Gald1 column
+df_phyto_FASTR <- df_phyto_FASTR %>% relocate(Gald1, .after = Gald)
+
+# Combine both GALD columns
+df_phyto_FASTR <- df_phyto_FASTR %>%
+  rowwise() %>%
+  mutate(GALD.Tot = sum(c_across(Gald:Gald1), na.rm = TRUE))
+
+# Check if rows have two NAs or no NAs in the GALD columns
+# test <- df_phyto_FASTR %>%
+#   mutate(GALD.Value = case_when(is.na(Gald) & is.na(Gald1) ~ "Fix",
+#                                 TRUE ~ "Okay"))
+
+# Remove old GALD columns and rename GALD.Tot
+df_phyto_FASTR <- df_phyto_FASTR %>% 
+  select(!(Gald:Gald1)) %>%
+  rename("GALD" = "GALD.Tot")
+
+# Remove MethodCode column that just says "Phyto" 
+df_phyto_FASTR <- df_phyto_FASTR %>% select(!(MethodCode))
+
+# Add column to indicate what study it came from
+df_phyto_FASTR <- df_phyto_FASTR %>% mutate(Study = "FASTR", .after = StationCode)
+```
+
+## Combine EMP and AEU data
+
+```{r combine data}
+df_phyto <- bind_rows(df_phyto_EMP, df_phyto_FASTR)
+
+# Average all 10 biovolume measurements for each taxon
+df_phyto <- df_phyto %>% 
+  rowwise() %>% 
+  mutate(BV.Avg = mean(c_across(Biovolume1:Biovolume10), na.rm = T)) %>% 
+  select(!(Biovolume1:Biovolume10)) # Remove Individual Biovolume Columns
+
+# Remove unneeded columns
+df_phyto <- df_phyto %>% select(!c("BsaTin","DiatomSoftBody"))
+df_phyto <- df_phyto %>% select(!(ColonyFilamentIndividualGroupCode:Shape))
+df_phyto <- df_phyto %>% select(!(VolumeReceivedML:NumberOfFieldsCounted))
+
+# Fix samples with missing times
+# Just replace with 12:00:00 b/c aren't doing any time-based analyses
+df_phyto <- df_phyto %>% replace_na(list(SampleTime = "12:00:00"))
+
+# Get dates in the right format. Some are 1/1/14 and others 1/1/2014.
+df_phyto$SampleDate <- mdy(df_phyto$SampleDate)
+
+# Combine date and time column
+df_phyto <- df_phyto %>% unite(DateTime, c("SampleDate","SampleTime"), sep = " ") #, remove = FALSE, na.rm = FALSE)
+
+df_phyto$DateTime <- as_datetime(df_phyto$DateTime, 
+                              tz = "US/Pacific",
+                              format = c("%Y-%m-%d %H:%M:%OS"))
+
+# Check for missing dates
+df_phyto %>% filter(is.na(DateTime)) # No missing dates
+
+# Correct BSA header
+df_phyto <- df_phyto %>% rename("TotalCells" = "NumberOfCellsPerUnit")
+
+# Calculate Unit Density & Biovolume Density
+df_phyto <- df_phyto %>%
+  mutate(Units.per.mL = UnitAbundance * Factor) %>%
+  mutate(BV.um3.per.mL= TotalCells * BV.Avg * Factor) %>% 
+  mutate(Cells.per.mL = TotalCells * Factor) 
+
+# Remove columns used for density calcs
+# Remove Biovolume Columns
+df_phyto <- df_phyto %>% select(!(c(Factor,UnitAbundance:TotalCells)))
+
+# Add column for year and month for highlighting data
+df_phyto <- df_phyto %>% 
+  mutate(Year = year(df_phyto$DateTime)) %>%
+  mutate(Month = month(df_phyto$DateTime, label = T))
+
+# Order month in calendar order rather than (default) alphabetical
+df_phyto$Month = factor(df_phyto$Month, levels = month.abb)
+
+# Reorder date/time columns
+df_phyto <- df_phyto %>% 
+  relocate(Year, .after = DateTime) %>% 
+  relocate(Month, .after = DateTime)
+
+# Convert years to factors for plotting
+df_phyto$Year <- as.factor(df_phyto$Year)
+
+# 
+
+```
+
+## Clean Combined Data
+
+```{r data cleaning, echo = FALSE}
+# Fix EMP site names
+df_phyto$StationCode <- gsub("EZ6 SAC","EZ6",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("EZ6SAC","EZ6",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("EZ6SJR","EZ6-SJR",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("EZ2SAC","EZ2",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("EZ2 SAC","EZ2",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("EZ2SJR","EZ2-SJR",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("EZ2 SJR","EZ2-SJR",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("EZ6 SJR","EZ6-SJR",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("D16-Twitchell","D16",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("D16-Twitchel","D16",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("D16 - Twitchell","D16",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("D16 Twitchell","D16",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("NZ328","NZ325",df_phyto$StationCode) # Typo in August 2019
+df_phyto$StationCode <- gsub("C3A-HOOD","C3A",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("C3A Hood","C3A",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("C3A- Hood","C3A",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("C3A-Hood","C3A",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("NZ542","NZS42",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("E26","EZ6",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("E22","EZ2",df_phyto$StationCode) # Typo in May 2018
+
+# Fix AEU site names
+df_phyto$StationCode <- gsub("SHER","SHR",df_phyto$StationCode)
+df_phyto$StationCode <- gsub("BL-5","BL5",df_phyto$StationCode)
+
+# Remove extra Microcystis tows at D19
+df_phyto <- df_phyto %>% filter(StationCode != "D19 MC Tow")
+
+# In Fall 2016, taxonomists began classifying the species 
+# Chroococcus microscopicus as Eucapsis microscopica. This is one of the most
+# dominant species in this samples, so all taxa previously classified as 
+# C. microscopicus will be re-named E. microscopica
+
+df_phyto <- df_phyto %>% 
+  mutate(Taxon = case_when(Taxon == 'Chroococcus microscopicus' ~ 'Eucapsis microscopica',
+                           TRUE ~ Taxon)) %>%
+  mutate(Genus = case_when(Taxon == 'Eucapsis microscopica' ~ 'Eucapsis',
+                           TRUE ~ Genus))
+
+# The taxon Plagioselmis lacustris is inconsistently named, appearing sometimes as 
+# Rhodomonas lacustris. Change to Rhodomonas lacustris to avoid confusion.
+
+df_phyto <- df_phyto %>% 
+  mutate(Taxon = case_when(Taxon == 'Plagioselmis lacustris' ~ 'Rhodomonas lacustris',
+                           TRUE ~ Taxon)) %>%
+  mutate(Genus = case_when(Taxon == 'Rhodomonas lacustris' ~ 'Rhodomonas',
+                           TRUE ~ Genus))
+
+# Correct the genus label for a Chlorella entry
+df_phyto$Genus <- gsub("cf Chlorella","Chlorella",df_phyto$Genus)
+
+# Add Taxonomy Data-------------------------------------------------------------
+
+# Read in CSV with manually-added WoRMS classification
+df_taxa <- read_csv(here("phyto-final","CSVs","phyto_group_classification.csv"),
+                    show_col_types = FALSE)
+
+df_phyto <- left_join(df_phyto, df_taxa)
+
+# Check if any groups are missing
+sum(is.na(df_phyto$Group)) # none missing 
+rm(df_taxa)
+
+# Reorder Group classification column
+df_phyto <- df_phyto %>% 
+  relocate(Group, .before = Taxon)
+
+
+
+```
+
+## Analyze FASTR Biovolume Data for Outliers
+
+```{r outlier analysis}
+
+# Subset combined dataset to FASTR-only
+df_phyto_FASTR <- df_phyto %>% 
+  filter(Study == "FASTR")
+
+# Identify if outliers are present
+boxplot(df_phyto_FASTR$BV.um3.per.mL)
+
+# Identify 0.1% and 99.9% percentiles of the data
+quartiles <- quantile(df_phyto_FASTR$BV.um3.per.mL, 
+                      probs=c(0.001, 0.999), 
+                      na.rm = TRUE)
+
+# List upper cutoff (99.9%)
+cutoff <- quartiles[2]
+
+# Filter FASTR dataset to show all taxa above this 99.9% cutoff
+df_outliers <- df_phyto_FASTR %>% 
+  filter(BV.um3.per.mL > cutoff)
+
+list(df_outliers) ## 4 of top 6 are Spirogyra. 
+
+df_phyto_FASTR %>% filter(Genus == "Spirogyra")# Only 5 total samples w/ Spirogyra
+
+# Decision: Remove Spirogyra from datasets
+# 4/5 are above 99.9% cutoff, 5th is also highly abundant
+# These are benthic algae that clump, more likely to be "bullseye" samples
+# that got a big blob or clump
+
+# Remove Spirogyra from FASTR and combined dataset
+df_phyto_FASTR <- df_phyto_FASTR %>% 
+  filter(Genus != "Spirogyra")
+
+df_phyto <- df_phyto %>% 
+  filter(Genus != "Spirogyra")
+
+## Remove 2013 data from FASTR dataset (very limited)
+df_phyto_FASTR <- df_phyto_FASTR %>% filter(Year != 2013)
+
+```
+
+## Estimate Phytoplankton Biomass
+
+```{r biomass calcs}
+
+# Convert biovolume to biomass using relationships from Menden-Deuer and Lussard
+# (2000) doi: 10.4319/lo.2000.45.3.0569
+# Only relevant to group-level data
+# Units of BV.Density are um^3 per mL
+
+# Calculate Biomass (pg-C per cell) from Biovolume (um^3 per cell)
+df_phyto_FASTR <- df_phyto_FASTR %>% 
+  mutate(Biomass.pg.C = case_when(Group == "Diatoms" ~ 0.288 * (BV.Avg^0.811), 
+                                  Group != "Diatoms" ~ 0.216 * (BV.Avg^0.939)))
+
+# Convert pg to ug (ug-C per L)
+df_phyto_FASTR <- df_phyto_FASTR %>% 
+  mutate(Biomass.ug.C = Biomass.pg.C / 10^6, .keep = "unused")
+
+# Calculate Biomass Density (pg-C per mL)
+df_phyto_FASTR <- df_phyto_FASTR %>% 
+  mutate(BM.ug.per.mL = Biomass.ug.C * Cells.per.mL)
+
+
+
+```
+
+## Add FASTR-Specific Metadata
+
+```{r FASTR metadata}
+
+# Import Stations listed as Upstream and Downstream
+df_region <- read_csv(here("phyto-final","CSVs","upstream_downstream_stations.csv"),
+                      show_col_types = FALSE)
+
+df_region <- df_region %>% 
+  rename("Region" = "UpDown") %>%
+  rename("StationCode" = "Site")
+
+df_region$Region <- factor(df_region$Region, 
+                           levels = c("Upstream","Downstream"))
+
+## Add Region to data frames
+df_phyto_FASTR <- left_join(df_phyto_FASTR, df_region)
+
+df_phyto_FASTR <- df_phyto_FASTR %>% 
+  relocate(Region, .after = StationCode)
+
+# Cat Pien's Code for Merging Flow Designations #####
+# 
+# FlowDesignation <- read_csv("FlowDatesDesignations.csv")
+# Update with 45 days limit on either end
+# Update 5/27/22 TF added >= to two of the mutate commands to avoid removing 
+# samples falling on the PreFlowEnd and PostFlowStart dates. 
+
+# Upload flow dates and categories
+df_flow_designation <- read_csv(here("phyto-final","CSVs","FlowDatesDesignations_45days.csv"),
+                                show_col_types = FALSE)
+
+df_flow_designation$Year <- as.factor(df_flow_designation$Year)
+
+# Format dates as dates
+df_flow_designation$PreFlowStart <- mdy(df_flow_designation$PreFlowStart)
+df_flow_designation$PreFlowEnd <- mdy(df_flow_designation$PreFlowEnd)
+df_flow_designation$PostFlowStart <- mdy(df_flow_designation$PostFlowStart)
+df_flow_designation$PostFlowEnd <- mdy(df_flow_designation$PostFlowEnd)
+
+# Remove column with net flow days
+df_flow_designation <- df_flow_designation %>% 
+  select(!(NetFlowDays))
+
+# Save file for use in making plots
+# save(FlowDesignation, file = "RData/FlowDesignation.RData")
+
+# Merge data from FlowDesignation Table (Water Year Type, Flow days and type)
+df_phyto_FASTR <- inner_join(df_phyto_FASTR, df_flow_designation, 
+                             by = "Year")   
+
+# Filter only Pre-During-Post Flow Action Data. 
+df_phyto_FASTR <- df_phyto_FASTR %>%
+  mutate(ActionPhase = ifelse(DateTime > PreFlowStart & DateTime < PreFlowEnd, "Before", NA)) %>%
+  mutate(ActionPhase = replace(ActionPhase, DateTime >= PreFlowEnd & DateTime < PostFlowStart, "During")) %>% 
+  mutate(ActionPhase = replace(ActionPhase, DateTime >= PostFlowStart & DateTime < PostFlowEnd, "After")) %>% 
+  filter(!is.na(ActionPhase)) %>%
+  select(-c(PreFlowStart:PostFlowEnd))
+
+# Order the new ActionPhase so that it plots in the order Pre < During < Post
+phase.order <- c("Before","During","After")
+df_phyto_FASTR$ActionPhase <- factor(as.character(df_phyto_FASTR$ActionPhase), 
+                                     levels = phase.order)
+
+## Add in Flow Pulse Category Data
+df_pulse_category <- read_csv(here("phyto-final","CSVs","FlowPulseType.csv"),
+                              show_col_types = FALSE)
+
+df_pulse_category <- df_pulse_category %>% 
+  rename("FlowPulseCategory" = "FlowPulseType")
+
+df_pulse_category$Year <- as.factor(df_pulse_category$Year)
+
+# Add Flow Pulse Category to data frames to be exported
+df_phyto_FASTR <- left_join(df_phyto_FASTR, df_pulse_category,
+                       by = "Year")
+
+# List of stations used for FASTR
+stations <- c("RMB","RCS","RD22","I80","LIS","STTD","BL5","LIB","RYI","RVB")
+
+# Re-order stations from North to South
+df_phyto_FASTR$StationCode <- factor(as.character(df_phyto_FASTR$StationCode),
+                                     levels = stations)
+
+## Relocate metadata columns
+df_phyto_FASTR <- df_phyto_FASTR %>%
+  relocate(FlowPulseCategory, .before = ActionPhase) %>%
+  relocate(WYType:ActionPhase, .before = GALD)
+
+# Save df to use for making plots
+# save(df_phyto, file = "RData/df_phyto.RData")
+
+```
+
+## Create New Data Frames summarizing by Genus and Algal Group
+
+```{r summarize phyto data}
+# Summarize by algal group
+df_phyto_FASTR_grp <- df_phyto_FASTR %>%
+  group_by(Year, Month, Region, ActionPhase, DateTime, StationCode, Group) %>%
+  summarize(across(Units.per.mL:BM.ug.per.mL, ~sum(.x, na.rm = TRUE))) %>%
+  ungroup
+
+## Summarize by genus
+df_phyto_FASTR_gen <- df_phyto_FASTR %>%
+  group_by(Year, Month, Region, ActionPhase, DateTime, StationCode, Genus) %>%
+  summarize(across(Units.per.mL:BM.ug.per.mL, ~sum(.x, na.rm = TRUE))) %>%
+  ungroup
+
+## Set group display order
+group.order <- c("Diatoms",
+                 "Cyanobacteria",
+                 "Green Algae",
+                 "Cryptophytes",
+                 "Ciliates",
+                 "Dinoflagellates",
+                 "Golden Algae",
+                 "Other")
+
+df_phyto_FASTR_grp$Group <- factor(as.character(df_phyto_FASTR_grp$Group),
+                                   levels =  group.order)
+
+## Summarize totals
+df_phyto_FASTR_sum <- df_phyto_FASTR %>%
+  group_by(Year, Month, DateTime, StationCode, Region, WYType, FlowPulseType, FlowPulseCategory, ActionPhase) %>%
+  summarize(across(Units.per.mL:BM.ug.per.mL, ~sum(.x, na.rm = TRUE))) %>%
+  rename("Total.Units.per.mL" = "Units.per.mL") %>%
+  rename("Total.Cells.per.mL" = "Cells.per.mL") %>%
+  rename("Total.BV.per.mL" = "BV.um3.per.mL") %>%
+  rename("Total.BM.per.mL" = "BM.ug.per.mL") %>%
+  ungroup
+
+# Separate out data frames by density type and add zeros
+# Biovolume Density + Group
+df_phyto_FASTR_grp_BV <- df_phyto_FASTR_grp %>% 
+  select(Year:Group,BV.um3.per.mL)
+
+# Add zeros for taxa that weren't detected
+temp <- pivot_wider(df_phyto_FASTR_grp_BV, 
+                    names_from = Group,
+                    values_from = BV.um3.per.mL,
+                    values_fill = 0)
+
+df_phyto_FASTR_grp_BV <- pivot_longer(temp,
+                             cols = Cyanobacteria:last_col(),
+                             names_to = "Group",
+                             values_to = "BV.um3.per.mL")
+
+
+
+
+```
+
+## Calculate LCEFA Abundance in FASTR Data
+
+```{r LCEFA calcs}
+## Calculate LCEFA composition based on method in Galloway & Winder (2015) 
+## doi: 10.1371/journal.pone.0130053
+
+df_phyto_FASTR_grp <- df_phyto_FASTR_grp %>% 
+  mutate(LCEFA.per.mL = case_when(Group == 'Diatoms' ~ BM.ug.per.mL*2.77/100,
+                                  Group == 'Cyanobacteria' ~ BM.ug.per.mL*0.02/100,
+                                  Group == 'Green Algae' ~ BM.ug.per.mL*0.52/100,
+                                  Group == 'Cryptophytes' ~ BM.ug.per.mL*2.13/100,
+                                  Group == 'Dinoflagellates' ~ BM.ug.per.mL*3.82/100,
+                                  TRUE ~ 0))
+
+
+```
+
+## Create Plots for Paper
+```{r Total Biovolume Plots}
+
+# Set plot output directory 
+plots <- here("phyto-final","plots")
+
+# Plot Upstream and Downstream Total BV by ActionPhase for each year
+fig1 <- ggplot(data = df_phyto_FASTR_sum, 
+               aes(y = log10(Total.BV.per.mL),
+                   x = Region,
+                   fill = ActionPhase)) +
+  geom_boxplot() 
+
+fig1 +
+  labs(title = "Total Phytoplankton Abundance by Year",
+       y = bquote(Log[10]~'Biovolume Density'~(um^3~mL^-1)), 
+       x = "Sampling Region",
+       fill = "Pulse Period") +
+  facet_wrap(Year ~ ., ncol = 2) +
+  scale_fill_manual(values = c("#4DAF4A",
+                               "#984EA3",
+                               "#A3D0D4"),
+                    labels=c("Before", "During", "After"))
+
+ggsave(path = plots,
+       filename = "fig1_log_phyto_biovolume_by_year_and_AP.png", 
+       device = "png",
+       scale=1.0, 
+       units="in",
+       height=4,
+       width=6, 
+       dpi="print")
+
+# Plot Total BV by Flow Pulse Type at Each Station
+
+fig2 <- ggplot(data = df_phyto_FASTR_sum, 
+               aes(y= log10(Total.BV.per.mL),
+                   x = FlowPulseCategory,
+                   color = ActionPhase)) +
+  geom_boxplot()
+
+fig2 +
+  labs(title = "Total Phytoplankton Abundance by Flow Pulse Type",
+       y = bquote(Log[10]~'Biovolume Density'~(um^3~mL^-1)), 
+       x = "Flow Pulse Category") +
+  facet_wrap(StationCode ~ ., ncol = 2)
+
+ggsave(path = plots,
+       filename = "fig2_log_phyto_biovolume_by_site_and_pulse_type.png", 
+       device = "png",
+       scale=1.0, 
+       units="in",
+       height=8,
+       width=6.5, 
+       dpi="print")
+
+# Stacked Bar Charts for Biovolume ---------------------------------------------
+
+# Upstream/Downstream
+fig4 <- ggplot(df_phyto_FASTR_grp_BV, aes(x = ActionPhase,
+                                       y = BV.um3.per.mL,
+                                       fill = Group)) + 
+  geom_bar(position = "stack",  
+           width = 0.6, 
+           stat = "summary", 
+           fun = "mean") +
+  theme(axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0)) 
+
+fig4 + 
+  labs(x = NULL, 
+       y = bquote('Average Biovolume Density'~(um^3~mL^-1)), 
+       title = paste0("Abundance of Phytoplankton Groups During Flow Pulses")) + 
+  theme(panel.background = element_rect(fill = "white", linetype = 0)) + 
+  theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) +
+  scale_fill_manual(values = c("#E41A1C",
+                               "#377EB8",
+                               "#4DAF4A",
+                               "#984EA3",
+                               "#FF7F00",
+                               "#FFFF33",
+                               "#A65628",
+                               "#F781BF")) +
+  theme(axis.text.x = element_text(angle = 45, vjust = 0.7)) +
+  facet_grid(Region~Year) # dir = v makes order of station go "north to south"
+
+ggsave(path = plots,
+       filename = paste0("fig4_phyto_group_biovolume_by_year.png"), 
+       device = "png",
+       scale=1.0, 
+       units="in",
+       height=5,
+       width=7.5, 
+       dpi="print")
+
+```
+
+## Plot Yearly Abundance of Phytoplankton
+
+```{r plot yearly abundance}
+
+## Create biovolume plots for each year
+years <- unique(df_phyto_FASTR_grp_BV$Year) 
+years <- sort(years, decreasing = F, na.last = T)
+
+for (year in years) {
+  df_temp <- df_phyto_FASTR_grp_BV %>%
+    filter(Year == year)
+  
+  biovolume.plot <- ggplot(df_phyto_FASTR_grp_BV, 
+                           aes(x = ActionPhase, 
+                               y = BV.um3.per.mL, 
+                               fill = Group)) +
+    geom_bar(data = subset(df_phyto_FASTR_grp_BV, Year == year), 
+             position = "stack",  
+             width = 0.6, 
+             stat = "summary", 
+             fun = "mean") +
+    theme(panel.background = element_rect(fill = "white", linetype = 0)) + 
+    theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) +
+    labs(x = "Pulse Period", 
+         y = bquote('Average Biovolume Density'~(um^3~mL^-1)), 
+         title = paste0("Abundance of Phytoplankton Groups During Flow Pulses - ",year)) 
+  
+  biovolume.plot +
+    scale_fill_manual(values = c("#E41A1C",
+                                 "#377EB8",
+                                 "#4DAF4A",
+                                 "#984EA3",
+                                 "#FF7F00",
+                                 "#FFFF33",
+                                 "#A65628",
+                                 "#F781BF")) +
+    theme(axis.text.x = element_text(angle = 90, vjust = 0.7)) +
+    facet_wrap(StationCode ~ ., ncol = 5, dir = "h")
+  
+  ggsave(path = plots,
+         filename = paste0("phyto_avg_BV_by_station_",year,".png"), 
+         device = "png",
+         scale=1.0, 
+         units="in",
+         height=3.5,
+         width=7, 
+         dpi="print")
+  
+  rm(df_temp)
+  
+}
+
+```
+
+## Multivariate Calculations 
+
+```{r multivariate calcs}
+
+## Calculate NMDS axes
+## Create Biovolume-only data frame at genus level
+df_phyto_FASTR_gen_BV <- df_phyto_FASTR_gen %>% select(Year:Genus,BV.um3.per.mL)
+
+## Generate NMDS data with metaMDS by each year separately
+
+#df_phyto_FASTR_gen_BV$Year <- as.integer(df_phyto_FASTR_gen_BV$Year)
+
+years <- unique(df_phyto_FASTR_gen_BV$Year) 
+years <- sort(years, decreasing = F, na.last = T)
+
+## Create blank data frame to fill stresses in
+stresses <- data.frame(Year = years, stress = NA)
+ls_dfs <- list()
+
+for (i in 1:length(years)) {
+  genw <- pivot_wider(df_phyto_FASTR_gen_BV, 
+                      names_from = "Genus", 
+                      values_from = "BV.um3.per.mL",
+                      values_fill = 0)
+  
+  genw <- genw %>% filter(Year == years[i])
+  
+  #look at number of observations per station
+  #table(genw$StationCode)
+  
+  # Calculate the nMDS using vegan 
+  # A good rule of thumb: stress < 0.05 provides an excellent representation in reduced dimensions,
+  # < 0.1 is great, < 0.2 is good/ok, and stress < 0.3 provides a poor representation.
+  phyto.NMDS <- metaMDS(
+    comm = genw[c(7:139)],
+    distance = "bray",
+    k = 3,
+    trymax = 500
+    #trace = F,
+    #autotransform = F
+  )
+  
+  stresses$stress[which(stresses$Year == years[i])] <- phyto.NMDS$stress
+  
+  #look at Shepard plot which shows scatter around the regression between the interpoint distances 
+  #in the final configuration (i.e., the distances between each pair of communities) against their 
+  #original dissimilarities.
+  stressplot(phyto.NMDS)
+  
+  # Using the scores function from vegan to extract the site scores and convert to a data.frame
+  data.scores <- as_tibble(scores(phyto.NMDS, display = "sites"))
+  
+  # Combine metadata with NMDS data scores to plot in ggplot
+  meta <- genw %>% select(1:6)
+  meta <- cbind(meta, data.scores)
+  
+  # Read in years as a character otherwise it shows up as a number and gets displayed as a gradient
+  meta$Year <- as.character(meta$Year)
+  
+  ls_dfs[[i]] <- meta
+  
+}
+
+df_phyto_NMDS <- do.call(rbind, ls_dfs)
+
+# Plot NMDS Results
+
+# Create a ggplot2 theme for the NMDS plots
+theme_update(
+    panel.grid.major = element_blank(),
+    panel.grid.minor = element_blank(),
+    axis.text = element_blank(),
+    axis.ticks = element_blank(),
+    plot.background = element_rect(fill = "white", colour = NA),
+    panel.background = element_rect(fill = "white", colour = NA),
+    panel.border = element_rect(fill = NA, colour = "black"),
+    strip.background = element_rect(fill = "gray", colour = "black"),
+    legend.position = "bottom",
+    legend.key = element_rect(fill = "white", colour = NA)
+    )
+
+fig.NMDS.Reg <- ggplot(df_phyto_NMDS,
+                       aes(x = NMDS1, y = NMDS2, color = Region)) +
+  geom_point(size = 3) +
+  stat_ellipse() + 
+  labs(color = "Region",
+       x = NULL,
+       y = NULL)
+
+fig.NMDS.Reg +
+  facet_wrap(Year ~ ., ncol = 3, dir = "h") +
+  scale_color_brewer(palette = "Set1")
+
+ggsave(path = plots,
+       filename = paste0("phyto_NMDS_biovolume_by_reg.png"), 
+       device = "png",
+       scale=1.0, 
+       units="in",
+       height=4.5,
+       width=5.5, 
+       dpi="print")
+
+# Create plot colored by ActionPhase
+fig.NMDS.AP <- ggplot(df_phyto_NMDS, 
+                      aes(x = NMDS1, y = NMDS2, color = ActionPhase)) +
+  geom_point(size = 3) +
+  stat_ellipse() + 
+  labs(color = "Flow Pulse Period",
+       x = NULL,
+       y = NULL)
+
+fig.NMDS.AP +
+  facet_wrap(Year ~ ., ncol = 3, dir = "h") +
+  scale_color_brewer(palette = "Set1") 
+
+ggsave(path = plots,
+       filename = paste0("phyto_NMDS_biovolume_by_AP.png"), 
+       device = "png",
+       scale=1.0, 
+       units="in",
+       height=4.5,
+       width=5.5, 
+       dpi="print")
+
+```
+
+## Calculate ANOVAs
+
+```{r anovas}
+
+suppressWarnings(suppressMessages(library(ggpubr)))
+suppressWarnings(suppressMessages(library(car)))
+suppressWarnings(suppressMessages(library(rstatix)))
+suppressWarnings(suppressMessages(library(visreg)))
+suppressWarnings(suppressMessages(library(emmeans)))
+
+## Change Year category to factor
+df_phyto_FASTR_sum$Year <- as.factor(df_phyto_FASTR_sum$Year)
+df_phyto_FASTR_sum$FlowPulseType <- as.factor(df_phyto_FASTR_sum$FlowPulseType)
+
+## Create QQ Plot to check for normality
+ggqqplot(log10(df_phyto_FASTR_sum$Total.BV.per.mL))
+
+## View histogram to check for normality
+hist(log10(df_phyto_FASTR_sum$Total.BV.per.mL))
+
+## Run Shapiro-Wilks test to check whether data is normally distributed
+shapiro.test(log10(df_phyto_FASTR_sum$Total.BV.per.mL))
+
+## Run Levene test to compare variance
+df_phyto_FASTR_sum %>% levene_test(log10(Total.BV.per.mL)~FlowPulseType) # p = 0.901
+df_phyto_FASTR_sum %>% levene_test(log10(Total.BV.per.mL)~Year) # p = 0.980
+df_phyto_FASTR_sum %>% levene_test(log10(Total.BV.per.mL)~Region) # p = 0.0230
+
+## Run 3-way ANOVA
+phyto.sum.aov <- df_phyto_FASTR_sum %>% anova_test(log10(Total.BV.per.mL) ~ Region*Year*ActionPhase)
+phyto.sum.aov
+
+## Rosie Code
+L1 <- lm(log10(Total.BV.per.mL)~ Region*Year + Region*ActionPhase + ActionPhase*Year, 
+         data = df_phyto_FASTR_sum)
+
+summary(L1)
+
+Anova(L1, type = 2)
+
+visreg(L1)
+
+visreg.AP.Region <- visreg(L1, xvar = "ActionPhase", by = "Year", gg = T)
+
+visreg.AP.Region +
+  facet_wrap(Year ~ ., ncol = 3, dir = "h")
+
+ggsave(path = plots,
+       filename = "visreg.AP.Region.png", 
+       device = "png",
+       scale=1.0, 
+       units="in",
+       height=3.5,
+       width=7, 
+       dpi="print")
+
+visreg(L1, xvar = "Region", by = "Year")
+visreg(L1, xvar = "Region", by = "ActionPhase")
+visreg(L1, xvar = "ActionPhase", by = "Region")
+
+emmeans(L1, pairwise ~ Region:ActionPhase)
+emmeans(L1, pairwise ~ ActionPhase)
+emmeans(L1, pairwise ~ Year:ActionPhase)
+
+detach("package:ggpubr", unload=TRUE)
+detach("package:car", unload=TRUE)
+detach("package:rstatix", unload=TRUE)
+detach("package:visreg", unload=TRUE)
+detach("package:emmeans", unload=TRUE)
+
+```
+
+## PERMANOVA Comparisons
+
+```{r PERMANOVA}
+
+# Calculate PERMANOVA comparisons for phytoplankton communities by Region and Year
+genw <- pivot_wider(df_phyto_FASTR_gen_BV, 
+                    names_from = "Genus", 
+                    values_from = "BV.um3.per.mL",
+                    values_fill = 0)
+
+adon.results <- adonis2(genw[c(7:139)] ~ genw$Region + genw$Year, 
+                        strata = genw$StationCode,
+                        method = "bray",
+                        perm = 999)
+
+adon.results <- adonis2(genw[c(7:139)] ~ genw$Region*genw$ActionPhase + genw$Year*genw$ActionPhase + genw$Region*genw$Year, 
+                        strata = genw$StationCode,
+                        method = "bray",
+                        perm = 999)
+
+dm_phyto_gen <- vegdist(genw[c(7:139)], method = "bray")
+
+bd <- betadisper(dm_phyto_gen, genw$Region)
+
+anova(bd)
+permutest(bd)
+
+```
+