Supplemental-Results.Rmd

---
title: "Supplemental Results for coastal Puerto Nuevo 16S and 18S (Summer 2016)"
author: "Sabah Ul-Hasan"
output:
  word_document: default
  pdf_document: default
  html_document: default
---

Supp Tables 1-6   Data upload, clean-up, and ampvis2 prep       Lines 20 - 204
Supp Fig 1        Alpha diversity indices & rarefaction curves  Lines 206 - 287
Figure 1          Maps of sampling sites                        Lines 289 - 360
Figure 2          Taxa richness & abundance boxplots            Lines 362 - 519
Figure 3, SF 2    Top Phyla boxplots rch, Class heatmaps abd    Lines 521 - 581
Figure 4, SF 3    Venn diagrams shared vs unique rch, PERMANOVA Lines 583 - 910
Table 1, ST 7-8   Core beta of shared vs unique OTU richness    Lines 912 - 960

Color references: https://www.rapidtables.com/web/color/RGB_Color.html , https://colorbrewer2.org

Supp Tables 1-6: Data upload, clean-up, and ampvis2 prep
```{r}
######### Upload Data ###########
# RCurl supplies getURL(), used below to fetch each raw CSV from GitHub as text
# install.packages("RCurl")
library("RCurl")
# packageVersion("RCurl") # v1.95.4.10

# Sites: metadata for the seawater and sediment sites (June 7, 2016)
Sites <- read.csv(
  text = getURL("https://raw.githubusercontent.com/sabahzero/Puerto-Nuevo_Coastal-Microbial-Ecology_16S-18S-Workflow_UlHasan-etal/master/Raw-Data/Supp_Tab6_Sites-Metadata.csv")
)
# Meta: amplicon metadata for Prok and Euk
Meta <- read.csv(
  text = getURL("https://raw.githubusercontent.com/sabahzero/Puerto-Nuevo_Coastal-Microbial-Ecology_16S-18S-Workflow_UlHasan-etal/master/Raw-Data/Supp_Tab5_Amplicon-Metadata.csv")
)

# Caenogastropoda (cone snail) removed from all
# Additional contaminants in Prokaryote and Eukaryote data removed from all

# Prok: prokaryote OTU table (seawater & sediment microbes), reads rarefied at 1000
Prok <- read.csv(
  text = getURL("https://raw.githubusercontent.com/sabahzero/Puerto-Nuevo_Coastal-Microbial-Ecology_16S-18S-Workflow_UlHasan-etal/master/Raw-Data/Supp_Tab3_16S-WatSedOTUs-1000Reads.csv")
)
# ProkTaxa: prokaryote taxa by OTU ID
# Note "Kingdom" (for ampvis2) is actually Domain
ProkTaxa <- read.csv(
  text = getURL("https://raw.githubusercontent.com/sabahzero/Puerto-Nuevo_Coastal-Microbial-Ecology_16S-18S-Workflow_UlHasan-etal/master/Raw-Data/Supp_Tab1_16S-Taxa.csv")
)

# Euk: eukaryote OTU table (seawater & sediment microbes), reads rarefied at 1000
Euk <- read.csv(
  text = getURL("https://raw.githubusercontent.com/sabahzero/Puerto-Nuevo_Coastal-Microbial-Ecology_16S-18S-Workflow_UlHasan-etal/master/Raw-Data/Supp_Tab4_18S-WatSedOTUs-1000Reads.csv")
)
# EukTaxa: eukaryote taxa by OTU ID
# Note "Kingdom" (for ampvis2) is actually Domain
EukTaxa <- read.csv(
  text = getURL("https://raw.githubusercontent.com/sabahzero/Puerto-Nuevo_Coastal-Microbial-Ecology_16S-18S-Workflow_UlHasan-etal/master/Raw-Data/Supp_Tab2_18S-Taxa.csv")
)
######### Upload Data ###########


######### Data Clean-up ###########
# Builds Meta_MSTR (site + amplicon metadata) and, for Prok and Euk each, an
# abundance table (*_ABD) and a presence/absence "richness" table (*_RCH),
# both merged with the matching taxonomy table by OTU.

# Merge Sites and Meta data tables into one master metadata table
Sites$Location = Sites$Site_Name # Copy Site_Name into a Location column so the key name matches Meta
# NOTE(review): the next line drops the FIRST column by position; Figure 1 later
# maps Site_Name from Sites[, 1:3], so confirm which column is actually removed.
Sites=Sites[, -1] # Drop Site_Name column
Sites$Location=sub("-", " ", Sites$Location) # Replace the first "-" in each Location with a space to match Meta
Meta_MSTR=merge(Sites, Meta, by = "Location") # Merge data tables by Location

# Prokaryotes (abundance and richness)
# install.packages("data.table")
library(data.table)
# packageVersion("data.table") # v1.11.4
Prok2=t(Prok) # Transpose the OTU table
Prok2=data.frame(Prok2)
setDT(Prok2, keep.rownames = TRUE) # Convert to data.table, keeping the row names as the 1st column
# install.packages("dplyr")
library("dplyr")
# packageVersion("dplyr") # v0.7.6
names(Prok2)=as.matrix(Prok2[1, ]) # Use the 1st row as the column names
Prok2=Prok2[-1, ] # ...and then drop that row from the data
Prok2[]=lapply(Prok2, function(x) type.convert(as.character(x))) # Re-infer each column's type from its text
colnames(Prok2)[1]="OTU" # Re-label column as OTU
Prok2[Prok2<3]=0 # Set counts below 3 to 0 (removes singletons and doubletons)
Prok_abd=Prok2
Prok_rch=Prok2
Prok_rch[Prok_rch>1]=1 # Presence/absence: values above 1 become 1; the "‘>’ not meaningful for factors" warning is expected and the result still works
Prok_abd$OTU=sub("X", "", Prok_abd$OTU) # Remove X for abundance
Prok_rch$OTU=sub("X", "", Prok_rch$OTU) # Remove X for richness
# Note: Assigning richness at a later stage results in OTU ID as 0 or 1
Prok_abd$OTU=as.numeric(as.character(Prok_abd$OTU)) # Convert from characters to numeric
Prok_ABD=merge(Prok_abd, ProkTaxa, by = "OTU") # Adds taxa at end of OTU table
Prok_ABD=data.frame(Prok_ABD) # FINAL
Prok_rch$OTU=as.numeric(as.character(Prok_rch$OTU)) # Convert from characters to numeric
Prok_RCH=merge(Prok_rch, ProkTaxa, by = "OTU") # Adds taxa at end of OTU table
Prok_RCH=data.frame(Prok_RCH) # FINAL

# Eukaryotes (abundance and richness)
# Same steps as for the prokaryotes above; data.table and dplyr are already loaded
# install.packages("data.table")
# library(data.table)
# packageVersion("data.table") # v1.11.4
Euk2=t(Euk) # Transpose the OTU table
Euk2=data.frame(Euk2)
setDT(Euk2, keep.rownames = TRUE) # Convert to data.table, keeping the row names as the 1st column
# install.packages("dplyr")
# library("dplyr")
# packageVersion("dplyr") # v0.7.6
names(Euk2)=as.matrix(Euk2[1, ]) # Use the 1st row as the column names
Euk2=Euk2[-1, ] # ...and then drop that row from the data
Euk2[]=lapply(Euk2, function(x) type.convert(as.character(x))) # Re-infer each column's type from its text
colnames(Euk2)[1]="OTU" # Re-label column as OTU
Euk2[Euk2<3]=0 # Set counts below 3 to 0 (removes singletons and doubletons)
Euk_abd=Euk2
Euk_rch=Euk2
Euk_rch[Euk_rch>1]=1 # Presence/absence: values above 1 become 1; the "‘>’ not meaningful for factors" warning is expected and the result still works
Euk_abd$OTU=sub("X", "", Euk_abd$OTU) # Remove X for abundance
Euk_rch$OTU=sub("X", "", Euk_rch$OTU) # Remove X for richness
# Note: Assigning richness at a later stage results in OTU ID as 0 or 1
Euk_abd$OTU=as.numeric(as.character(Euk_abd$OTU)) # Convert from characters to numeric
Euk_ABD=merge(Euk_abd, EukTaxa, by = "OTU") # Adds taxa at end of OTU table
Euk_ABD=data.frame(Euk_ABD) # FINAL
Euk_rch$OTU=as.numeric(as.character(Euk_rch$OTU)) # Convert from characters to numeric
Euk_RCH=merge(Euk_rch, EukTaxa, by = "OTU") # Adds taxa at end of OTU table
Euk_RCH=data.frame(Euk_RCH) # FINAL

# Inspect the structure of the four final tables (counts below are the author's recorded results)
str(Prok_ABD) # Domains (2), Phyla (50), Classes (130), Orders (240), Families (441), Genera (859)
str(Prok_RCH) # sanity check that dataset outputs match
str(Euk_ABD) # Domains (1), Phyla (30), Classes (56), Orders (130), Families (165), Genera (317)
str(Euk_RCH) # sanity check that dataset outputs match

# Transposed dataset for downstream figures
# Each *_ABD / *_RCH table is transposed so that samples become rows and OTUs
# become columns. The taxonomy columns appended by the earlier merge end up as
# trailing rows after transposing and are removed by keeping only the leading
# sample rows (23 for Prok, 21 for Euk — hard-coded counts).
tProk_ABD=t(Prok_ABD)
tProk_ABD=data.frame(tProk_ABD)
setDT(tProk_ABD, keep.rownames = TRUE) # Shift rows to 1st column
names(tProk_ABD)=as.matrix(tProk_ABD[1, ]) # Use the 1st row (the OTU IDs) as column names
tProk_ABD=tProk_ABD[-1, ] # Drop the row that was just used for the names
tProk_ABD=tProk_ABD[1:23, ] # Keep the first 23 rows; drops the trailing taxa-name rows
colnames(tProk_ABD)[1]="Name" # Re-label column as Name

tProk_RCH=t(Prok_RCH)
tProk_RCH=data.frame(tProk_RCH)
setDT(tProk_RCH, keep.rownames = TRUE) # Shift rows to 1st column
names(tProk_RCH)=as.matrix(tProk_RCH[1, ]) # Use the 1st row (the OTU IDs) as column names
tProk_RCH=tProk_RCH[-1, ] # Drop the row that was just used for the names
tProk_RCH=tProk_RCH[1:23, ] # Keep the first 23 rows; drops the trailing taxa-name rows
colnames(tProk_RCH)[1]="Name" # Re-label column as Name

tEuk_ABD=t(Euk_ABD)
tEuk_ABD=data.frame(tEuk_ABD)
setDT(tEuk_ABD, keep.rownames = TRUE) # Shift rows to 1st column
names(tEuk_ABD)=as.matrix(tEuk_ABD[1, ]) # Use the 1st row (the OTU IDs) as column names
tEuk_ABD=tEuk_ABD[-1, ] # Drop the row that was just used for the names
tEuk_ABD=tEuk_ABD[1:21, ] # Keep the first 21 rows; drops the trailing taxa-name rows *note Euks differ from Proks*
colnames(tEuk_ABD)[1]="ID" # Re-label column as ID *note Euks differ from Proks (Name)*

tEuk_RCH=t(Euk_RCH)
tEuk_RCH=data.frame(tEuk_RCH)
setDT(tEuk_RCH, keep.rownames = TRUE) # Shift rows to 1st column
names(tEuk_RCH)=as.matrix(tEuk_RCH[1, ]) # Use the 1st row (the OTU IDs) as column names
tEuk_RCH=tEuk_RCH[-1, ] # Drop the row that was just used for the names
tEuk_RCH=tEuk_RCH[1:21, ] # Keep the first 21 rows; drops the trailing taxa-name rows
colnames(tEuk_RCH)[1]="ID" # Re-label column as ID
######### Data Clean-up ###########


######### Save 'Clean' Data Tables as New Files #########
# Each cleaned table is written to the working directory under the file name
# used as its list name; tables prefixed with "t" are the transposed versions.
clean_tables <- list(
  "Meta_MSTR.csv" = Meta_MSTR,
  "Prok_ABD.csv"  = Prok_ABD,
  "tProk_ABD.csv" = tProk_ABD, # transposed
  "Prok_RCH.csv"  = Prok_RCH,
  "tProk_RCH.csv" = tProk_RCH, # transposed
  "Euk_ABD.csv"   = Euk_ABD,
  "tEuk_ABD.csv"  = tEuk_ABD,  # transposed
  "Euk_RCH.csv"   = Euk_RCH,
  "tEuk_RCH.csv"  = tEuk_RCH   # transposed
)
for (csv_name in names(clean_tables)) {
  write.csv(clean_tables[[csv_name]], file = csv_name)
}

# Sanity check by comparing to previously existing files
# https://github.com/sabahzero/Puerto-Nuevo_Coastal-Microbial-Ecology_16S-18S-Workflow_UlHasan-etal/tree/master/Raw-Data
######### Save 'Clean' Data Tables as New Files #########


######### ampvis2 Download and Data Prep #########
# Note: Make sure you have R v3.4.1 or later
# Install ampvis2 (and remotes, which is only needed to install it from GitHub)
# when it is missing. Guarding the installs means re-running or knitting this
# chunk does not reinstall packages or need network access every time.
if (!requireNamespace("ampvis2", quietly = TRUE)) {
  if (!requireNamespace("remotes", quietly = TRUE)) {
    install.packages("remotes")
  }
  remotes::install_github("MadsAlbertsen/ampvis2")
}
library("ampvis2") # Note: Includes ggplot2
# packageVersion("ampvis2") # v2.3.18

# Upload the "clean" data tables with the Meta_MSTR data table for ampvis2
# Prokaryotes (abundance and richness)
# Re-order since sample name needs to be in 1st column, drop "ID" (for Euk)
Prok_MSTR <- Meta_MSTR[c(16, 17, 1:15)]
Prok_ABD_amp <- amp_load(otutable = Prok_ABD,
                         metadata = Prok_MSTR) # Taxa column removed
Prok_RCH_amp <- amp_load(otutable = Prok_RCH,
                         metadata = Prok_MSTR) # Taxa column removed
# Eukaryotes (abundance and richness)
# Re-order since sample name needs to be in 1st column, drop "Name" (for Prok)
Euk_MSTR <- Meta_MSTR[c(18, 17, 1:15)]
Rows_NA <- apply(Euk_MSTR, 1, function(x){any(is.na(x))}) # Finds all rows with NAs
Euk_MSTR <- Euk_MSTR[!Rows_NA, ] # Creates data table absent of NA rows
Euk_ABD_amp <- amp_load(otutable = Euk_ABD,
                        metadata = Euk_MSTR) # Taxa column removed
Euk_RCH_amp <- amp_load(otutable = Euk_RCH,
                        metadata = Euk_MSTR) # Taxa column removed

# Test data set that comes with ampvis2 package for comparison / "sanity checks"
# (573 samples from 55 Danish Wastewater Treatment Plants in 2006 to 2013)
data("MiDAS")
MiDAS # Summary of data
MiDAS$refseq # Additional summary of data, but only viewable if from fasta files

# Note: Reads were rarefied at 1000 before OTUs assigned
# Note: amp_rarecurve() and amp_alphadiv() are ampvis2 functions for rarefaction and alpha diversity (already assessed in previous section)
Prok_ABD_amp # Summary of total reads
Prok_RCH_amp # Summary of total reads * based on richness *
Euk_ABD_amp # Summary of total reads
Euk_RCH_amp # Summary of total reads * based on richness *
######### ampvis2 Download and Data Prep #########
```

Supp Figure 1: Alpha diversity indices (Shannon and Simpson) and rarefaction curves
```{r}
######### Package Download and Data Prep ###########
# Upload the respective packages
# install.packages("vegan")
library("vegan")
# packageVersion("vegan")     # v2.5.2
# install.packages("ggplot2")
library("ggplot2")
# packageVersion("ggplot2") # v3.0.0
# install.packages("dplyr")
library("dplyr")
# packageVersion("dplyr") # v0.7.6

# Build numeric OTU count tables for the diversity functions.
# Only the OTU columns (column 2 onward) are converted, and the result goes
# straight into ProkData / EukData. tProk_ABD and tEuk_ABD are deliberately
# left untouched: converting the whole table in place would also push the
# sample-identifier column ("Name" / "ID") through as.numeric(), turning any
# non-numeric identifier into NA, and the Figure 2 abundance section merges
# these tables with Meta_MSTR on that column.
ProkData <- mutate_all(
  as.data.frame(tProk_ABD)[, 2:2201],
  function(x) as.numeric(as.character(x))
)
EukData <- mutate_all(
  as.data.frame(tEuk_ABD)[, 2:1239],
  function(x) as.numeric(as.character(x))
)
######### Packages ###########


######### Statistics ###########
# Alpha diversity per sample, computed with vegan's diversity() on the
# abundance tables. Run ?diversity interactively for the available indices
# (the help call is not executed here so the script runs unattended).
# Shannon Index (abundance) - diversity() is called without an index argument
Prok_Shan <- diversity(ProkData)
Prok_Shan

Euk_Shan <- diversity(EukData)
Euk_Shan
# Simpson Index (abundance)
Prok_Simp <- diversity(ProkData, "simpson")
Prok_Simp
Euk_Simp <- diversity(EukData, "simpson")
Euk_Simp
######### Statistics ###########


######### Plot diversity indices ###########
# One histogram per index, each written to its own PNG in the working directory.
# The Shannon files previously carried a doubled ".png.png" extension; they now
# follow the same naming as the Simpson files.
png("Supp_ProkShannon.png")
hist(Prok_Shan)
dev.off()
png("Supp_ProkSimpson.png")
hist(Prok_Simp)
dev.off()
png("Supp_EukShannon.png")
hist(Euk_Shan)
dev.off()
png("Supp_EukSimpson.png")
hist(Euk_Simp)
dev.off()
######### Plot diversity indices ###########


######### Plot rarefaction curves ###########
# Plotting parameters: expand.grid() pairs every colour with every line type
col <- c("black", "darkred", "forestgreen", "orange", "blue", "yellow", "hotpink")
lty <- c("solid", "dashed", "longdash", "dotdash")
pars <- expand.grid(col = col, lty = lty, stringsAsFactors = FALSE)
head(pars)

# Prokaryotes: the smallest per-sample read total is passed to rarecurve() as `sample`
P_max <- min(rowSums(ProkData))
P_max
png("Supp_Prok_RareCurve.png")
Prok_RareCurve <- with(
  pars[1:28, ], # inside with(), `col` refers to the colour column of these rows
  rarecurve(ProkData, step = 20, sample = P_max, col = col,
            xlab = "Number of 16S Reads", ylab = "Number of OTUs")
)
dev.off()

# Eukaryotes: same procedure on the 18S table
E_max <- min(rowSums(EukData))
E_max
png("Supp_Euk_RareCurve.png")
Euk_RareCurve <- with(
  pars[1:28, ],
  rarecurve(EukData, step = 20, sample = E_max, col = col,
            xlab = "Number of 18S Reads", ylab = "Number of OTUs")
)
dev.off()
######### Plot rarefaction curves ###########
```

Figure 1: Maps of sampling sites
```{r}
######### Packages ###########
# Upload the respective packages
# install.packages("devtools")
# Install the GitHub builds only when they are actually needed. Earlier chunks
# have already loaded ggplot2, and reinstalling a loaded package on every
# run/knit is slow, needs network access, and can leave the session in an
# inconsistent state. ggplot2 is reinstalled only if it is missing or older
# than the 3.0.0 release recorded in this file; ggmap only if it is missing.
if (!requireNamespace("ggplot2", quietly = TRUE) ||
    packageVersion("ggplot2") < "3.0.0") {
  devtools::install_github("hadley/ggplot2") # Need for main figure
}
if (!requireNamespace("ggmap", quietly = TRUE)) {
  devtools::install_github("dkahle/ggmap") # Need for main figure
}
# install.packages("ggmap") # If ggmap download above doesn't work (version transfer)
# install.packages("maps")
# install.packages("mapproj")
# install.packages("mapdata")
# install.packages("maptools")
# install.packages("sp")
# install.packages("ggsn")
# Libraries
library("ggplot2")
library("ggmap")
library("maps")
library("mapproj")
library("mapdata")
library("maptools")
library("sp")
library("ggsn")
# Package versions
# packageVersion("ggplot2") # v3.0.0.9000
# packageVersion("ggmap") # v2.6.1
# packageVersion("maps") # v3.3.0
# packageVersion("mapproj") # v1.2.6
# packageVersion("mapdata") # v2.3.0
# packageVersion("maptools") # v0.9.3
# packageVersion("sp") # v1.3.1
# packageVersion("ggsn") # v0.4.0
######### Packages ###########


######### Data Clean-up ###########
# Subset for lat and lon only
# NOTE(review): the plot below maps Site_Name, Lon and Lat from this subset, so
# the first three columns of Sites are assumed to hold them - confirm.
Sites_LatLon <- Sites[, 1:3] # [ rows, columns ]
######### Data and Clean-up ###########


######### Create Maps for Figure 1 ###########
# Identify lon and lat ranges, create buffer around
# (kept for reference; the get_map() calls below use literal bounding boxes)
xlim <- range(Sites_LatLon$Lon) + c(-.01, 0.01)
# -116.9684 -116.9242
ylim <- range(Sites_LatLon$Lat) + c(-0.01, 0.01)
# 32.22645 32.26754

# Create broad-scale map (inset)
Broad <- get_map(location = c(-118.5, 27, -113, 33), zoom = 7, maptype = "terrain")
# satellite shows bathymetry
Broad_Map <- ggmap(Broad)
# Pass the plot explicitly instead of relying on whichever plot was built last
ggsave("Figure1_InsetMap.png", plot = Broad_Map, dpi = 300)

# Create zoomed-in map (main figure)
Zoom <- get_map(location = c(-116.9685, 32.22644, -116.9241, 32.26755), maptype = "roadmap")
Zoom_Map <- ggmap(Zoom)
# Only one fill scale is added: the earlier scale_fill_hue() was immediately
# replaced by scale_fill_manual() and only produced a "scale already present"
# message, so it has been dropped.
Zoom_Map2 <- Zoom_Map +
  geom_point(data = Sites_LatLon,
             aes(x = Lon, y = Lat, fill = Site_Name, shape = Site_Name),
             color = "black", cex = 4.5) + # points
  scale_shape_manual(values = c(21, 21, 21),
                     labels = c("Minor Outlet", "Sheltered", "Major Outlet"),
                     name = NULL) + # define shapes
  scale_fill_manual(labels = c("Minor Outlet", "Sheltered", "Major Outlet"),
                    values = c("#F65A00", "#68AF2B", "#0202DE")) + # MJ orange, MN green, SH blue
  theme(legend.position = 0) + # NOTE(review): presumably meant to hide the legend ("none") - confirm
  labs(x = "Longitude", y = "Latitude") # label axes: x is mapped to Lon, y to Lat
Zoom_Map2
ggsave("Figure1_Map.png", plot = Zoom_Map2, dpi = 300)
######### Create Maps for Figure 1 ###########

# Include and adjust for final Figure 1 manually
```

Figure 2: Taxa richness (alpha) & abundance boxplots  
```{r}
######### Package Download and Data Prep ###########
# install.packages("ggplot2")
library(ggplot2)
# packageVersion("ggplot2") # v3.0.0.9000
# install.packages("gridExtra")
library(gridExtra)
# packageVersion("gridExtra") # v2.3
# install.packages("dplyr")
library("dplyr")
# packageVersion("dplyr") # v0.7.6

# Attach the sample metadata to the transposed richness tables:
# prokaryotes are keyed on "Name", eukaryotes on "ID"
Prok_Fig2 <- merge(x = Meta_MSTR, y = tProk_RCH, by = "Name")
Euk_Fig2 <- merge(x = Meta_MSTR, y = tEuk_RCH, by = "ID")
######### Package Download and Data Prep ###########


######### Statistics ###########
# For each data set: sum the OTU columns per sample, run a two-way ANOVA
# (Medium x Location), check residual normality, then compare sediment and
# seawater totals after scaling sediment by 400 (200 g / 0.5 g).
# Values in trailing comments are the author's recorded results.

# Richness
# Prokaryotes
# Convert the OTU columns to numeric and store the per-sample sum as "richness"
Prok_Fig2$richness <- rowSums(
  mutate_all(Prok_Fig2[, 21:2220], function(x) as.numeric(as.character(x)))
)

aggregate(richness ~ Medium*Location, data = Prok_Fig2, FUN = "mean")
Prok_aov <- aov(richness ~ Medium*Location, data = Prok_Fig2)

shapiro.test(residuals(Prok_aov)) # p = 0.6168
summary(Prok_aov)
# Medium: p < 0.001, Location p = 0.4778, Interaction p = 0.0562
# Significant difference in medium only
# How much richer is the sediment compared to seawater?
ProkWat <- sum(subset(Prok_Fig2, Medium == "Water")$richness) # 739
ProkSed <- sum(subset(Prok_Fig2, Medium == "Sediment")$richness)
ProkSed <- ProkSed * 400 # ( 200 g / 0.5 g = 400 ) 371200
ProkPercent <- ProkSed / ProkWat # Sed is 5.0 x 10^2 fold richer than seawater

# Eukaryotes
# Convert the OTU columns to numeric and store the per-sample sum as "richness"
Euk_Fig2$richness <- rowSums(
  mutate_all(Euk_Fig2[, 21:1258], function(x) as.numeric(as.character(x)))
)

aggregate(richness ~ Medium*Location, data = Euk_Fig2, FUN = "mean")
Euk_aov <- aov(richness ~ Medium*Location, data = Euk_Fig2)

shapiro.test(residuals(Euk_aov)) # p = 0.4487 (passed)
summary(Euk_aov)
# Medium: p < 0.003, Location p = 0.189, Interaction p = 0.229
# Significant difference in medium only
# How much richer is the sediment compared to seawater?
EukWat <- sum(subset(Euk_Fig2, Medium == "Water")$richness) # 474
EukSed <- sum(subset(Euk_Fig2, Medium == "Sediment")$richness)
EukSed <- EukSed * 400 # ( 200 g / 0.5 g = 400 ) 184400
EukPercent <- EukSed / EukWat # Sed is 3.9 x 10^2 fold richer than seawater

# Abundance
# Note: the per-sample sum is still stored in a column called "richness",
# but here it is computed from the abundance tables
# Prokaryotes
Prok_Fig2ii <- merge(Meta_MSTR, tProk_ABD, by = "Name") # Merge datasets
Prok_Fig2ii$richness <- rowSums(
  mutate_all(Prok_Fig2ii[, 21:2220], function(x) as.numeric(as.character(x)))
)
aggregate(richness ~ Medium*Location, data = Prok_Fig2ii, FUN = "mean")
Prok_aov <- aov(richness ~ Medium*Location, data = Prok_Fig2ii) # overwrites the richness model

shapiro.test(residuals(Prok_aov)) # p = 0.1709
summary(Prok_aov)
# Medium: p < 0.001, Location p = 0.7956, Interaction p = 0.0622
# Significant difference in medium, with some location interaction
# How much more abundant is the sediment compared to seawater?
ProkWat <- sum(subset(Prok_Fig2ii, Medium == "Water")$richness) # 9448
ProkSed <- sum(subset(Prok_Fig2ii, Medium == "Sediment")$richness)
ProkSed <- ProkSed * 400 # ( 200 g / 0.5 g = 400 ) 2894800
ProkPercent <- ProkSed / ProkWat # Sed is 3.0 x 10^2 fold more abundant than seawater

# Eukaryotes
Euk_Fig2ii <- merge(Meta_MSTR, tEuk_ABD, by = "ID")
Euk_Fig2ii$richness <- rowSums(
  mutate_all(Euk_Fig2ii[, 21:1258], function(x) as.numeric(as.character(x)))
)

aggregate(richness ~ Medium*Location, data = Euk_Fig2ii, FUN = "mean")
Euk_aov <- aov(richness ~ Medium*Location, data = Euk_Fig2ii) # overwrites the richness model

shapiro.test(residuals(Euk_aov)) # p = 0.4431
summary(Euk_aov)
# Medium: p < 0.001, Location p = 0.176, Interaction p = 0.915
# Significant difference in medium only
# How much more abundant is the sediment compared to seawater?
EukWat <- sum(subset(Euk_Fig2ii, Medium == "Water")$richness) # 10799
EukSed <- sum(subset(Euk_Fig2ii, Medium == "Sediment")$richness)
EukSed <- EukSed * 400 # ( 200 g / 0.5 g = 400 ) 2965600
EukPercent <- EukSed / EukWat # Sed is 2.7 x 10^2 fold more abundant than seawater
######### Statistics ###########


######### Richness Plots for Figure 2 ###########
##### Figure doesn't account for 200 mL (g) water vs 0.5 g sediment (raw)
##### Fold richness does account for this (see statistics above)
# All four Figure 2 panels share the same layout: one box per Location, filled
# by Location, with Medium mapped to alpha (both alpha values set to 1).
# Prokaryotes
Prok_Rich <-
  ggplot(Prok_Fig2,
         aes(x = Location, y = richness, fill = Location, alpha = Medium)) +
  geom_boxplot(lwd = 1, outlier.size = 2) +
  scale_y_continuous(name = "Bacteria and Archaea Richness, Raw Sample Mass") +
  scale_fill_manual(values = c("#ca431d", "#4f9da6", "#8CB045")) +
  scale_alpha_manual(values = c(1, 1)) +
  theme_linedraw() +
  theme(
    legend.position = "right",
    text = element_text(size = 9),
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 100)),
    axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))
  )
Prok_Rich
ggsave("Figure2a_ProkRich.png", dpi = 300)

# Eukaryotes
Euk_Rich <-
  ggplot(Euk_Fig2,
         aes(x = Location, y = richness, fill = Location, alpha = Medium)) +
  geom_boxplot(lwd = 1, outlier.size = 2) +
  scale_y_continuous(name = "Eukaryote Richness, Raw Sample Mass") +
  scale_fill_manual(values = c("#ca431d", "#4f9da6", "#8CB045")) +
  scale_alpha_manual(values = c(1, 1)) +
  theme_linedraw() +
  theme(
    legend.position = "right",
    text = element_text(size = 9),
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 100)),
    axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))
  )
Euk_Rich
ggsave("Figure2b_EukRich.png", dpi = 300)
######### Richness Plots for Figure 2 ###########


######### Abundance Plots for Figure 2 ###########
##### Figure doesn't account for 200 mL (g) water vs 0.5 g sediment (raw)
##### Fold abundance does account for this (see statistics above)
# The y aesthetic is still the column named "richness"; in the *_Fig2ii tables
# it holds the per-sample sums of the abundance data.
# Prokaryotes
Prok_Abund <-
  ggplot(Prok_Fig2ii,
         aes(x = Location, y = richness, fill = Location, alpha = Medium)) +
  geom_boxplot(lwd = 1, outlier.size = 2) +
  scale_y_continuous(name = "Bacteria and Archaea Abundance, Raw Sample Mass") +
  scale_fill_manual(values = c("#ca431d", "#4f9da6", "#8CB045")) +
  scale_alpha_manual(values = c(1, 1)) +
  theme_linedraw() +
  theme(
    legend.position = "right",
    text = element_text(size = 9),
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 100)),
    axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))
  )
Prok_Abund
ggsave("Figure2a_ProkAbd.png", dpi = 300)

# Eukaryotes
Euk_Abund <-
  ggplot(Euk_Fig2ii,
         aes(x = Location, y = richness, fill = Location, alpha = Medium)) +
  geom_boxplot(lwd = 1, outlier.size = 2) +
  scale_y_continuous(name = "Eukaryote Abundance, Raw Sample Mass") +
  scale_fill_manual(values = c("#ca431d", "#4f9da6", "#8CB045")) +
  scale_alpha_manual(values = c(1, 1)) +
  theme_linedraw() +
  theme(
    legend.position = "right",
    text = element_text(size = 9),
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 100)),
    axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))
  )
Euk_Abund
ggsave("Figure2b_EukAbd.png", dpi = 300)
######### Abundance Plots for Figure 2 ###########

# Adjust for final Figure 2 manually
```

Figure 3, SF 2: Top Phyla boxplots rch, Class heatmaps abd 
```{r}
# Top 10 phyla boxplots (richness) with top 20 class heatmaps (abundance)
# Ampvis2 package used; each plot is saved with ggsave() right after it is drawn

######### Boxplots of top phyla, based on richness (includes SI2) #########
# Figure 3: top 10 phyla, grouped by Medium
amp_boxplot(
  Prok_RCH_amp,
  group_by = "Medium",
  tax_aggregate = "Phylum",
  tax_show = 10
)
ggsave("Fig3_Prok10Phyla_Boxplot.png", dpi = 300)

amp_boxplot(
  Euk_RCH_amp,
  group_by = "Medium",
  tax_aggregate = "Phylum",
  tax_show = 10
)
ggsave("Fig3_Euk10Phyla_Boxplot.png", dpi = 300)

# Supp Image 2 (pt 1): top 20 phyla, grouped by Location
amp_boxplot(
  Prok_RCH_amp,
  group_by = "Location",
  tax_aggregate = "Phylum",
  tax_show = 20
)
ggsave("SuppI2_Prok20Phyla_Boxplot.png", dpi = 300)

amp_boxplot(
  Euk_RCH_amp,
  group_by = "Location",
  tax_aggregate = "Phylum",
  tax_show = 20
)
ggsave("SuppI2_Euk20Phyla_Boxplot.png", dpi = 300)
######### Boxplots of top phyla, based on richness (includes SI2) #########


######### Heatmaps of top class, based on abundance #########
# Top 20 classes (labelled with their phylum), grouped by Medium and faceted
# by Location, on a white-to-near-black colour scale without printed values
P_abd <- amp_heatmap(
  Prok_ABD_amp,
  group_by = "Medium",
  facet_by = "Location",
  tax_aggregate = "Class",
  tax_add = "Phylum",
  tax_show = 20,
  color_vector = c("white", "#181817"),
  plot_values = FALSE
) +
  theme_linedraw() +
  theme(
    legend.position = "right",
    text = element_text(size = 8),
    axis.title.y = element_text(margin = margin(t = 0, r = 0, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 0, r = 0, b = 0, l = 0))
  )
P_abd
ggsave("Fig3_Prok20Class_Heatmap.png", dpi = 300)

E_abd <- amp_heatmap(
  Euk_ABD_amp,
  group_by = "Medium",
  facet_by = "Location",
  tax_aggregate = "Class",
  tax_add = "Phylum",
  tax_show = 20,
  color_vector = c("white", "#181817"),
  plot_values = FALSE
) +
  theme_linedraw() +
  theme(
    legend.position = "right",
    text = element_text(size = 8),
    axis.title.y = element_text(margin = margin(t = 0, r = 0, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 0, r = 0, b = 0, l = 0))
  )
E_abd
ggsave("Fig3_Euk20Class_Heatmap.png", dpi = 300)
######### Heatmaps of top class, based on abundance #########
```

Figure 4, SF3: Venn diagrams shared vs unique rch and PCAs, PERMANOVA
```{r}
# Merge the appropriate datasets
# (rebuilt from the richness tables, so any columns added earlier are dropped)
Prok_Fig2 <- merge(x = Meta_MSTR, y = tProk_RCH, by = "Name")
Euk_Fig2 <- merge(x = Meta_MSTR, y = tEuk_RCH, by = "ID")

######### Statistics ###########
# Upload packages
# install.packages("vegan")
library("vegan")
# packageVersion("vegan") # v2.5.2
# install.packages("dplyr")
library("dplyr")
# packageVersion("dplyr") # v0.7.6

# Permanova statistics
# OTU columns are converted to numeric and tested against Medium, Location and
# their interaction; bray is a quantitative distance method
adonis(
  (mutate_all(Prok_Fig2[, 21:2220], function(x) as.numeric(as.character(x)))) ~
    Prok_Fig2$Medium*Prok_Fig2$Location,
  method = "bray"
)
# Medium: p = 0.001, Location p = 0.198, interaction: p = 0.182
# F.model: Medium = 68.06, Location = 1.62, interaction = 1.65
# significant difference by medium, not by location or interaction
adonis(
  (mutate_all(Euk_Fig2[, 21:1258], function(x) as.numeric(as.character(x)))) ~
    Euk_Fig2$Medium*Euk_Fig2$Location,
  method = "bray"
)
# Medium: p = 0.001, Location: p = 0.097, interaction: p = 0.163
# F.model: Medium = 25.09, Location = 1.80, interaction = 1.58
# significant difference by medium and location, but not by interaction
# NOTE(review): Location p = 0.097 is above 0.05 - confirm the threshold behind "and location"

# Pairwise function identifies which locations are different
#
# Runs one PERMANOVA (vegan::adonis) for every pair of groups in `factors`.
#
# Arguments:
#   x           community data with one row per sample (data frame or matrix)
#   factors     grouping vector with one entry per row of `x`
#   sim.method  dissimilarity method, passed to adonis() as `method`
#   p.adjust.m  multiple-testing correction, passed to p.adjust() as `method`
#
# Returns a data.frame with one row per pair and the columns
# pairs, F.Model, R2, p.value and p.adjusted.
# Stops with an error when `factors` holds fewer than two distinct groups.
pairwise.adonis <- function(x, factors, sim.method, p.adjust.m)
{
  groups <- as.character(unique(factors))
  if (length(groups) < 2) {
    stop("pairwise.adonis() needs at least two distinct groups in 'factors'",
         call. = FALSE)
  }

  # One column per pair of group labels
  co <- combn(groups, 2)
  n_pairs <- ncol(co)

  # Preallocate the result vectors instead of growing them inside the loop
  pairs <- character(n_pairs)
  F.Model <- numeric(n_pairs)
  R2 <- numeric(n_pairs)
  p.value <- numeric(n_pairs)

  for (elem in seq_len(n_pairs)) {
    # Keep only the samples that belong to the two groups being compared
    keep <- factors %in% co[, elem]
    pair_data <- x[keep, ]
    pair_groups <- factors[keep]

    # vegan::adonis is called with its namespace so the function does not
    # attach a package as a side effect
    ad <- vegan::adonis(pair_data ~ pair_groups, method = sim.method)

    pairs[elem] <- paste(co[1, elem], 'vs', co[2, elem])
    # First row of the adonis table is the grouping term:
    # column 4 = F.Model, column 5 = R2, column 6 = p-value
    F.Model[elem] <- ad$aov.tab[1, 4]
    R2[elem] <- ad$aov.tab[1, 5]
    p.value[elem] <- ad$aov.tab[1, 6]
  }

  p.adjusted <- p.adjust(p.value, method = p.adjust.m)
  data.frame(pairs, F.Model, R2, p.value, p.adjusted)
}

# Permanova statistics by environment type
# Samples are selected by their Medium value rather than by hard-coded row
# positions, so the subsets stay correct if the merge order or the number of
# samples changes (this matches how Medium is subset elsewhere in this file).
# Values in trailing comments are the author's recorded results.
Prok_Sed <- subset(Prok_Fig2, Medium == "Sediment")
adonis((mutate_all(Prok_Sed[, 21:2220], function(x) as.numeric(as.character(x)))) ~ Prok_Sed$Location, method = "bray") # p = 0.127, f = 1.23
pairwise.adonis((mutate_all(Prok_Sed[, 21:2220], function(x) as.numeric(as.character(x)))), Prok_Sed$Location, sim.method = "bray", p.adjust.m = "bonferroni")
# Sediment: No prok sites are different
Prok_Wat <- subset(Prok_Fig2, Medium == "Water")
adonis((mutate_all(Prok_Wat[, 21:2220], function(x) as.numeric(as.character(x)))) ~ Prok_Wat$Location, method = "bray") # p = 0.017, f = 2.09
pairwise.adonis((mutate_all(Prok_Wat[, 21:2220], function(x) as.numeric(as.character(x)))), Prok_Wat$Location, sim.method = "bray", p.adjust.m = "bonferroni")
# Seawater: Major outlet proks are different (MJ-SH p = 0.084 : f = 2.4502, MJ-MN p = 0.021 : f = 2.7988)
# NOTE(review): MJ-SH p = 0.084 is above 0.05 - confirm the threshold used for "different"
Euk_Sed <- subset(Euk_Fig2, Medium == "Sediment")
adonis((mutate_all(Euk_Sed[, 21:1258], function(x) as.numeric(as.character(x)))) ~ Euk_Sed$Location, method = "bray") # p = 0.22, f = 1.17
pairwise.adonis((mutate_all(Euk_Sed[, 21:1258], function(x) as.numeric(as.character(x)))), Euk_Sed$Location, sim.method = "bray", p.adjust.m = "bonferroni")
# Sediment: No euks sites are different
Euk_Wat <- subset(Euk_Fig2, Medium == "Water")
adonis((mutate_all(Euk_Wat[, 21:1258], function(x) as.numeric(as.character(x)))) ~ Euk_Wat$Location, method = "bray") # p = 0.003, f = 2.2826
pairwise.adonis((mutate_all(Euk_Wat[, 21:1258], function(x) as.numeric(as.character(x)))), Euk_Wat$Location, sim.method = "bray", p.adjust.m = "bonferroni")
# Seawater: All euks different
# (MJ-SH p = 0.057 : f = 2.061600, MJ-MN p = 0.029 : f = 2.687631, SH-MN p = 0.027 : f = 2.016761)
# NOTE(review): MJ-SH p = 0.057 is above 0.05 - confirm the threshold behind "All euks different"
######### Statistics ###########


######### Venn diagrams, based on richness #########
# install.packages("VennDiagram")
library("VennDiagram")
# packageVersion("VennDiagram") # v1.6.20
# install.packages("dplyr")
library("dplyr")
# packageVersion("dplyr") # v0.7.6

# Aggregate by Medium and Location
# Each OTU column is summed within every Medium x Location group; an OTU counts
# as present in a group when its sum is above 0.
# Proks
venn.df.Prok <- aggregate(
  (mutate_all(Prok_Fig2[, 21:2220], function(x) as.numeric(as.character(x)))),
  by = list(Prok_Fig2$Medium, Prok_Fig2$Location),
  FUN = "sum"
)

venn.df.Prok[, 1:7] # check row order, sediment should be rows 1,3,5
venn.df.Prok <- venn.df.Prok[, -(1:2)] # Get rid of "Group.1" and "Group.2"

# Euks
venn.df.Euk <- aggregate(
  (mutate_all(Euk_Fig2[, 21:1258], function(x) as.numeric(as.character(x)))),
  by = list(Euk_Fig2$Medium, Euk_Fig2$Location),
  FUN = "sum"
)

venn.df.Euk[, 1:7] # check row order, sediment should be rows 1,3,5
venn.df.Euk <- venn.df.Euk[, -(1:2)] # Get rid of "Group.1" and "Group.2"

# Create figures
# Row indices below rely on the row order checked above; each vector holds the
# names of the OTU columns whose group sum is above 0.
# ProkSed
ProkSed.MjO <- colnames(venn.df.Prok[5, apply(venn.df.Prok[5, ], MARGIN = 2, function(x) any(x > 0))])
ProkSed.MnO <- colnames(venn.df.Prok[1, apply(venn.df.Prok[1, ], MARGIN = 2, function(x) any(x > 0))])
ProkSed.Sh <- colnames(venn.df.Prok[3, apply(venn.df.Prok[3, ], MARGIN = 2, function(x) any(x > 0))])

venn.diagram(
  x = list(ProkSed.MjO, ProkSed.MnO, ProkSed.Sh),
  filename = "SI2_ProkSed.png",
  category = c("Major Outlet", "Minor Outlet", "Sheltered"),
  print.mode = "percent",
  sigdigs = 2,
  cex = 1.5,
  fill = c("#FFFFFF", "#797171", "#000000"),
  alpha = 0.4
)
# ProkWat
ProkWat.MjO <- colnames(venn.df.Prok[6, apply(venn.df.Prok[6, ], MARGIN = 2, function(x) any(x > 0))])
ProkWat.MnO <- colnames(venn.df.Prok[2, apply(venn.df.Prok[2, ], MARGIN = 2, function(x) any(x > 0))])
ProkWat.Sh <- colnames(venn.df.Prok[4, apply(venn.df.Prok[4, ], MARGIN = 2, function(x) any(x > 0))])

venn.diagram(
  x = list(ProkWat.MjO, ProkWat.MnO, ProkWat.Sh),
  filename = "SI2_ProkWat.png",
  category = c("Major Outlet", "Minor Outlet", "Sheltered"),
  print.mode = "percent",
  sigdigs = 2,
  cex = 1.5,
  fill = c("#ca431d", "#797171", "#000000"),
  alpha = 0.4
)
# EukSed
EukSed.MjO <- colnames(venn.df.Euk[5, apply(venn.df.Euk[5, ], MARGIN = 2, function(x) any(x > 0))])
EukSed.MnO <- colnames(venn.df.Euk[1, apply(venn.df.Euk[1, ], MARGIN = 2, function(x) any(x > 0))])
EukSed.Sh <- colnames(venn.df.Euk[3, apply(venn.df.Euk[3, ], MARGIN = 2, function(x) any(x > 0))])

venn.diagram(
  x = list(EukSed.MjO, EukSed.MnO, EukSed.Sh),
  filename = "SI2_EukSed.png",
  category = c("Major Outlet", "Minor Outlet", "Sheltered"),
  print.mode = "percent",
  sigdigs = 2,
  cex = 1.5,
  fill = c("#FFFFFF", "#797171", "#000000"),
  alpha = 0.4
)
# EukWat
EukWat.MjO <- colnames(venn.df.Euk[6, apply(venn.df.Euk[6, ], MARGIN = 2, function(x) any(x > 0))])
EukWat.MnO <- colnames(venn.df.Euk[2, apply(venn.df.Euk[2, ], MARGIN = 2, function(x) any(x > 0))])
EukWat.Sh <- colnames(venn.df.Euk[4, apply(venn.df.Euk[4, ], MARGIN = 2, function(x) any(x > 0))])

venn.diagram(
  x = list(EukWat.MjO, EukWat.MnO, EukWat.Sh),
  filename = "SI2_EukWat.png",
  category = c("Major Outlet", "Minor Outlet", "Sheltered"),
  print.mode = "percent",
  sigdigs = 2,
  cex = 1.5,
  fill = c("#ca431d", "#4f9da6", "#8CB045"),
  alpha = 0.6
)
######### Venn diagrams, based on richness #########


######### PCAs based on richness (SI3) #########
# Upload packages
# install.packages("vegan")
library(vegan)
# packageVersion("vegan") # v2.5.2
# install.packages("ggplot2")
library(ggplot2)
# packageVersion("ggplot2") # v3.0.0.9000

# Subset by Medium (each subset still contains all Locations)
ProkSed <- subset(Prok_Fig2, Medium == "Sediment")
ProkWat <- subset(Prok_Fig2, Medium == "Water")
EukSed <- subset(Euk_Fig2, Medium == "Sediment")
EukWat <- subset(Euk_Fig2, Medium == "Water")

# Numeric OTU matrices for each subset (16S = prokaryotes, 18S = eukaryotes)
Sed_16S <- mutate_all(ProkSed[, 21:2220], function(x) as.numeric(as.character(x)))
H2O_16S <- mutate_all(ProkWat[, 21:2220], function(x) as.numeric(as.character(x)))
Sed_18S <- mutate_all(EukSed[, 21:1258], function(x) as.numeric(as.character(x)))
H2O_18S <- mutate_all(EukWat[, 21:1258], function(x) as.numeric(as.character(x)))

# Create PCAs (variance figures are the author's recorded results)
ProkSed.PCA <- prcomp(Sed_16S)
summary(ProkSed.PCA) # Comps 1(.15), 2(.13) = 28% variance
ProkWat.PCA <- prcomp(H2O_16S)
summary(ProkWat.PCA) # Comps 1(.23), 2(.12) = 35% variance
EukSed.PCA <- prcomp(Sed_18S)
summary(EukSed.PCA) # Comps 1(.18), 2(.16) = 34% variance
EukWat.PCA <- prcomp(H2O_18S)
summary(EukWat.PCA) # Comps 1(.22), 2(.14) = 36% variance

# Adjust stat_ellipse function so n=3 or more for ellipse (n=4 before)
# Local replacement for ggplot2's stat_ellipse(): it builds a layer around the
# StatEllipse object defined in this file, so the relaxed minimum group size in
# calculate_ellipse() is the one that gets used.
#
# Arguments:
#   mapping, data, geom, position, show.legend, inherit.aes
#             passed straight to layer().
#   type      ellipse type handed to the stat ("t", "norm" or "euclid").
#   level     confidence level (or radius for "euclid"); default 0.95.
#   segments  number of line segments used to draw the ellipse.
#   na.rm     forwarded to the stat as a parameter.
#   ...       any further parameters, appended to the layer's params list.
# Returns the object created by layer().
stat_ellipse <- function(mapping = NULL, data = NULL,
                         geom = "path", position = "identity",
                         ...,
                         type = "t",
                         level = 0.95,
                         segments = 51,
                         na.rm = FALSE,
                         show.legend = NA,
                         inherit.aes = TRUE) {
  # Collect the stat parameters first; extra arguments from `...` go last.
  layer_params <- list(
    type = type,
    level = level,
    segments = segments,
    na.rm = na.rm,
    ...
  )

  layer(
    data = data,
    mapping = mapping,
    stat = StatEllipse,
    geom = geom,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = layer_params
  )
}

# Stat object used by the local stat_ellipse(): it requires x and y aesthetics
# and, for each group, delegates to calculate_ellipse() on those two columns.
# `scales` and `na.rm` are accepted but not used inside compute_group.
StatEllipse <- ggproto(
  "StatEllipse", Stat,
  required_aes = c("x", "y"),
  compute_group = function(data, scales, type = "t", level = 0.95,
                           segments = 51, na.rm = FALSE) {
    calculate_ellipse(data, c("x", "y"), type, level, segments)
  }
)

# Compute the outline of a data ellipse for one group of points.
#
# Arguments:
#   data      data frame holding the point coordinates.
#   vars      names of the two coordinate columns (also the output names).
#   type      "t" (MASS::cov.trob), "norm" (stats::cov.wt) or "euclid"
#             (circle of radius `level` around the centre).
#   level     confidence level for "t"/"norm"; radius for "euclid".
#   segments  number of segments; the outline has segments + 1 rows.
# Returns a data frame with columns named by `vars`. When `type` is not
# recognised or the group has fewer than 3 rows, a message is emitted and a
# single all-NA row is returned.
calculate_ellipse <- function(data, vars, type, level, segments) {
  # Wrap an outline matrix in a data frame whose columns are named by `vars`.
  as_ellipse_frame <- function(points) {
    out <- as.data.frame(points)
    colnames(out) <- vars
    out
  }
  # Placeholder outline used when no ellipse can be drawn.
  no_ellipse <- matrix(NA_real_, nrow = 1, ncol = 2)

  num_df <- 2
  den_df <- nrow(data) - 1

  if (!type %in% c("t", "norm", "euclid")) {
    message("Unrecognized ellipse type")
    return(as_ellipse_frame(no_ellipse))
  }
  if (den_df < 2) { # threshold lowered from 3 to 2, so 3 points are enough
    message("Too few points to calculate an ellipse")
    return(as_ellipse_frame(no_ellipse))
  }

  # Centre and covariance estimate for the requested ellipse type.
  fit <- switch(type,
    t = MASS::cov.trob(data[, vars]),
    norm = stats::cov.wt(data[, vars]),
    euclid = {
      wt <- stats::cov.wt(data[, vars])
      # Same (smallest) variance on both axes, no covariance.
      wt$cov <- diag(rep(min(diag(wt$cov)), 2))
      wt
    }
  )
  centre <- fit$center
  chol_factor <- chol(fit$cov)

  radius <- if (type == "euclid") {
    level / max(chol_factor)
  } else {
    sqrt(num_df * stats::qf(level, num_df, den_df))
  }

  # Unit circle sampled at segments + 1 angles, then scaled, sheared and
  # shifted onto the data.
  theta <- (0:segments) * 2 * pi / segments
  circle <- cbind(cos(theta), sin(theta))
  as_ellipse_frame(t(centre + radius * t(circle %*% chol_factor)))
}

# ProkSed PCA
# PCA sample scores for prokaryote sediment, labelled with medium and location.
ProkSed_Meta <- data.frame(ProkSed.PCA$x,
                           Type = ProkSed$Medium,
                           Location = ProkSed$Location)

# Keep switching the session default theme to linedraw, as the original call
# did. theme_set() returns the PREVIOUS theme, so it must not be added to a
# plot with `+`: doing so styled this figure with whatever theme was active
# before (the ggplot2 default on a fresh session).
theme_set(theme_linedraw())

# From here on ProkSed holds the plot object, no longer the sediment subset.
# x/y are mapped inside aes(); arguments such as x=, y= or col= given to
# ggplot() outside aes() are ignored.
# http://ggplot2.tidyverse.org/reference/ggtheme.html
ProkSed <-
  ggplot(data = ProkSed_Meta, aes(x = PC1, y = PC2)) +
  geom_point(aes(col = Location), size = 5) +
  # 95% ellipse per location (level = 0.95, type = "t" defaults of the local
  # stat_ellipse), filled and outlined in the location colour.
  stat_ellipse(geom = "polygon",
               alpha = 0.35,
               aes(fill = Location, color = Location)) +
  scale_fill_manual(values = c("#ca431d", "#4f9da6", "#8CB045")) +
  scale_color_manual(values = c("#ca431d", "#4f9da6", "#8CB045")) +
  # NOTE(review): the complete theme added last resets plot.title, just as the
  # original ordering did; move this line below theme_linedraw() if the 40 pt
  # title is actually wanted.
  theme(plot.title = element_text(size = 40)) +
  ggtitle("Bacteria and Archaea (Sediment)") +
  theme_linedraw()
# Save this plot explicitly instead of relying on last_plot().
ggsave("SI3_ProkSed.png", plot = ProkSed, dpi = 300)
# ProkWat PCA
# PCA sample scores for prokaryote seawater, labelled with medium and location.
ProkWat_Meta <- data.frame(ProkWat.PCA$x,
                           Type = ProkWat$Medium,
                           Location = ProkWat$Location)

# Keep switching the session default theme to linedraw, as the original calls
# did. theme_set() returns the PREVIOUS theme, so it must not be added to a
# plot with `+`; the plot gets theme_linedraw() directly instead.
theme_set(theme_linedraw())

# From here on ProkWat holds the plot object, no longer the seawater subset.
# x/y are mapped inside aes(); arguments such as x=, y= or col= given to
# ggplot() outside aes() are ignored.
# http://ggplot2.tidyverse.org/reference/ggtheme.html
ProkWat <-
  ggplot(data = ProkWat_Meta, aes(x = PC1, y = PC2)) +
  geom_point(aes(col = Location), size = 5) +
  # 95% ellipse per location (level = 0.95, type = "t" defaults of the local
  # stat_ellipse), filled by location with no mapped outline colour.
  stat_ellipse(geom = "polygon",
               alpha = 0.15,
               aes(fill = Location)) +
  scale_fill_manual(values = c("#ca431d", "#4f9da6", "#8CB045")) +
  scale_color_manual(values = c("#ca431d", "#4f9da6", "#8CB045")) +
  ggtitle("Bacteria and Archaea (Seawater)") +
  theme_linedraw()
# Save this plot explicitly instead of relying on last_plot().
ggsave("SI3_ProkWat.png", plot = ProkWat, dpi = 300)
# EukSed PCA
# PCA sample scores for eukaryote sediment, labelled with medium and location.
EukSed_Meta <- data.frame(EukSed.PCA$x,
                          Type = EukSed$Medium,
                          Location = EukSed$Location)

# Keep switching the session default theme to linedraw, as the original calls
# did. theme_set() returns the PREVIOUS theme, so it must not be added to a
# plot with `+`; the plot gets theme_linedraw() directly instead.
theme_set(theme_linedraw())

# From here on EukSed holds the plot object, no longer the sediment subset.
# x/y are mapped inside aes(); arguments such as x=, y= or col= given to
# ggplot() outside aes() are ignored.
# http://ggplot2.tidyverse.org/reference/ggtheme.html
EukSed <-
  ggplot(data = EukSed_Meta, aes(x = PC1, y = PC2)) +
  geom_point(aes(col = Location), size = 5) +
  # 95% ellipse per location (level = 0.95, type = "t" defaults of the local
  # stat_ellipse), filled and outlined in the location colour.
  stat_ellipse(geom = "polygon",
               alpha = 0.35,
               aes(fill = Location, color = Location)) +
  scale_fill_manual(values = c("#ca431d", "#4f9da6", "#8CB045")) +
  scale_color_manual(values = c("#ca431d", "#4f9da6", "#8CB045")) +
  ggtitle("Eukaryotes (Sediment)") +
  theme_linedraw()
# Save this plot explicitly instead of relying on last_plot().
ggsave("SI3_EukSed.png", plot = EukSed, dpi = 300)
# EukWat PCA
# PCA sample scores for eukaryote seawater, labelled with medium and location.
EukWat_Meta <- data.frame(EukWat.PCA$x,
                          Type = EukWat$Medium,
                          Location = EukWat$Location)

# Keep switching the session default theme to linedraw, as the original calls
# did. theme_set() returns the PREVIOUS theme, so it must not be added to a
# plot with `+`; the plot gets theme_linedraw() directly instead.
theme_set(theme_linedraw())

# From here on EukWat holds the plot object, no longer the seawater subset.
# x/y are mapped inside aes(); arguments such as x=, y= or col= given to
# ggplot() outside aes() are ignored.
# http://ggplot2.tidyverse.org/reference/ggtheme.html
EukWat <-
  ggplot(data = EukWat_Meta, aes(x = PC1, y = PC2)) +
  geom_point(aes(col = Location), size = 5) +
  # 95% ellipse per location (level = 0.95, type = "t" defaults of the local
  # stat_ellipse), filled by location with no mapped outline colour.
  stat_ellipse(geom = "polygon",
               alpha = 0.15,
               aes(fill = Location)) +
  scale_fill_manual(values = c("#ca431d", "#4f9da6", "#8CB045")) +
  scale_color_manual(values = c("#ca431d", "#4f9da6", "#8CB045")) +
  ggtitle("Eukaryotes (Seawater)") +
  theme_linedraw()
# Save this plot explicitly instead of relying on last_plot().
ggsave("SI3_EukWat.png", plot = EukWat, dpi = 300)
######### PCAs based on richness (SI3) #########

# Adjust for final figures manually
```

Table 1, Supp Tables 7-8: Core beta, shared vs unique OTU richness 
```{r}
# Data prep
PData <- Prok_RCH
EData <- Euk_RCH

# A taxon is "core" for a group of columns when its row sum equals the number
# of columns in that group (presumably 0/1 presence values per sample --
# confirm against Prok_RCH / Euk_RCH). Thresholds are derived from the column
# index vectors so the two can never drift apart.

# Warn when the SH/MN/MJ column groups do not split exactly the columns used
# for the overall total, i.e. a location sum would include a column that is not
# one of the sample columns (or would miss or double-count one).
warn_if_locations_mismatch <- function(label, total_cols, location_cols) {
  loc <- unlist(location_cols, use.names = FALSE)
  extra <- setdiff(loc, total_cols)
  missing_cols <- setdiff(total_cols, loc)
  dupes <- unique(loc[duplicated(loc)])
  if (length(extra) > 0 || length(missing_cols) > 0 || length(dupes) > 0) {
    describe <- function(v) if (length(v) > 0) paste(v, collapse = ",") else "none"
    warning(
      label, ": SH/MN/MJ column groups do not match the TotalSum columns ",
      "(extra: ", describe(extra),
      "; missing: ", describe(missing_cols),
      "; duplicated: ", describe(dupes), ")",
      call. = FALSE
    )
  }
  invisible(NULL)
}

# Core taxa across all samples for Proks or Euks (locations and environments)
# Prokaryote column positions
prok_all <- 2:24                 # all Prok samples
prok_wat <- 2:13                 # seawater
prok_sed <- 14:24                # sediment
prok_sh <- c(2:5, 14:17)         # SH
prok_mn <- c(6:9, 18:20)         # MN
prok_mj <- c(10:13, 21:24)       # MJ
warn_if_locations_mismatch("Prok", prok_all, list(prok_sh, prok_mn, prok_mj))

# Across all Prok samples
PData$TotalSum <- rowSums(PData[, prok_all])
# Environment types (seawater vs sediment)
PData$WatSum <- rowSums(PData[, prok_wat])
PData$SedSum <- rowSums(PData[, prok_sed])
# Locations (SH, MN, and MJ)
PData$SHSum <- rowSums(PData[, prok_sh])
PData$MNSum <- rowSums(PData[, prok_mn])
PData$MJSum <- rowSums(PData[, prok_mj])

Prok_Core <- subset(PData, TotalSum == length(prok_all)) # All: 1 core taxa
ProkWat_Core <- subset(PData, WatSum == length(prok_wat)) # SEA: 19 core taxa
ProkSed_Core <- subset(PData, SedSum == length(prok_sed)) # SED: 27 core taxa
ProkSH_Core <- subset(PData, SHSum == length(prok_sh)) # SH: 1 core taxa
ProkMN_Core <- subset(PData, MNSum == length(prok_mn)) # MN: 1 core taxa
ProkMJ_Core <- subset(PData, MJSum == length(prok_mj)) # MJ: 1 core taxa

# Eukaryote column positions
euk_all <- 2:21                  # all Euk samples
euk_wat <- 11:21                 # seawater
euk_sed <- 2:10                  # sediment
euk_sh <- c(2:4, 11:14)          # SH
euk_mn <- c(5:7, 15:18)          # MN
# NOTE(review): 19:22 reaches column 22, while every other Euk sum treats
# column 21 as the last sample column. By the time MJSum is computed, column
# 22 may be a derived column (e.g. TotalSum) rather than a sample. The range
# is kept as in the original so results do not change silently; the check
# below raises a warning. Confirm the intended MJ seawater columns (19:21
# would give a threshold of 6) and correct euk_mj accordingly.
euk_mj <- c(8:10, 19:22)         # MJ
warn_if_locations_mismatch("Euk", euk_all, list(euk_sh, euk_mn, euk_mj))

# Across all Euk samples
EData$TotalSum <- rowSums(EData[, euk_all])
# Environment types (seawater vs sediment)
EData$WatSum <- rowSums(EData[, euk_wat])
EData$SedSum <- rowSums(EData[, euk_sed])
# Locations (SH, MN, and MJ)
EData$SHSum <- rowSums(EData[, euk_sh])
EData$MNSum <- rowSums(EData[, euk_mn])
EData$MJSum <- rowSums(EData[, euk_mj])

Euk_Core <- subset(EData, TotalSum == length(euk_all)) # All: 1 core taxa
EukWat_Core <- subset(EData, WatSum == length(euk_wat)) # SEA: 6 core taxa
EukSed_Core <- subset(EData, SedSum == length(euk_sed)) # SED: 10 core taxa
EukSH_Core <- subset(EData, SHSum == length(euk_sh)) # SH: 6 core taxa
EukMN_Core <- subset(EData, MNSum == length(euk_mn)) # MN: 3 core taxa
EukMJ_Core <- subset(EData, MJSum == length(euk_mj)) # MJ: 5 core taxa

# Merge into tables and save
# Rows are stacked in the order: all, seawater, sediment, SH, MN, MJ.
Tab1_Prok <- rbind(Prok_Core, ProkWat_Core, ProkSed_Core,
                   ProkSH_Core, ProkMN_Core, ProkMJ_Core)
write.csv(Tab1_Prok, file = "Tab1_Prok.csv")
Tab1_Euk <- rbind(Euk_Core, EukWat_Core, EukSed_Core,
                  EukSH_Core, EukMN_Core, EukMJ_Core)
write.csv(Tab1_Euk, file = "Tab1_Euk.csv")

# Adjust for final Table 1 manually
```