D Catlett 8/24/2020
This is an analysis of ensemble taxonomic assignments computed from 4 different assignment algorithm-reference database combo's. bayes and idtax are the algorithms, pr2 and silva are the databases. Starting from initial taxonomy tables from Mar20 dada2 run.
We'll start with a large data set of 18S-V9 ASVs inferred by dada2. The data come from several studies of marine protist communities, in addition to some extensive mock community validations. The primers are 'universal', meaning we have both prokaryotes and eukaryotes in our data set. The raw MiSeq data were processed with dada2 and four different batches of taxonomic assignments were made by running the bayesian classifer (via dada2's assignTaxonomy function) and the idtaxa algorithm (via the DECIPHER package) against both the Silva nr SSU v138 database and the Protistan Ribosomal Reference database v4.12.0. No bootstrap thresholds have been designated yet - we will apply those with the ensembleTax pre-processing functions below.
First we'll load the data and the ensembleTax package, and use the ensembleTax pre-processing functions to format the taxonomy tables properly and to set our bootstrap thresholds. Included in this repo is a "asv rubric" fasta file that has ASV sequences and arbitrary identifying names.
rm(list=ls())
library("ensembleTax")
packageVersion("ensembleTax")
## [1] '1.0.2'
library("Biostrings")
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: XVector
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
library("stringr")
bayes.pr2 <- readRDS("initial_tax_tabs/bayes_pr2_0boot_Mar20.rds")
bayes.silva <- readRDS("initial_tax_tabs/bayes_silva_0boot_Mar20.rds")
idtax.pr2 <- readRDS("initial_tax_tabs/idtax_pr2_0boot_Mar20.rds")
idtax.silva <- readRDS("initial_tax_tabs/idtax_silva_0boot_Mar20.rds")
asv.rubric <- readDNAStringSet("asv_rubric.fasta")
bayes.pr2 <- bayestax2df(bayes.pr2, db = "pr2", boot = 60, rubric = asv.rubric, return.conf = FALSE)
bayes.silva <- bayestax2df(bayes.silva, db = "silva", boot = 60, rubric = asv.rubric, return.conf = FALSE)
idtax.pr2 <- idtax2df(idtax.pr2, db = "pr2", boot = 50, rubric = asv.rubric, return.conf = FALSE)
idtax.silva <- idtax2df(idtax.silva, db = "silva", boot = 50, rubric = asv.rubric, return.conf = FALSE)
head(bayes.pr2)
## svN
## 1 sv1
## 2 sv10
## 3 sv100
## 4 sv1000
## 5 sv10000
## 6 sv10001
## ASV
## 1 GCTACTACCGATTGAACATTTTAGTGAGGTCCTCGGACTGTGAGCCAGGCGGGTCGCCCTGCCTGGTCTACGGGAAGACGACCAAACTGTAGTGTTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCC
## 2 GCACCTACCGATTGAATGGTCCGGTGAAGCCTCGGGATTGTGATTAGTTTCCTTTATTGGAAGGTAGTTATGAGAACCTGTCTAAACCTTATCATTTAGAGGAAGGTGAAGTCGTAACAAGGTTTCT
## 3 GCTCCTACCGATTGAGTGGTCCGGTGAATAATTCGGACTGGTGCCGATTTCGGTTCTCCGAGTTCGGCGCTGGGAAGTCTAGTGAACCTTATCACTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 4 GCTCCTACCGATTGAATGGTCCGGTGAAGTGTTCGGATCGTGGCGACGTGGGCGGTTCGCTGCCTGCGACGTCGCGAGAAGTCCACTGAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 5 GTTTCTTCCGACTAATGCTTTATGTGAGTGTCACGGATTTTAAAGAAGTGCTGTGAACATTGAGTATCGGAGGAAGAAAAAGTCGTAACAAGGTTATC
## 6 GCTGCTACCGATTGAGTGTCCTGGTGAATTATTTGGACCGGCAGTAATTCGAGTTTCTCGATTTACAGCTGGAAAATCTTGTAAACCCTGACACTTAGAGGAAGCAGAAGTCGTAACAAGGTTTCC
## kingdom supergroup division class order
## 1 Eukaryota Opisthokonta Metazoa Arthropoda Crustacea
## 2 Eukaryota Stramenopiles Ochrophyta Bacillariophyta Bacillariophyta_X
## 3 Eukaryota Alveolata Dinoflagellata Syndiniales <NA>
## 4 Eukaryota Archaeplastida Streptophyta Embryophyceae Embryophyceae_X
## 5 Eukaryota <NA> <NA> <NA> <NA>
## 6 Eukaryota <NA> <NA> <NA> <NA>
## family genus species
## 1 Maxillopoda Paracalanus Paracalanus_parvus
## 2 Raphid-pennate Pseudo-nitzschia Pseudo-nitzschia_fraudulenta
## 3 <NA> <NA> <NA>
## 4 Embryophyceae_XX <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
head(bayes.silva)
## svN
## 1 sv1
## 2 sv10
## 3 sv100
## 4 sv1000
## 5 sv10000
## 6 sv10001
## ASV
## 1 GCTACTACCGATTGAACATTTTAGTGAGGTCCTCGGACTGTGAGCCAGGCGGGTCGCCCTGCCTGGTCTACGGGAAGACGACCAAACTGTAGTGTTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCC
## 2 GCACCTACCGATTGAATGGTCCGGTGAAGCCTCGGGATTGTGATTAGTTTCCTTTATTGGAAGGTAGTTATGAGAACCTGTCTAAACCTTATCATTTAGAGGAAGGTGAAGTCGTAACAAGGTTTCT
## 3 GCTCCTACCGATTGAGTGGTCCGGTGAATAATTCGGACTGGTGCCGATTTCGGTTCTCCGAGTTCGGCGCTGGGAAGTCTAGTGAACCTTATCACTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 4 GCTCCTACCGATTGAATGGTCCGGTGAAGTGTTCGGATCGTGGCGACGTGGGCGGTTCGCTGCCTGCGACGTCGCGAGAAGTCCACTGAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 5 GTTTCTTCCGACTAATGCTTTATGTGAGTGTCACGGATTTTAAAGAAGTGCTGTGAACATTGAGTATCGGAGGAAGAAAAAGTCGTAACAAGGTTATC
## 6 GCTGCTACCGATTGAGTGTCCTGGTGAATTATTTGGACCGGCAGTAATTCGAGTTTCTCGATTTACAGCTGGAAAATCTTGTAAACCCTGACACTTAGAGGAAGCAGAAGTCGTAACAAGGTTTCC
## domain phylum class order
## 1 Eukaryota Arthropoda Maxillopoda Calanoida
## 2 Eukaryota Diatomea Bacillariophyceae Bacillariophyceae_or
## 3 Eukaryota Protalveolata Syndiniales Syndiniales_or
## 4 Eukaryota Phragmoplastophyta Embryophyta <NA>
## 5 <NA> <NA> <NA> <NA>
## 6 Eukaryota <NA> <NA> <NA>
## family genus
## 1 <NA> <NA>
## 2 Bacillariophyceae_fa Pseudo-nitzschia
## 3 <NA> <NA>
## 4 <NA> <NA>
## 5 <NA> <NA>
## 6 <NA> <NA>
head(idtax.pr2)
## svN
## 1 sv1
## 2 sv10
## 3 sv100
## 4 sv1000
## 5 sv10000
## 6 sv10001
## ASV
## 1 GCTACTACCGATTGAACATTTTAGTGAGGTCCTCGGACTGTGAGCCAGGCGGGTCGCCCTGCCTGGTCTACGGGAAGACGACCAAACTGTAGTGTTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCC
## 2 GCACCTACCGATTGAATGGTCCGGTGAAGCCTCGGGATTGTGATTAGTTTCCTTTATTGGAAGGTAGTTATGAGAACCTGTCTAAACCTTATCATTTAGAGGAAGGTGAAGTCGTAACAAGGTTTCT
## 3 GCTCCTACCGATTGAGTGGTCCGGTGAATAATTCGGACTGGTGCCGATTTCGGTTCTCCGAGTTCGGCGCTGGGAAGTCTAGTGAACCTTATCACTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 4 GCTCCTACCGATTGAATGGTCCGGTGAAGTGTTCGGATCGTGGCGACGTGGGCGGTTCGCTGCCTGCGACGTCGCGAGAAGTCCACTGAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 5 GTTTCTTCCGACTAATGCTTTATGTGAGTGTCACGGATTTTAAAGAAGTGCTGTGAACATTGAGTATCGGAGGAAGAAAAAGTCGTAACAAGGTTATC
## 6 GCTGCTACCGATTGAGTGTCCTGGTGAATTATTTGGACCGGCAGTAATTCGAGTTTCTCGATTTACAGCTGGAAAATCTTGTAAACCCTGACACTTAGAGGAAGCAGAAGTCGTAACAAGGTTTCC
## kingdom supergroup division class order
## 1 Eukaryota Opisthokonta Metazoa Arthropoda Crustacea
## 2 Eukaryota Stramenopiles Ochrophyta Bacillariophyta Bacillariophyta_X
## 3 Eukaryota Alveolata Dinoflagellata <NA> <NA>
## 4 Eukaryota Archaeplastida Streptophyta Embryophyceae Embryophyceae_X
## 5 Eukaryota <NA> <NA> <NA> <NA>
## 6 Eukaryota Alveolata Dinoflagellata Syndiniales Dino-Group-II
## family genus species
## 1 Maxillopoda <NA> <NA>
## 2 Raphid-pennate Pseudo-nitzschia Pseudo-nitzschia_fraudulenta
## 3 <NA> <NA> <NA>
## 4 Embryophyceae_XX <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 Dino-Group-II-Clade-14 Dino-Group-II-Clade-14_X Dino-Group-II-Clade-14_X_sp.
head(idtax.silva)
## svN
## 1 sv1
## 2 sv10
## 3 sv100
## 4 sv1000
## 5 sv10000
## 6 sv10001
## ASV
## 1 GCTACTACCGATTGAACATTTTAGTGAGGTCCTCGGACTGTGAGCCAGGCGGGTCGCCCTGCCTGGTCTACGGGAAGACGACCAAACTGTAGTGTTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCC
## 2 GCACCTACCGATTGAATGGTCCGGTGAAGCCTCGGGATTGTGATTAGTTTCCTTTATTGGAAGGTAGTTATGAGAACCTGTCTAAACCTTATCATTTAGAGGAAGGTGAAGTCGTAACAAGGTTTCT
## 3 GCTCCTACCGATTGAGTGGTCCGGTGAATAATTCGGACTGGTGCCGATTTCGGTTCTCCGAGTTCGGCGCTGGGAAGTCTAGTGAACCTTATCACTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 4 GCTCCTACCGATTGAATGGTCCGGTGAAGTGTTCGGATCGTGGCGACGTGGGCGGTTCGCTGCCTGCGACGTCGCGAGAAGTCCACTGAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 5 GTTTCTTCCGACTAATGCTTTATGTGAGTGTCACGGATTTTAAAGAAGTGCTGTGAACATTGAGTATCGGAGGAAGAAAAAGTCGTAACAAGGTTATC
## 6 GCTGCTACCGATTGAGTGTCCTGGTGAATTATTTGGACCGGCAGTAATTCGAGTTTCTCGATTTACAGCTGGAAAATCTTGTAAACCCTGACACTTAGAGGAAGCAGAAGTCGTAACAAGGTTTCC
## domain phylum class order
## 1 Eukaryota Arthropoda Maxillopoda Calanoida
## 2 Eukaryota Diatomea Bacillariophyceae Bacillariophyceae_or
## 3 Eukaryota Protalveolata Syndiniales Syndiniales_or
## 4 Eukaryota Phragmoplastophyta Embryophyta <NA>
## 5 <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA>
## family genus
## 1 <NA> <NA>
## 2 Bacillariophyceae_fa <NA>
## 3 Syndiniales Group III <NA>
## 4 <NA> <NA>
## 5 <NA> <NA>
## 6 <NA> <NA>
The data looks good and at first glance it looks like taxonomic assignments are mostly in agreement across the taxonomy tables where they are assigned, though there are differences in resolution. Here we'll subset ASVs from a particular project so they're all from the same data set:
# sanity check
identical(bayes.pr2[, 1:2], bayes.silva[, 1:2])
## [1] TRUE
identical(bayes.pr2[, 1:2], idtax.silva[, 1:2])
## [1] TRUE
identical(bayes.pr2[, 1:2], idtax.pr2[, 1:2])
## [1] TRUE
seqtab <- readRDS("initial_tax_tabs/seqtab_nochime_Mar20.rds")
samsub <- readRDS("initial_tax_tabs/sample_subsets.rds")
subber <- c(samsub$pbsurf, samsub$pbdeep)
seqtab <- seqtab[ row.names(seqtab) %in% subber , ]
rm.me <- which(colSums(seqtab) == 0)
# also remove <90 or >180
rm.me2 <- which(str_length(colnames(seqtab)) < 90 | str_length(colnames(seqtab)) > 180)
rm.me <- unique(c(rm.me, rm.me2))
asv.sub <- colnames(seqtab)[-rm.me]
bayes.pr2 <- bayes.pr2[bayes.pr2$ASV %in% asv.sub , ]
bayes.silva <- bayes.silva[bayes.silva$ASV %in% asv.sub , ]
idtax.pr2 <- idtax.pr2[idtax.pr2$ASV %in% asv.sub , ]
idtax.silva <- idtax.silva[idtax.silva$ASV %in% asv.sub , ]
identical(bayes.pr2[, 1:2], bayes.silva[, 1:2])
## [1] TRUE
identical(bayes.pr2[, 1:2], idtax.silva[, 1:2])
## [1] TRUE
identical(bayes.pr2[, 1:2], idtax.pr2[, 1:2])
## [1] TRUE
Here we'll run taxmapper to map the pr2 tables onto silva, and the silva tables onto pr2. We don't care about the mapping results other than returning the mapped taxonomy tables (you can see more on that on the mapping doc), so will set streamline = TRUE. We'll be as permissive as possible to try to get the most-resolved mapped taxonomic assignments we can by using synonym.file = "default" and ignore.format = TRUE.
bayes.pr2.mapped2silva <- taxmapper(bayes.pr2,
tt.ranks = colnames(bayes.pr2)[3:ncol(bayes.pr2)],
tax2map2 = "Silva",
ignore.format = TRUE,
synonym.file = "default",
streamline = TRUE,
outfilez = NULL)
bayes.silva.mapped2pr2 <- taxmapper(bayes.silva,
tt.ranks = colnames(bayes.silva)[3:ncol(bayes.silva)],
tax2map2 = "pr2",
exceptions = c("Bacteria", "Archaea"),
ignore.format = TRUE,
synonym.file = "default",
streamline = TRUE,
outfilez = NULL)
idtax.pr2.mapped2silva <- taxmapper(idtax.pr2,
tt.ranks = colnames(idtax.pr2)[3:ncol(idtax.pr2)],
tax2map2 = "Silva",
ignore.format = TRUE,
synonym.file = "default",
streamline = TRUE,
outfilez = NULL)
idtax.silva.mapped2pr2 <- taxmapper(idtax.silva,
tt.ranks = colnames(idtax.silva)[3:ncol(idtax.silva)],
tax2map2 = "pr2",
exceptions = c("Bacteria", "Archaea"),
ignore.format = TRUE,
synonym.file = "default",
streamline = TRUE,
outfilez = NULL)
We've now mapped each taxonomy table onto a new taxonomic nomenclature -- the 2 silva tables were mapped onto pr2's nomenclature, and the 2 pr2 tables were mapped onto silva's nomenclature. Let's look at the mapped taxonomy tables:
head(bayes.pr2.mapped2silva)
## svN
## 1 sv1
## 2 sv10
## 3 sv100
## 4 sv1000
## 5 sv10000
## 6 sv10001
## ASV
## 1 GCTACTACCGATTGAACATTTTAGTGAGGTCCTCGGACTGTGAGCCAGGCGGGTCGCCCTGCCTGGTCTACGGGAAGACGACCAAACTGTAGTGTTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCC
## 2 GCACCTACCGATTGAATGGTCCGGTGAAGCCTCGGGATTGTGATTAGTTTCCTTTATTGGAAGGTAGTTATGAGAACCTGTCTAAACCTTATCATTTAGAGGAAGGTGAAGTCGTAACAAGGTTTCT
## 3 GCTCCTACCGATTGAGTGGTCCGGTGAATAATTCGGACTGGTGCCGATTTCGGTTCTCCGAGTTCGGCGCTGGGAAGTCTAGTGAACCTTATCACTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 4 GCTCCTACCGATTGAATGGTCCGGTGAAGTGTTCGGATCGTGGCGACGTGGGCGGTTCGCTGCCTGCGACGTCGCGAGAAGTCCACTGAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 5 GTTTCTTCCGACTAATGCTTTATGTGAGTGTCACGGATTTTAAAGAAGTGCTGTGAACATTGAGTATCGGAGGAAGAAAAAGTCGTAACAAGGTTATC
## 6 GCTGCTACCGATTGAGTGTCCTGGTGAATTATTTGGACCGGCAGTAATTCGAGTTTCTCGATTTACAGCTGGAAAATCTTGTAAACCCTGACACTTAGAGGAAGCAGAAGTCGTAACAAGGTTTCC
## domain phylum class order
## 1 Eukaryota Arthropoda Maxillopoda <NA>
## 2 Eukaryota Diatomea Bacillariophyceae Bacillariophyceae_or
## 3 Eukaryota Protalveolata Syndiniales <NA>
## 4 Eukaryota <NA> <NA> <NA>
## 5 Eukaryota <NA> <NA> <NA>
## 6 Eukaryota <NA> <NA> <NA>
## family genus
## 1 <NA> <NA>
## 2 Bacillariophyceae_fa Pseudo-nitzschia
## 3 <NA> <NA>
## 4 <NA> <NA>
## 5 <NA> <NA>
## 6 <NA> <NA>
head(bayes.silva.mapped2pr2)
## svN
## 1 sv1
## 2 sv10
## 3 sv100
## 4 sv1000
## 5 sv10000
## 6 sv10001
## ASV
## 1 GCTACTACCGATTGAACATTTTAGTGAGGTCCTCGGACTGTGAGCCAGGCGGGTCGCCCTGCCTGGTCTACGGGAAGACGACCAAACTGTAGTGTTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCC
## 2 GCACCTACCGATTGAATGGTCCGGTGAAGCCTCGGGATTGTGATTAGTTTCCTTTATTGGAAGGTAGTTATGAGAACCTGTCTAAACCTTATCATTTAGAGGAAGGTGAAGTCGTAACAAGGTTTCT
## 3 GCTCCTACCGATTGAGTGGTCCGGTGAATAATTCGGACTGGTGCCGATTTCGGTTCTCCGAGTTCGGCGCTGGGAAGTCTAGTGAACCTTATCACTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 4 GCTCCTACCGATTGAATGGTCCGGTGAAGTGTTCGGATCGTGGCGACGTGGGCGGTTCGCTGCCTGCGACGTCGCGAGAAGTCCACTGAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 5 GTTTCTTCCGACTAATGCTTTATGTGAGTGTCACGGATTTTAAAGAAGTGCTGTGAACATTGAGTATCGGAGGAAGAAAAAGTCGTAACAAGGTTATC
## 6 GCTGCTACCGATTGAGTGTCCTGGTGAATTATTTGGACCGGCAGTAATTCGAGTTTCTCGATTTACAGCTGGAAAATCTTGTAAACCCTGACACTTAGAGGAAGCAGAAGTCGTAACAAGGTTTCC
## kingdom supergroup division class order
## 1 Eukaryota Opisthokonta Metazoa Arthropoda Crustacea
## 2 Eukaryota Stramenopiles Ochrophyta Bacillariophyta Bacillariophyta_X
## 3 Eukaryota Alveolata Dinoflagellata Syndiniales <NA>
## 4 Eukaryota <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA> <NA>
## 6 Eukaryota <NA> <NA> <NA> <NA>
## family genus species
## 1 Maxillopoda <NA> <NA>
## 2 Raphid-pennate Pseudo-nitzschia <NA>
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
head(idtax.pr2.mapped2silva)
## svN
## 1 sv1
## 2 sv10
## 3 sv100
## 4 sv1000
## 5 sv10000
## 6 sv10001
## ASV
## 1 GCTACTACCGATTGAACATTTTAGTGAGGTCCTCGGACTGTGAGCCAGGCGGGTCGCCCTGCCTGGTCTACGGGAAGACGACCAAACTGTAGTGTTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCC
## 2 GCACCTACCGATTGAATGGTCCGGTGAAGCCTCGGGATTGTGATTAGTTTCCTTTATTGGAAGGTAGTTATGAGAACCTGTCTAAACCTTATCATTTAGAGGAAGGTGAAGTCGTAACAAGGTTTCT
## 3 GCTCCTACCGATTGAGTGGTCCGGTGAATAATTCGGACTGGTGCCGATTTCGGTTCTCCGAGTTCGGCGCTGGGAAGTCTAGTGAACCTTATCACTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 4 GCTCCTACCGATTGAATGGTCCGGTGAAGTGTTCGGATCGTGGCGACGTGGGCGGTTCGCTGCCTGCGACGTCGCGAGAAGTCCACTGAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 5 GTTTCTTCCGACTAATGCTTTATGTGAGTGTCACGGATTTTAAAGAAGTGCTGTGAACATTGAGTATCGGAGGAAGAAAAAGTCGTAACAAGGTTATC
## 6 GCTGCTACCGATTGAGTGTCCTGGTGAATTATTTGGACCGGCAGTAATTCGAGTTTCTCGATTTACAGCTGGAAAATCTTGTAAACCCTGACACTTAGAGGAAGCAGAAGTCGTAACAAGGTTTCC
## domain phylum class order
## 1 Eukaryota Arthropoda Maxillopoda <NA>
## 2 Eukaryota Diatomea Bacillariophyceae Bacillariophyceae_or
## 3 Eukaryota Dinoflagellata <NA> <NA>
## 4 Eukaryota <NA> <NA> <NA>
## 5 Eukaryota <NA> <NA> <NA>
## 6 Eukaryota Protalveolata Syndiniales <NA>
## family genus
## 1 <NA> <NA>
## 2 Bacillariophyceae_fa Pseudo-nitzschia
## 3 <NA> <NA>
## 4 <NA> <NA>
## 5 <NA> <NA>
## 6 <NA> <NA>
head(idtax.silva.mapped2pr2)
## svN
## 1 sv1
## 2 sv10
## 3 sv100
## 4 sv1000
## 5 sv10000
## 6 sv10001
## ASV
## 1 GCTACTACCGATTGAACATTTTAGTGAGGTCCTCGGACTGTGAGCCAGGCGGGTCGCCCTGCCTGGTCTACGGGAAGACGACCAAACTGTAGTGTTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCC
## 2 GCACCTACCGATTGAATGGTCCGGTGAAGCCTCGGGATTGTGATTAGTTTCCTTTATTGGAAGGTAGTTATGAGAACCTGTCTAAACCTTATCATTTAGAGGAAGGTGAAGTCGTAACAAGGTTTCT
## 3 GCTCCTACCGATTGAGTGGTCCGGTGAATAATTCGGACTGGTGCCGATTTCGGTTCTCCGAGTTCGGCGCTGGGAAGTCTAGTGAACCTTATCACTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 4 GCTCCTACCGATTGAATGGTCCGGTGAAGTGTTCGGATCGTGGCGACGTGGGCGGTTCGCTGCCTGCGACGTCGCGAGAAGTCCACTGAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCC
## 5 GTTTCTTCCGACTAATGCTTTATGTGAGTGTCACGGATTTTAAAGAAGTGCTGTGAACATTGAGTATCGGAGGAAGAAAAAGTCGTAACAAGGTTATC
## 6 GCTGCTACCGATTGAGTGTCCTGGTGAATTATTTGGACCGGCAGTAATTCGAGTTTCTCGATTTACAGCTGGAAAATCTTGTAAACCCTGACACTTAGAGGAAGCAGAAGTCGTAACAAGGTTTCC
## kingdom supergroup division class order
## 1 Eukaryota Opisthokonta Metazoa Arthropoda Crustacea
## 2 Eukaryota Stramenopiles Ochrophyta Bacillariophyta Bacillariophyta_X
## 3 Eukaryota Alveolata Dinoflagellata Syndiniales <NA>
## 4 Eukaryota <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA> <NA>
## family genus species
## 1 Maxillopoda <NA> <NA>
## 2 Raphid-pennate <NA> <NA>
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
Looks good. ensembleTax time:
## mapped to pr2's:
xx <- list(bayes.pr2, idtax.pr2, bayes.silva.mapped2pr2, idtax.silva.mapped2pr2)
names(xx) <- c("bayes-pr2", "idtax-pr2", "bayes-silva", "idtax-silva")
et.all4.pr2.wna <- ensembleTax(xx, count.na = TRUE,
tiebreakz = NULL, assign.threshold = 0)
et.all4.pr2.nona <- ensembleTax(xx, count.na = FALSE,
tiebreakz = c("idtax-pr2", "idtax-silva", "bayes-pr2"))
## mapped to silva
xx.silva <- list(bayes.pr2.mapped2silva, idtax.pr2.mapped2silva, bayes.silva, idtax.silva)
rn <- rownames(idtax.silva)
for (i in 1:length(xx.silva)) {
rownames(xx.silva[[i]]) <- rn
}
names(xx.silva) <- c("bayes-pr2", "idtax-pr2", "bayes-silva", "idtax-silva")
et.all4.silva.wna <- ensembleTax(xx.silva, count.na = TRUE, ranknames = colnames(idtax.silva[, 3:ncol(idtax.silva)]),
tiebreakz = NULL, assign.threshold = 0)
et.all4.silva.nona <- ensembleTax(xx.silva, count.na = FALSE, ranknames = colnames(idtax.silva[, 3:ncol(idtax.silva)]),
tiebreakz = c("idtax-silva", "idtax-pr2", "bayes-silva"))
Ok now remove proks, metazoa, fungi, streptophyta
rm.i <- unique(c(which(et.all4.pr2.nona$kingdom %in% c("Bacteria", "Archaea")),
which(et.all4.pr2.nona$kingdom %in% c("Bacteria", "Archaea")),
which(et.all4.pr2.nona$division %in% c("Metazoa", "Fungi", "Streptophyta"))))
bayes.silva.mapped2pr2 <- bayes.silva.mapped2pr2[-rm.i , ]
idtax.silva.mapped2pr2 <- idtax.silva.mapped2pr2[-rm.i , ]
bayes.pr2 <- bayes.pr2[-rm.i , ]
idtax.pr2 <- idtax.pr2[-rm.i , ]
et.all4.pr2.nona <- et.all4.pr2.nona[-rm.i , ]
et.all4.pr2.wna <- et.all4.pr2.wna[-rm.i , ]
## the ones mapped to silva too:
bayes.silva <- bayes.silva[-rm.i , ]
idtax.silva <- idtax.silva[-rm.i , ]
bayes.pr2.mapped2silva <- bayes.pr2.mapped2silva[-rm.i , ]
idtax.pr2.mapped2silva <- idtax.pr2.mapped2silva[-rm.i , ]
et.all4.silva.nona <- et.all4.silva.nona[-rm.i , ]
et.all4.silva.wna <- et.all4.silva.wna[-rm.i , ]
Compute and plot % ASVs unassigned at each rank:
library("ggplot2")
library("reshape2")
nasum <- function(taxdf){
notuz <- nrow(taxdf)
x <- is.na(taxdf)
ii <- colSums(x) / notuz
return(ii)
}
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2")
# with the individuals and 4-way ensembles:
xx.pr2 <- list(idtax.pr2, bayes.pr2,
idtax.silva.mapped2pr2, bayes.silva.mapped2pr2,
et.all4.pr2.wna, et.all4.pr2.nona)
names(xx.pr2) <- c("idtax-pr2", "bayes-pr2",
"idtax-silva","bayes-silva",
"ensemble-acc", "ensemble-res")
xx.pr2 <- lapply(xx.pr2, function(x) x[, -c(1,2)])
tina <- lapply(xx.pr2, nasum)
yaboi <- matrix(unlist(tina), nrow=length(xx.pr2), byrow=TRUE)
rownames(yaboi) <- names(xx.pr2)
colnames(yaboi) <- colnames(xx.pr2[[1]])
yaboi <- as.data.frame(t(yaboi))
yaboi$rankz <- rownames(yaboi)
yaboi <- melt(yaboi, id.vars = "rankz")
p.pr2.2 <- ggplot(yaboi, aes(fill = variable, x = rankz, y = value)) +
geom_bar(stat="identity", color = "black", position=position_dodge(width=0.8)) +
labs(x = "Taxonomic Rank", y = "Proportion of ASVs Unassigned") +
scale_x_discrete(limits = colnames(xx.pr2[[1]])) +
scale_y_continuous(limits = seq(0,1.05,0.1), expand = c(0,0)) +
coord_cartesian(ylim = c(0, 1)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), axis.title.x = element_text(size = 12, face="bold"),
axis.text.y = element_text(size = 12), axis.title.y = element_text(size = 12, face="bold"),
panel.background = element_rect(fill = "white",
colour = "white",
linetype = "solid"),
panel.grid.major = element_line(size = 0.5, linetype = 'solid',
colour = "white"),
panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
colour = "white"),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1)) +
scale_fill_manual(name = "Taxonomy table",
breaks = names(xx.pr2),
values = cbPalette)
# silvas:
xx.silva <- list(idtax.pr2.mapped2silva, bayes.pr2.mapped2silva,
idtax.silva, bayes.silva,
et.all4.silva.wna, et.all4.silva.nona)
names(xx.silva) <- c("idtax-pr2", "bayes-pr2",
"idtax-silva","bayes-silva",
"ensemble-acc", "ensemble-res")
xx.silva <- lapply(xx.silva, function(x) x[, -c(1,2)])
tina <- lapply(xx.silva, nasum)
yaboi <- matrix(unlist(tina), nrow=length(xx.silva), byrow=TRUE)
rownames(yaboi) <- names(xx.silva)
colnames(yaboi) <- colnames(xx.silva[[1]])
yaboi <- as.data.frame(t(yaboi))
yaboi$rankz <- rownames(yaboi)
yaboi <- melt(yaboi, id.vars = "rankz")
p.silva.2 <- ggplot(yaboi, aes(fill = variable, x = rankz, y = value)) +
geom_bar(stat="identity", color = "black", position=position_dodge(width=0.8)) +
labs(x = "Taxonomic Rank", y = "Proportion of ASVs Unassigned") +
scale_x_discrete(limits = colnames(xx.silva[[1]])) +
scale_y_continuous(limits = seq(0,1.05,0.1), expand = c(0,0)) +
coord_cartesian(ylim = c(0, 1)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), axis.title.x = element_text(size = 12, face="bold"),
axis.text.y = element_text(size = 12), axis.title.y = element_text(size = 12, face="bold"),
panel.background = element_rect(fill = "white",
colour = "white",
linetype = "solid"),
panel.grid.major = element_line(size = 0.5, linetype = 'solid',
colour = "white"),
panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
colour = "white"),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1)) +
scale_fill_manual(name = "Taxonomy",
breaks = names(xx.pr2),
values = cbPalette)
Nice. Let's do some comparisons of the assignments between the ensembles and the individual taxonomy tables:
library("dplyr")
# fcn that compares two taxonomy tables:
tblcomper <- function(y,x) {
perf <- dplyr::intersect(x, y) # perfectly-matching rows (assignments)
tmp.x <- dplyr::setdiff(x, perf)
tmp.y <- dplyr::setdiff(y, perf)
if (!identical(tmp.x[, 1], tmp.y[, 1])) {
tmp.x <- sort_my_taxtab(tmp.x, ranknames = colnames(tmp.x)[3:ncol(tmp.x)])
tmp.y <- sort_my_taxtab(tmp.y, ranknames = colnames(tmp.y)[3:ncol(tmp.x)])
}
xna <- is.na(tmp.x)
yna <- is.na(tmp.y)
# This warning happens 1 million times when there's no NA's. It doesn't matter b/c inf is always bigger so suppressing.
## Warning in min(which(z)): no non-missing arguments to min; returning Inf
x.minna <- apply(xna, MARGIN = 1, function(z) suppressWarnings(min(which(z))))
y.minna <- apply(yna, MARGIN = 1, function(z) suppressWarnings(min(which(z))))
x.mo <- which(x.minna > y.minna)
y.mo <- which(y.minna > x.minna)
yunder.i <- c()
yover.i <- c()
mis.i <- c()
# subset where x has more resolved assignments and only to cols where
# both have assignments, then see if names match
yunder <- 0
ymis <- 0
if (length(x.mo) > 0){
for (i in 1:length(x.mo)){
if ((y.minna[x.mo[i]]-1) < 3){ # if kingdom is unassigned just add to under-classification
yunder <- yunder+1
# yunder.i <- append(yunder.i, tmp.x$svN)
} else {
tmp.tmpx <- tmp.x[x.mo[i] , 3:(y.minna[x.mo[i]]-1)]
tmp.tmpy <- tmp.y[x.mo[i] , 3:(y.minna[x.mo[i]]-1)]
if (all(tmp.tmpx == tmp.tmpy)) {
yunder <- yunder+1
# yunder.i <- append(yunder.i, tmp.x$svN)
} else {
ymis <- ymis+1
# mis.i <- append(mis.i, tmp.x$svN)
}
}
}
}
# repeat above where y is more resolved than x:
yover <- 0
xmis <- 0
if (length(y.mo) > 0){
for (i in 1:length(y.mo)){
if ((x.minna[y.mo[i]]-1) < 3){ # if kingdom is unassigned just add to under-classification
yover <- yover+1
# yover.i <- append(yover.i, tmp.x$svN)
} else {
tmp.tmpx <- tmp.x[y.mo[i] , 3:(x.minna[y.mo[i]]-1)]
tmp.tmpy <- tmp.y[y.mo[i] , 3:(x.minna[y.mo[i]]-1)]
if (all(tmp.tmpx == tmp.tmpy)) {
yover <- yover+1
# yover.i <- append(yover.i, tmp.x$svN)
} else {
xmis <- xmis+1
# mis.i <- append(mis.i, tmp.x$svN)
}
}
}
}
if (yover+xmis != length(y.mo) || yunder+ymis != length(x.mo)) {
stop("somethings wrong i think")
}
perf <- nrow(perf)
# whatever's left is where both tables have the same ranks named, but different names. this is a misclassification:
moremis <- nrow(x) - (xmis+ymis+yover+yunder+perf)
result <- data.frame(all.match = c(perf), mis = c(ymis+xmis+moremis), over = c(yover), under = c(yunder))
if (sum(result) == nrow(x)) {
return(result)
# return(list(result, mis.i, yover.i, yunder.i, perf.i))
} else {
stop("noooooooooooooo")
}
}
### this is the pr2 comparisons... scroll past plot saving for silva
# make a list of all taxonomy tables:
tbl.list <- list(idtax.pr2, bayes.pr2,
idtax.silva.mapped2pr2, bayes.silva.mapped2pr2,
et.all4.pr2.wna)
all.comp.nona <- lapply(tbl.list, FUN = tblcomper, x = et.all4.pr2.nona)
all.comp.nona <- base::do.call(base::rbind.data.frame, all.comp.nona)
row.names(all.comp.nona) <- c("idtax-pr2", "bayes-pr2", "idtax-silva", "bayes-silva", "ensemble-acc")
all.comp.nona <- all.comp.nona / rowSums(all.comp.nona)
tbl.list <- list(idtax.pr2, bayes.pr2,
idtax.silva.mapped2pr2, bayes.silva.mapped2pr2,
et.all4.pr2.nona)
all.comp.wna <- lapply(tbl.list, FUN = tblcomper, x = et.all4.pr2.wna)
all.comp.wna <- base::do.call(base::rbind.data.frame, all.comp.wna)
row.names(all.comp.wna) <- c("idtax-pr2", "bayes-pr2", "idtax-silva", "bayes-silva", "ensemble-res")
all.comp.wna <- all.comp.wna / rowSums(all.comp.wna)
all.comp.nona$tbl <- row.names(all.comp.nona)
all.comp.wna$tbl <- row.names(all.comp.wna)
plt.all.comp.wna <- melt(all.comp.wna)
plt.all.comp.nona <- melt(all.comp.nona)
# plot the results:
p.comp1.pr2 <- ggplot(plt.all.comp.wna, aes(fill = tbl, x = variable, y = value)) +
geom_bar(stat="identity", color = "black", position=position_dodge(width=0.8)) +
labs(x = "Relative to ensemble-acc", y = "Proportion of ASVs") +
scale_x_discrete(breaks = c("all.match","mis","over","under"),
labels = c("Agree", "Misclassified", "Overclassified","Underclassified")) +
scale_y_continuous(breaks = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0),
limits = c(0, 1), expand = c(0,0)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), axis.title.x = element_text(size = 12, face="bold"),
axis.text.y = element_text(size = 12), axis.title.y = element_text(size = 12, face="bold"),
panel.background = element_rect(fill = "white",
colour = "white",
linetype = "solid"),
panel.grid.major = element_line(size = 0.5, linetype = 'solid',
colour = "white"),
panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
colour = "white"),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1)) +
scale_fill_manual(name = "Taxonomy",
breaks = c("idtax-pr2", "bayes-pr2", "idtax-silva", "bayes-silva", "ensemble-res"),
values = cbPalette[c(1:4,6)])
print(p.comp1.pr2)
# plot the results:
p.comp2.pr2 <- ggplot(plt.all.comp.nona, aes(fill = tbl, x = variable, y = value)) +
geom_bar(stat="identity", color = "black", position=position_dodge(width=0.8)) +
labs(x = "Relative to ensemble-res", y = "Proportion of ASVs") +
scale_x_discrete(breaks = c("all.match","mis","over","under"),
labels = c("Agree", "Misclassified", "Overclassified","Underclassified")) +
scale_y_continuous(breaks = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0),
limits = c(0, 1), expand = c(0,0)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), axis.title.x = element_text(size = 12, face="bold"),
axis.text.y = element_text(size = 12), axis.title.y = element_text(size = 12, face="bold"),
panel.background = element_rect(fill = "white",
colour = "white",
linetype = "solid"),
panel.grid.major = element_line(size = 0.5, linetype = 'solid',
colour = "white"),
panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
colour = "white"),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1)) +
scale_fill_manual(name = "Taxonomy",
breaks = c("idtax-pr2", "bayes-pr2", "idtax-silva", "bayes-silva", "ensemble-acc"),
values = cbPalette[c(1:5)])
print(p.comp2.pr2)
library("cowplot")
p.all <- plot_grid(
p.pr2.2 + theme(legend.position = "none", plot.margin = unit(c(1, 0.5, 0, 0.5), "cm")),
p.comp1.pr2 + theme(legend.position = "none", plot.margin = unit(c(1, 0.5, 0, 0.5), "cm")),
p.comp2.pr2 + theme(legend.position = "none", plot.margin = unit(c(1, 0.5, 0, 0.5), "cm")),
align = 'h',
labels = c("(A)", "(B)", "(C)"),
axis = 'l',
hjust=-1,
nrow=1
)
## Warning: Removed 45 rows containing missing values (geom_bar).
legend_b <- get_legend(p.pr2.2 +
theme(legend.position="bottom",
legend.text=element_text(size=14),
legend.title = element_text(size=14),
legend.justification="center",
legend.box.margin = unit(c(0.1, 1, 0.1, 1),"cm")))
## Warning: Removed 45 rows containing missing values (geom_bar).
p.all <- plot_grid(p.all, legend_b, nrow=2, rel_heights=c(1, 0.3))
ggsave("all_pr2_plots_protists", plot = p.all, device = "pdf", width = 18, height = 7.5)
Let's do silva comparisons and make a 6-panel fig:
# make a list of all taxonomy tables:
tbl.list <- list(idtax.pr2.mapped2silva, bayes.pr2.mapped2silva,
idtax.silva, bayes.silva,
et.all4.silva.wna)
all.comp.nona <- lapply(tbl.list, FUN = tblcomper, x = et.all4.silva.nona)
all.comp.nona <- base::do.call(base::rbind.data.frame, all.comp.nona)
row.names(all.comp.nona) <- c("idtax-pr2", "bayes-pr2", "idtax-silva", "bayes-silva", "ensemble-acc")
all.comp.nona <- all.comp.nona / rowSums(all.comp.nona)
tbl.list <- list(idtax.pr2.mapped2silva, bayes.pr2.mapped2silva,
idtax.silva, bayes.silva,
et.all4.silva.nona)
all.comp.wna <- lapply(tbl.list, FUN = tblcomper, x = et.all4.silva.wna)
all.comp.wna <- base::do.call(base::rbind.data.frame, all.comp.wna)
row.names(all.comp.wna) <- c("idtax-pr2", "bayes-pr2", "idtax-silva", "bayes-silva", "ensemble-res")
all.comp.wna <- all.comp.wna / rowSums(all.comp.wna)
all.comp.nona$tbl <- row.names(all.comp.nona)
all.comp.wna$tbl <- row.names(all.comp.wna)
plt.all.comp.wna <- melt(all.comp.wna)
## Using tbl as id variables
plt.all.comp.nona <- melt(all.comp.nona)
## Using tbl as id variables
# plot the results:
p.comp1.silva <- ggplot(plt.all.comp.wna, aes(fill = tbl, x = variable, y = value)) +
geom_bar(stat="identity", color = "black", position=position_dodge(width=0.8)) +
labs(x = "Relative to ensemble-acc", y = "Proportion of ASVs") +
scale_x_discrete(breaks = c("all.match","mis","over","under"),
labels = c("Agree", "Misclassified", "Overclassified","Underclassified")) +
scale_y_continuous(breaks = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0),
limits = c(0, 1), expand = c(0,0)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), axis.title.x = element_text(size = 12, face="bold"),
axis.text.y = element_text(size = 12), axis.title.y = element_text(size = 12, face="bold"),
panel.background = element_rect(fill = "white",
colour = "white",
linetype = "solid"),
panel.grid.major = element_line(size = 0.5, linetype = 'solid',
colour = "white"),
panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
colour = "white"),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1)) +
scale_fill_manual(name = "Taxonomy",
breaks = c("idtax-pr2", "bayes-pr2", "idtax-silva", "bayes-silva", "ensemble-res"),
values = cbPalette[c(1:4,6)])
print(p.comp1.silva)
# plot the results:
p.comp2.silva <- ggplot(plt.all.comp.nona, aes(fill = tbl, x = variable, y = value)) +
geom_bar(stat="identity", color = "black", position=position_dodge(width=0.8)) +
labs(x = "Relative to ensemble-res", y = "Proportion of ASVs") +
scale_x_discrete(breaks = c("all.match","mis","over","under"),
labels = c("Agree", "Misclassified", "Overclassified","Underclassified")) +
scale_y_continuous(breaks = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0),
limits = c(0, 1), expand = c(0,0)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), axis.title.x = element_text(size = 12, face="bold"),
axis.text.y = element_text(size = 12), axis.title.y = element_text(size = 12, face="bold"),
panel.background = element_rect(fill = "white",
colour = "white",
linetype = "solid"),
panel.grid.major = element_line(size = 0.5, linetype = 'solid',
colour = "white"),
panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
colour = "white"),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1)) +
scale_fill_manual(name = "Taxonomy",
breaks = c("idtax-pr2", "bayes-pr2", "idtax-silva", "bayes-silva", "ensemble-acc"),
values = cbPalette[c(1:5)])
print(p.comp2.silva)
library("cowplot")
p.silva <- plot_grid(
p.silva.2 + theme(legend.position = "none", plot.margin = unit(c(1, 0.5, 0, 0.5), "cm")),
p.comp1.silva + theme(legend.position = "none", plot.margin = unit(c(1, 0.5, 0, 0.5), "cm")),
p.comp2.silva + theme(legend.position = "none", plot.margin = unit(c(1, 0.5, 0, 0.5), "cm")),
align = 'h',
labels = c("(D)", "(E)", "(F)"),
axis = 'l',
hjust=-1,
nrow=1
)
## Warning: Removed 33 rows containing missing values (geom_bar).
legend_b <- get_legend(p.pr2.2 +
theme(legend.position="bottom",
legend.text=element_text(size=14),
legend.title = element_text(size=14),
legend.justification="center",
legend.box.margin = unit(c(0.1, 1, 0.1, 1),"cm")))
## Warning: Removed 45 rows containing missing values (geom_bar).
p.silva <- plot_grid(p.silva, legend_b, nrow=2, rel_heights=c(1, 0.3))
ggsave("all_silva_plots_protists", plot = p.silva, device = "pdf", width = 18, height = 7.5)
p.all.2 <- plot_grid(p.all, p.silva, nrow = 2, rel_heights=c(1, 1))
ggsave("enscomps_plots_pr2ANDsilva_protists", plot = p.all.2, device = "pdf", width = 18, height = 15)
re-arranging panels for better manuscript fig:
p.all <- plot_grid(
p.pr2.2 + theme(legend.position = "none", plot.margin = unit(c(1, 0.5, 0, 0.5), "cm"), title = element_text(face = "bold")) + ggtitle("pr2 taxonomic nomenclature") + theme(plot.title = element_text(hjust = 0.5)),
p.comp1.pr2 + theme(legend.position = "none", plot.margin = unit(c(1, 0.5, 0, 0.5), "cm")),
p.comp2.pr2 + theme(legend.position = "none", plot.margin = unit(c(1, 0.5, 0, 0.5), "cm")),
align = 'h',
labels = c("(A)", "(C)", "(E)"),
axis = 'l',
hjust=-1,
nrow=3
)
## Warning: Removed 45 rows containing missing values (geom_bar).
p.silva <- plot_grid(
p.silva.2 + theme(legend.position = "none", plot.margin = unit(c(1, 0.5, 0, 0.5), "cm"), title = element_text(face = "bold")) + ggtitle("silva taxonomic nomenclature") + theme(plot.title = element_text(hjust = 0.5)),
p.comp1.silva + theme(legend.position = "none", plot.margin = unit(c(1, 0.5, 0, 0.5), "cm")),
p.comp2.silva + theme(legend.position = "none", plot.margin = unit(c(1, 0.5, 0, 0.5), "cm")),
align = 'h',
labels = c("(B)", "(D)", "(F)"),
axis = 'l',
hjust=-1,
nrow=3
)
## Warning: Removed 33 rows containing missing values (geom_bar).
legend_b <- get_legend(p.pr2.2 +
theme(legend.position="right",
legend.text=element_text(size=14),
legend.title = element_text(size=14, face = "bold"),
legend.justification="left",
legend.box.margin = unit(c(0.1, 1, 0.1, 1),"cm")))
## Warning: Removed 45 rows containing missing values (geom_bar).
p.all.2 <- plot_grid(p.all, p.silva, legend_b, nrow=1)
ggsave("enscomps_plots_pr2ANDsilva_protists_3x2", plot = p.all.2, device = "pdf", width = 11, height = 12)