-
Notifications
You must be signed in to change notification settings - Fork 2
/
FS1b.R
1923 lines (1617 loc) · 96.1 KB
/
FS1b.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#R scripts for analyzing and visualizing data from the research paper "Shifts in the nasal microbiota of swine in response to different dosing regimens of oxytetracycline administration"
#################################################################################################################################################################################################
#SECOND SECTION (after processing sequences in mothur): Creating OTU Table
#Purpose: Create OTU table with R using specific files generated from mothur
#Files needed:
#FS1bfinal.outsingletons.abund.opti_mcc.0.03.cons.taxonomy
#FS1bfinal.outsingletons.abund.taxonomy.csv
#FS1bfinal.outsingletons.abund.opti_mcc.0.03.subsample.shared
#FS1bfinal.singleton.abund.subsample.shared.csv
#The output files from mothur needed in R for this section and subsequent sections, aside from fasta files,
#are text files that can be saved as csv for ease of use in R.
#Load library package
library(tidyverse)
#Set your working directory with the command
setwd("<directory path>")
#If you have to stop midway, you can save R objects using the function
save.image(file="<yourfilename>.RData")
#When you return, you can use the following function to reload datasets written with the save.image or save functions
load("<yourfilename>.RData")
#To start creating OTU table, edit taxonomy file
#Save "FS1bfinal.outsingletons.abund.opti_mcc.0.03.cons.taxonomy" file generated from mothur as a csv file in a spreadsheet editor.
#Named file as "FS1bfinal.outsingletons.abund.taxonomy.csv"
taxonomy <- read.csv("FS1bfinal.outsingletons.abund.taxonomy.csv") #Import this csv file from working directory using "read.csv" function
taxonomy$Taxonomy <- gsub('*\\(.*?\\) *', '', taxonomy$Taxonomy) #Substitute (100) and variations of that with nothing ('') from the Taxonomy column
taxonomy[1:6,3] #Show column number 3, rows 1 through 6 in 'taxonomy' dataframe
write.csv(taxonomy, file = "FS1babundsingleton2000taxonomy.csv") #Write 'taxonomy' into a csv file
#and open the csv file in a spreadsheet editor to remove the size and numbered rows
#Edit subsample.shared file
#Save "FS1bfinal.outsingletons.abund.opti_mcc.0.03.subsample.shared" file generated from mothur as a csv file in a spreadsheet editor.
#Named file as "FS1bfinal.singleton.abund.subsample.shared.csv"
shared <- read.csv("FS1bfinal.singleton.abund.subsample.shared.csv", stringsAsFactors = FALSE)
head(shared) #Check on the first part of 'shared' dataframe
shared[1:6,1]
shared <- t(shared) #Transpose 'shared'
head(shared)
shared[1:6,1]
write.csv(shared, file = 'FS1babundsingletonshared2000.csv') #Open this csv file in a spreadsheet editor and remove the "V*", "label", and "numOtus" rows
#Read the revised FS1babundsingletonshared2000.csv file
shared <- read.csv("FS1babundsingletonshared2000.csv")
colnames(shared) [1] <- "OTU" #Rename first column of 'shared' to "OTU"
taxonomy <- read.csv("FS1babundsingleton2000taxonomy.csv")
OTUtable <- merge(shared, taxonomy, by.x ="OTU", by.y = "OTU") #Merge 'shared' and 'taxonomy' objects by OTU
head(OTUtable)
nrow(OTUtable) #Count number of rows in 'OTUtable'
ncol(OTUtable) #Count number of columns in 'OTUtable'
write.csv(OTUtable, file= "FS1babundsingleton2000OTUtable.csv")
#Open this csv file in a spreadsheet editor and remove the first row (numbered rows) and "Size" column
#Check OTU table
OTUtable <-read.csv("FS1babundsingleton2000OTUtable.csv", stringsAsFactors = FALSE)
head(OTUtable) #Check the first part of 'OTUtable' to make sure it looks ok
#################################################################################################################################################################################################
#THIRD SECTION: Creating phyloseq objects for each tissue
#Purpose: Create phyloseq objects to be used to calculate alpha and beta diversity measures for nasal and tonsil tissue samples.
#This section will also use the adonis function to determine the effect of time and treatment on the community structure of nasal and tonsil microbiota.
#Files needed:
#FS1babundsingleton2000OTUtable.csv
#FS1babundsingleton2000metadata.csv
#Load library packages
library(vegan)
library(ggplot2)
library(dplyr)
library(tidyr)
library(tidyverse)
library(phyloseq)
library(vegdist)
#Read files
otu <- read.csv("FS1babundsingleton2000OTUtable.csv", row.names=1) #Set column 1 as row names
meta <-read.csv("FS1babundsingleton2000metadata.csv")
dim(otu) #Check dimensions of 'otu'
head(otu[,1:5]) #Check the first part of 'otu' table
head(otu[,240:243]) #Check last part of 'otu' table
dim(meta) #Determine the dimensions of 'meta' dataframe. It has 242 rows and 5 columns
head(meta[,1:5]) #Check first part of 'meta' table
#Remove taxonomy from 'otu'
tax <- otu[,(242:243)] #Remove column 243 taxonomy and 242 to copy the row names from 'otu' to 'tax'
head(tax)
colnames(tax)[1] <- "delete" #Rename column 1 of 'tax' (formerly column 242) as "delete" which will be deleted later
head(tax)
#Modify 'otu' with only OTU count data
otu <- otu[,-243] #Remove column 243 taxonomy in 'otu' to have only OTU data
head(otu[,240:242])
dim(otu) #Dimensions of 'otu' show 1637 rows 242 columns
#Transpose 'otu' to match format of 'meta'
otu.trans <- t(otu)
#Now rownames in 'otu.trans' are sample names, columns are OTUs
head(otu.trans[,1:5])
head(meta) #Row names are numbered, but we want sample names as row names
class(meta) #The type of class that 'meta' is is dataframe
class(otu) #dataframe
#Merge 'otu' and 'meta' data frames
otu.meta <- merge(meta, otu.trans, by.x=1, by.y=0) #Merge by names of the columns that are
#common to both x and y (columns with common names between the two data sets)
#by.x=1 means match by 1st column from 'meta'; y=0 means match by rownames in 'otu.trans'
head(otu.meta[,1:10])
class(otu.meta) #Check class type of 'otu.meta'. It should be a dataframe.
#Added an "All" column (combines 'Treatment' and 'Day' values) in new 'otu.meta2' dataframe
otu.meta2<- cbind(otu.meta) #Make second copy of 'otu.meta' to use to include an "All" column
otu.meta2$All <- with(otu.meta2, paste0(Day, sep=" ", Treatment)) #Combine "Day" and "Treatment" columns into an "All" column
head(otu.meta2) #Check first part of otu.meta2
dim(otu.meta2) #Check dimensions of 'otu.meta2' dataframe
head(otu.meta2[,1640:1643]) #Check the first part of the end of 'otu.meta2' dataframe
otu.meta2<- otu.meta2[,c(1:5,1643,6:1642)] #Reorder columns to have "All" column after "Treatment" column
head(otu.meta2[,1:10]) #Check the first part of the beginning of 'otu.meta2' dataframe
head(otu.meta2[,1640:1643])
write.csv(otu.meta2, file="FS1babundsingleton2000.otu.meta_all.csv")
#Subset nasal and tonsil samples
nw2 <- subset(otu.meta2, Tissue=="NW") #Return subset of 'otu.meta2' dataframe as 'nw2' where row values within 'otu.meta2' are "NW" in "Tissue" column
head(nw2[,1:10])
dim(nw2)
write.csv(nw2, file="FS1babundsingleton2000.otu.meta_all.nasalonly.csv")
tt2 <- subset(otu.meta2, Tissue=="TT")
head(tt2[,1:10])
tail(tt2[,1:10])
dim(tt2)
write.csv(tt2, file="FS1babundsingleton2000.otu.meta_all.tonsilonly.csv")
#Creating phyloseq objects for tonsil samples
#Pull out metadata from 'tt2' dataframe
head(tt2[,1:10])
meta.tt2 <- tt2[,1:6] #Take columns 1-6 ("Sample" to "All") to make 'meta.tt2'
head(meta.tt2)
row.names(meta.tt2) <- meta.tt2[,1] #Make column 1 be rownames for 'meta.tt2'
head(meta.tt2)
meta.tt2 <- meta.tt2[,-1] #Remove the extra "Sample" column
head(meta.tt2)
dim(meta.tt2)
#Create SAM metadata table phyloseq object
SAMtt2 = sample_data(meta.tt2, errorIfNULL = TRUE)
head(SAMtt2)
dim(SAMtt2)
#Pull out OTU data from 'tt2' dataframe
head(tt2[,1:10])
dim(tt2)
row.names(tt2) <- tt2[,1] #Make column 1 be rownames for 'tt2'
head(tt2[,1:10])
tt2 <- tt2[,-1] #Remove the extra "Sample" column
head(tt2[,1:10])
dim(tt2)
head(tt2[,1640:1642])
otu.tt2 <- tt2[,c(6:1642)] #Select "Sample" and "OTU" columns to create 'otu.tt2' dataframe
head(otu.tt2[,1:10])
dim(otu.tt2)
otu.tt2.trans <- t(otu.tt2) #Transpose 'otu.tt2' to have OTUs as rownames, sample names as column names
head(otu.tt2.trans[,1:10])
dim(otu.tt2.trans)
#Merge 'tax' back into 'otu.tt2.trans' for correct format and taxons
head(tax)
otu.tax.tt2 <- merge(otu.tt2.trans, tax, by=0) #Merge by rownames aka OTU rownames
dim(otu.tax.tt2) #1637 38
head(otu.tax.tt2[,35:38])
head(otu.tax.tt2[,1:5])
row.names(otu.tax.tt2) <- otu.tax.tt2[,1] #Set first row of 'otu.tax.tt2' as rownames
head(otu.tax.tt2[,1:5])
otu.tax.tt2 <- otu.tax.tt2[,-1] #Remove first row aka extraneous "OTU" column from 'otu.tax.tt2'
head(otu.tax.tt2[,1:5])
#Split 'otu.tax.tt2' again
dim(otu.tax.tt2)
head(otu.tax.tt2[,35:37])
otu.notax.tt2 <- otu.tax.tt2[,1:35] #Take rows 1-35 to make new dataframe 'otu.notax.tt2' (36 is "delete" column, 37 is "taxonomy" column)
head(otu.notax.tt2[,1:5])
head(otu.notax.tt2[,30:35])
dim(otu.notax.tt2)
class(otu.notax.tt2)
otu.notax.tt2 <- as.matrix(otu.notax.tt2) #Turn 'otu.notax.tt2' into a matrix class
class(otu.notax.tt2) #'otu.notax.tt2' is indeed a matrix
#Create OTU table phyloseq object
OTUtt2 = otu_table(otu.notax.tt2, taxa_are_rows = TRUE)
head(OTUtt2)
dim(OTUtt2)
head(otu.tax.tt2[,30:37])
tax.levels.tt2 <- separate(data = otu.tax.tt2,
col = Taxonomy,
into=c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"), sep=";")
#"separate" function separates "Taxonomy" column into 7 separate columns labeled "Kingdom", "Phylum", "Class", etc.
head(tax.levels.tt2) #Notice that "Species" column is blank
dim(tax.levels.tt2) #1637 43
tax.only.tt2 <- tax.levels.tt2[,37:42] #Keep only taxonomy columns "Kingdom" up to "Genus"
head(tax.only.tt2)
dim(tax.only.tt2) #1637 6
class(tax.only.tt2) #dataframe
tax.m.tt2 <- as.matrix(tax.only.tt2) #Convert class of 'tax.only.tt2' from dataframe to a matrix
class(tax.m.tt2) #matrix
#Create TAX taxonomy table phyloseq object
TAXtt2 = tax_table(tax.m.tt2)
head(TAXtt2)
dim(TAXtt2) #1637 6
#Create phyloseq object containing taxonomy, metadata, and OTU table
phyloseq.tt2 <- phyloseq(OTUtt2, SAMtt2, TAXtt2)
phyloseq.tt2 #View phyloseq object
#Output:
#phyloseq-class experiment-level object
#otu_table() OTU Table: [ 1637 taxa and 35 samples ]
#sample_data() Sample Data: [ 35 samples by 5 sample variables ]
#tax_table() Taxonomy Table: [ 1637 taxa by 6 taxonomic ranks ]
save(phyloseq.tt2, file="phyloseq.tt2.RData")
#Creating phyloseq objects for nasal samples
#Pull out metadata from 'nw2' dataframe
head(nw2[,1:10])
dim(nw2) #207 1643
meta.nw2 <- nw2[,1:6] #Take columns 1-6 to make 'meta.nw2'
head(meta.nw2)
row.names(meta.nw2) <- meta.nw2[,1] #Make column 1 be rownames for 'meta.nw2'
head(meta.nw2)
meta.nw2 <- meta.nw2[,-1] #Remove the extra "Sample" column
head(meta.nw2)
dim(meta.nw2) #207 5
#Create SAM metadata table phyloseq object
SAMnw2 = sample_data(meta.nw2, errorIfNULL = TRUE)
head(SAMnw2)
dim(SAMnw2) #207 5
#Pull out OTU data from 'nw2' dataframe
head(nw2[,1:10])
dim(nw2) #207 1643
head(nw2[,1640:1643])
otu.nw2 <- nw2[,c(1,7:1643)] #Select "Sample" and "OTU" columns to create 'otu.nw2' dataframe
head(otu.nw2[,1:10])
dim(otu.nw2) #207 1638
row.names(otu.nw2) <- otu.nw2[,1] #Make first column be rownames for 'otu.nw2'
head(otu.nw2[,1:10])
otu.nw2 <- otu.nw2[,-1] #Remove the first column
head(otu.nw2[,1:10])
otu.nw2.trans <- t(otu.nw2) #Transpose 'otu.nw2' to have OTUs as rownames, sample names as column names
head(otu.nw2.trans[,1:10])
dim(otu.nw2.trans) #1637 207
head(otu.nw2.trans[,200:207])
#Merge 'tax' back into 'otu.nw2.trans' for correct format and taxons
head(tax)
otu.tax.nw2 <- merge(otu.nw2.trans, tax, by=0) #Merge by rownames aka OTU rownames
dim(otu.tax.nw2) #1637 210
head(otu.tax.nw2[,200:210])
head(otu.tax.nw2[,1:5])
row.names(otu.tax.nw2) <- otu.tax.nw2[,1] #Set first row as rownames
head(otu.tax.nw2[,1:5])
otu.tax.nw2 <- otu.tax.nw2[,-1] #Remove first row, extraneous OTU column
head(otu.tax.nw2[,1:5])
dim(otu.tax.nw2) #1637 209
#Split 'otu.tax.nw2' again
dim(otu.tax.nw2) #1637 209
head(otu.tax.nw2[,205:209])
otu.notax.nw2 <- otu.tax.nw2[,1:207] #Take rows 1-207 to make new dataframe otu.notax.nw2 (208 is "delete" column, 209 is "taxonomy" column)
head(otu.notax.nw2[,1:5])
head(otu.notax.nw2[,200:207])
class(otu.notax.nw2) #dataframe
otu.notax.nw2 <- as.matrix(otu.notax.nw2) #Turn 'otu.notax.nw2' into a matrix class
class(otu.notax.nw2) #matrix
#Create OTU table phyloseq object
OTUnw2 = otu_table(otu.notax.nw2, taxa_are_rows = TRUE)
head(OTUnw2)
dim(OTUnw2) #1637 207
head(otu.tax.nw2[,200:209])
tax.levels.nw2 <- separate(data = otu.tax.nw2,
col = Taxonomy,
into=c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"), sep=";")
#Separate Taxonomy column into 7 separate columns labeled "Kingdom", "Phylum", "Class", etc.
head(tax.levels.nw2) #Notice that Species column is blank
dim(tax.levels.nw2) #1637 215
tax.only.nw2 <- tax.levels.nw2[,209:214] #Keep only taxonomy columns from "Kingdom" up to "Genus"
head(tax.only.nw2)
dim(tax.only.nw2) #1637 6
class(tax.only.nw2) #data.frame
tax.m.nw2 <- as.matrix(tax.only.nw2)
class(tax.m.nw2) #matrix
#Create TAX taxonomy table phyloseq object
TAXnw2 = tax_table(tax.m.nw2)
head(TAXnw2)
dim(TAXnw2) #1637 6
#Create phyloseq object containing taxonomy, metadata, and OTU table
phyloseq.nw2 <- phyloseq(OTUnw2, SAMnw2, TAXnw2)
phyloseq.nw2
#Output:
#phyloseq-class experiment-level object
#otu_table() OTU Table: [ 1637 taxa and 207 samples ]
#sample_data() Sample Data: [ 207 samples by 5 sample variables ]
#tax_table() Taxonomy Table: [ 1637 taxa by 6 taxonomic ranks ]
save(phyloseq.nw2, file="phyloseq.nw2.RData")
#Run adonis function to determine effect of time and treatment on structure of nasal, tonsil microbiota
#Use vegdist function from vegan package to run distance calculations instead of the distance function
#(original "distance" function that was used below is no longer available) and use those calculations to run through adonis test
#vegdist requires that phyloseq object's OTU table has OTUs listed in the columns and sample names listed in rows.
#Also, remove any OTUs with taxa_sums = 0 or non-numeric values. For example, this command can help remove OTUs with taxa_sums = 0:
#OTU <- prune_taxa(taxa_sums(<yourOTUtable>) > 0, <yourOTUtable>)
#If you create a separate phyloseq object with this specific OTU table setup,
#you should be able to run the vegdist function without any errors and use the output to run through adonis function
#Nasal
dist.nw2 <- distance(phyloseq.nw2, method="bray") #Distance calculation using Bray-Curtis
adonis.nw2 <- as(sample_data(phyloseq.nw2), "data.frame")
set.seed(1) #Use set.seed function when running simulations to ensure all results are reproducible
full.nw2 <- adonis(dist.nw2~Day*Treatment, data=adonis.nw2, permutations=9999)
full.nw2 #Display results
#Output:
#Call:
#adonis(formula = dist.nw2 ~ Day * Treatment, data = adonis.nw2, permutations = 9999)
#Permutation: free
#Number of permutations: 9999
#Terms added sequentially (first to last)
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
# Day 4 14.499 3.6249 22.0060 0.26863 1e-04 ***
# Treatment 2 1.918 0.9591 5.8226 0.03554 1e-04 ***
# Day:Treatment 8 5.932 0.7415 4.5015 0.10990 1e-04 ***
# Residuals 192 31.626 0.1647 0.58594
# Total 206 53.976 1.00000
#---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###Day had largest effect on variation
#switched Treatment and Day order, obtained same conclusions: Day had largest effect on variation
full.nw2_2 <- adonis(dist.nw2~Treatment*Day, data=adonis.nw2, permutations=9999)
full.nw2_2
#Output:
#Call:
# adonis(formula = dist.nw2 ~ Treatment * Day, data = adonis.nw2, permutations = 9999)
#Permutation: free
#Number of permutations: 9999
#Terms added sequentially (first to last)
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
# Treatment 2 1.959 0.9793 5.9449 0.03628 1e-04 ***
# Day 4 14.459 3.6148 21.9449 0.26788 1e-04 ***
# Treatment:Day 8 5.932 0.7415 4.5015 0.10990 1e-04 ***
# Residuals 192 31.626 0.1647 0.58594
# Total 206 53.976 1.00000
#---
#Tonsil
dist.tt2 <- distance(phyloseq.tt2, method="bray")
adonis.tt2 <- as(sample_data(phyloseq.tt2), "data.frame")
set.seed(1)
full.tt2 <- adonis(dist.tt2~Day*Treatment, data=adonis.tt2, permutations=9999)
full.tt2
#Output:
#Call:
#adonis(formula = dist.tt2 ~ Day * Treatment, data = adonis.tt2, permutations = 9999)
#Permutation: free
#Number of permutations: 9999
#Terms added sequentially (first to last)
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
#Day 2 0.2729 0.13644 1.1432 0.06593 0.2711
#Treatment 2 0.2616 0.13082 1.0961 0.06321 0.3268
#Day:Treatment 4 0.5012 0.12530 1.0498 0.12110 0.3719
#Residuals 26 3.1032 0.11935 0.74976
#Total 34 4.1389 1.00000
###None have a more larger effect on variation than the other.
#switched Treatment and Day order, obtained same conclusions: none had a more larger effect on variation than the others
full.tt2_2 <- adonis(dist.tt2~Treatment*Day, data=adonis.tt2, permutations=9999)
full.tt2_2
#Output:
#Call:
#adonis(formula = dist.tt2 ~ Treatment * Day, data = adonis.tt2, permutations = 9999)
#Permutation: free
#Number of permutations: 9999
#Terms added sequentially (first to last)
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
#Treatment 2 0.2662 0.13310 1.1152 0.06432 0.3049
#Day 2 0.2683 0.13416 1.1240 0.06483 0.2896
#Treatment:Day 4 0.5012 0.12530 1.0498 0.12110 0.3709
#Residuals 26 3.1032 0.11935 0.74976
#Total 34 4.1389 1.00000
#################################################################################################################################################################################################
#FOURTH SECTION: Beta diversity
#Purpose: This code generates non-metric multidimensional scaling ordination based on Bray-Curtis dissimilarities
#to create NMDS plots, and runs pairwise.adonis function to identify any significant differences in bacterial composition
#between treatment groups on a given day for each tissue.
#Files needed:
#phyloseq.nw2
#phyloseq.tt2
#Load library packages
library(vegan)
library(ggplot2)
library(dplyr)
library(tidyr)
library(tidyverse)
library(phyloseq)
library(scales)
library(RColorBrewer)
library(philentropy)
library(cowplot)
library('wesanderson')
#Load these two functions from the funfuns R package (https://github.com/Jtrachsel/funfuns)
NMDS_ellipse
veganCovEllipse
#Load the following pairwise.adonis function, taken from https://www.researchgate.net/post/How_can_I_do_PerMANOVA_pairwise_contrasts_in_R
pairwise.adonis <- function(x,factors, sim.function = 'vegdist', sim.method = 'bray', p.adjust.m ='bonferroni')
{
library(vegan)
co = combn(unique(as.character(factors)),2)
pairs = c()
F.Model =c()
R2 = c()
p.value = c()
for(elem in 1:ncol(co)){
if(sim.function == 'daisy'){
library(cluster); x1 = daisy(x[factors %in% c(co[1,elem],co[2,elem]),],metric=sim.method)
} else{x1 = vegdist(x[factors %in% c(co[1,elem],co[2,elem]),],method=sim.method)}
ad = adonis(x1 ~ factors[factors %in% c(co[1,elem],co[2,elem])] );
pairs = c(pairs,paste(co[1,elem],'vs',co[2,elem]));
F.Model =c(F.Model,ad$aov.tab[1,4]);
R2 = c(R2,ad$aov.tab[1,5]);
p.value = c(p.value,ad$aov.tab[1,6])
}
p.adjusted = p.adjust(p.value,method=p.adjust.m)
sig = c(rep('',length(p.adjusted)))
sig[p.adjusted <= 0.05] <-'.'
sig[p.adjusted <= 0.01] <-'*'
sig[p.adjusted <= 0.001] <-'**'
sig[p.adjusted <= 0.0001] <-'***'
pairw.res = data.frame(pairs,F.Model,R2,p.value,p.adjusted,sig)
print("Signif. codes: 0 â***â 0.001 â**â 0.01 â*â 0.05 â.â 0.1 â â 1")
return(pairw.res)
}
#Nasal
#Setting up 'phyloseq.nw2' into dataframes for NMDS calculation
nw2.meta <- data.frame(phyloseq.nw2@sam_data) #Make 'phyloseq.nw2 sam_data' into dataframe
nw2.otu <- data.frame(t(phyloseq.nw2@otu_table)) #Make 'phyloseq.nw2 otu_table' into dataframe
class(nw2.meta) #data.frame
rownames(nw2.meta) == row.names(nw2.otu) #Make sure rownames between 'nw2.meta' and 'nw2.otu' match exactly
nw2.meta$numOTUS <- rowSums(nw2.otu > 1) #For rows with sums greater than 1 in 'nw2.otu', move rows and their respective sum values into "numOTUs" column in 'nw2.meta'
head(nw2.meta)
#NMDS calculation
nw2.otu[1:10,1:10]
nw_NMDS <- NMDS_ellipse(nw2.meta, nw2.otu, grouping_set = 'All')
#Output:
#Result: [1] "Ordination stress: 0.195744611098944"
#Separate meta data and ellipse data to two lists to make NMDS plot
head(nw_NMDS)
nw.metanmds <- nw_NMDS[[1]] #'nw.metanmds' has meta data + MDS calculations. Select this 1st list of 'nw_NMDS' using double brackets
nw.df_ell <- nw_NMDS[[2]] #'nw.df_ell' is accessing 2nd list from 'nw_NMDS' that has ellipse calculations
#Need two separate lists for making NMDS plot
nw.df_ell$group
head(nw.df_ell)
#Create "Day" and "Treatment" columns within 'nw.df_ell' for faceting purposes
nw.df_ell$Day <- sub('(D[0-9]+) ([A-Za-z]+)', '\\1', nw.df_ell$group) #Created "Day" column, '\\1' returns the first part of the regular expression (D[0-9]+) from 'nw.df_ell$group'
nw.df_ell$Treatment <- sub('(D[0-9]+) ([A-Za-z]+)', '\\2', nw.df_ell$group) #Created "Treatment" column, '\\2' returns the second part of the sub expression ([A-Za-z]+) from 'nw.df_ell$group'
head(nw.df_ell)
#Restructure level order for 'nw.metanmds' and 'nw.df_ell'
nw.metanmds$Day = factor(nw.metanmds$Day, levels = c("D0", "D4", "D7", "D11", "D14"))
nw.df_ell$Day = factor(nw.df_ell$Day, levels = c("D0", "D4", "D7", "D11", "D14"))
levels(nw.df_ell$Day)
#Renaming treatment groups Control, Injected and Infeed to NON, IM and IF, respectively, in 'nw.metanmds' and 'nw.df_ell' dataframes
nw.metanmds$Treatment2 = nw.metanmds$Treatment
nw.metanmds$Treatment2 <- as.character(nw.metanmds$Treatment2)
nw.metanmds$Treatment2[nw.metanmds$Treatment2 == 'Control'] <- "NON"
nw.metanmds$Treatment2[nw.metanmds$Treatment2 == 'Injected'] <- "IM"
nw.metanmds$Treatment2[nw.metanmds$Treatment2 == 'Infeed'] <- "IF"
nw.df_ell$Treatment2 = nw.df_ell$Treatment
nw.df_ell$Treatment2 <- as.character(nw.df_ell$Treatment2)
nw.df_ell$Treatment2[nw.df_ell$Treatment2 == 'Control'] <- "NON"
nw.df_ell$Treatment2[nw.df_ell$Treatment2 == 'Injected'] <- "IM"
nw.df_ell$Treatment2[nw.df_ell$Treatment2 == 'Infeed'] <- "IF"
#Creating plot from NMDS calculations
nasal.nmdsplot <- ggplot(data=nw.metanmds, aes(x=MDS1, y=MDS2, color=Treatment2)) + geom_point() +
geom_segment(aes(x=MDS1, xend=centroidX, y=MDS2, yend=centroidY), alpha = 0.5) +
geom_path(data=nw.df_ell, aes(x=NMDS1, y=NMDS2, color=Treatment2, group=group)) +
facet_wrap(~Day, scales = 'free') +
scale_color_brewer(palette="Dark2") +
theme_gray(base_size = 10) +
theme(strip.text.x = element_text(size=15), axis.text.x = element_text(size=13), axis.text.y = element_text(size=13), axis.title.x = element_text(size=14), axis.title.y = element_text(size=14), legend.text=element_text(size=14), legend.title=element_text(size=14)) +
labs(color="Treatment group")
nasal.nmdsplot2 <- nasal.nmdsplot + scale_colour_manual(values=c("#E69F00", "#56B4E9", "#999999")) + theme(legend.position = "right")
nasal.nmdsplot2
#Save 'nasal.nmdsplot2' as a .tiff for publication, 500dpi
ggsave("Nasal_NMDS_DayAndTreatment.tiff", plot=nasal.nmdsplot2, width = 10, height = 6, dpi = 500, units =c("in"))
#Using pairwise.adonis function
nw.adon <- pairwise.adonis(nw2.otu, nw2.meta$All) #Run pairwise.adonis on 'nw2.otu' OTU table and "All" column of 'nw2.meta' dataframe
#nw.adon contains all the pairwise comparisons
nw.adon$pairs #List all comparisons in the "pairs" column of 'nw.adon'
goodcomps.nw <- c(grep('D0 [A-Za-z]+ vs D0 [A-Za-z]+', nw.adon$pairs),
grep('D4 [A-Za-z]+ vs D4 [A-Za-z]+', nw.adon$pairs),
grep('D7 [A-Za-z]+ vs D7 [A-Za-z]+', nw.adon$pairs),
grep('D11 [A-Za-z]+ vs D11 [A-Za-z]+', nw.adon$pairs),
grep('D14 [A-Za-z]+ vs D14 [A-Za-z]+', nw.adon$pairs))
# "[A-Za-z]" matches all capital and lowercase letters
# "+" matches a whole word and not just one letter (if you didn't have "+", then it would match by one letter)
# "c" creates the vector, lumps all pairs of specific groups of interest together
# You want to make a vector to combine all the pairwise comparisons you're interested in (same day, different treatment group)
nw.adon.good <- nw.adon[goodcomps.nw,] #Rename 'goodcomps.nw' vector to 'nw.adon.good'
nw.adon.good
nw.adon.good$p.adjusted <- p.adjust(nw.adon.good$p.value, method = 'fdr') #"p.adjust" function returns a set of p-values adjusted with "fdr" method
nw.adon.good$p.adjusted2 <- round(nw.adon.good$p.adjusted, 3) #Round p-values to 3 decimal points and list in new "p.adjusted2" column
nw.adon.good$p.adjusted2[nw.adon.good$p.adjusted2 > 0.05] <- NA #For all p-values greater than 0.05, replace with "NA"
write.csv(nw.adon.good, file='nw.adon.good.txt', row.names=TRUE)
#Tonsil
#Setting up 'phyloseq.tt2' into dataframes for NMDS calculation
tt2.meta <- data.frame(phyloseq.tt2@sam_data) #Make phyloseq.tt2 sam_data into dataframe
tt2.otu <- data.frame(t(phyloseq.tt2@otu_table)) #Make phyloseq.tt2 otu_table into dataframe
class(tt2.meta) #data.frame
rownames(tt2.meta) == row.names(tt2.otu) #Make sure rownames between tt2.meta and tt2.otu match exactly. Yes they do.
tt2.meta$numOTUS <- rowSums(tt2.otu > 1)
head(tt2.meta)
#NMDS calculation
tt2.otu[1:10,1:10]
tt_NMDS <- NMDS_ellipse(tt2.meta, tt2.otu, grouping_set = 'All')
#Output:
#Result: [1] "Ordination stress: 0.180074413238642"
#Separate meta data and ellipse data to two lists to make NMDS plot
head(tt_NMDS)
tt.metanmds <- tt_NMDS[[1]] #'tt.metanmds' has meta data + MDS calculations. Select this 1st list of 'tt_NMDS' using double brackets
class(tt.metanmds) #data.frame
#Count number of samples in each "All" group in 'tt.metanmds' and summarize
tt.metanmds %>% group_by(All) %>% summarise(count=n())
#Output:
## A tibble: 9 x 2
#All count
#<fct> <int>
#1 D14 Control 2
#2 D14 Infeed 3
#3 D14 Injected 5
#4 D4 Control 5
#5 D4 Infeed 5
#6 D4 Injected 4
#7 D7 Control 3
#8 D7 Infeed 3
#9 D7 Injected 5
#Remove day 14 samples as there are too few sample points to plot
tt.metanmds2 <- tt.metanmds %>% filter(All != 'D14 Control')
head(tt.metanmds2)
tt.df_ell <- tt_NMDS[[2]] #'tt.df_ell' is accessing 2nd list from 'tt_NMDS' that has ellipse calculations
tt.df_ell2 <- tt.df_ell %>% filter(group != 'D14 Control')
#Need two separate lists for making NMDS plot
tt.df_ell2$group
#15 levels
head(tt.df_ell2)
#Create "Day" and "Treatment" columns within 'tt.df_ell2' for faceting
tt.df_ell2$Day <- sub('(D[0-9]+) ([A-Za-z]+)', '\\1', tt.df_ell2$group) #'\\1' returns the first part of the regular expression (D[0-9]+) from tt.df_ell$group
tt.df_ell2$Treatment <- sub('(D[0-9]+) ([A-Za-z]+)', '\\2', tt.df_ell2$group) #'\\2' returns the second part of the sub expression ([A-Za-z]+) from tt.df_ell$group
head(tt.df_ell2)
#Restructure level order for 'tt.metanmds2' and 'tt.df_ell2'
tt.metanmds2$Day = factor(tt.metanmds2$Day, levels = c("D4", "D7", "D14"))
tt.df_ell2$Day = factor(tt.df_ell2$Day, levels = c("D4", "D7", "D14"))
levels(tt.df_ell2$Day)
#Renaming treatment group names to IM and IF in 'tt.metanmds' and 'tt.df_ell' dataframes
tt.metanmds2$Treatment2 = tt.metanmds2$Treatment
tt.metanmds2$Treatment2 <- as.character(tt.metanmds2$Treatment2)
tt.metanmds2$Treatment2[tt.metanmds2$Treatment2 == 'Injected'] <- "IM"
tt.metanmds2$Treatment2[tt.metanmds2$Treatment2 == 'Infeed'] <- "IF"
tt.df_ell2$Treatment2 = tt.df_ell2$Treatment
tt.df_ell2$Treatment2 <- as.character(tt.df_ell2$Treatment2)
tt.df_ell2$Treatment2[tt.df_ell2$Treatment2 == 'Injected'] <- "IM"
tt.df_ell2$Treatment2[tt.df_ell2$Treatment2 == 'Infeed'] <- "IF"
#Creating plot from NMDS calculations
ggplot(data=tt.metanmds2, aes(x=MDS1, y=MDS2, color=Treatment2)) + geom_point() +
geom_segment(aes(x=MDS1, xend=centroidX, y=MDS2, yend=centroidY), alpha = 0.5) +
geom_path(data=tt.df_ell2, aes(x=NMDS1, y=NMDS2, color=Treatment2, group=group)) +
facet_wrap(~Day, scales = 'free') +
scale_color_brewer(palette="Dark2") +
theme_gray(base_size = 10) +
theme(strip.text.x = element_text(size=15)) +
labs(caption = 'Ordination stress = 0.18',color="Treatment group")
#Using pairwise.adonis function
tt.adon <- pairwise.adonis(tt2.otu, tt2.meta$All) #Run pairwise.adonis on 'tt2.otu' OTU table and "All" column of 'tt2.meta' dataframe
#tt.adon contains all the pairwise comparisons
tt.adon$pairs #List all comparisons in the "pairs" column of 'tt.adon'
goodcomps.tt <- c(grep('D4 [A-Za-z]+ vs D4 [A-Za-z]+', tt.adon$pairs),
grep('D7 [A-Za-z]+ vs D7 [A-Za-z]+', tt.adon$pairs),
grep('D14 [A-Za-z]+ vs D14 [A-Za-z]+', tt.adon$pairs))
tt.adon.good <- tt.adon[goodcomps.tt,] #Rename 'this'goodcomps.tt' vector to 'tt.adon.good'
tt.adon.good
tt.adon.good$p.adjusted <- p.adjust(tt.adon.good$p.value, method = 'fdr')
tt.adon.good$p.adjusted2 <- round(tt.adon.good$p.adjusted, 3)
tt.adon.good$p.adjusted2[tt.adon.good$p.adjusted2 > 0.05] <- NA
write.csv(tt.adon.good, file='tt.adon.good.txt', row.names=TRUE)
#################################################################################################################################################################################################
#FIFTH SECTION: Alpha diversity
#Purpose: This code calculates alpha diversity metrics (Shannon, Inverse Simpson) that are plotted as box and whisker plots,
#and uses wilcoxon rank sum test to assess any significant differences in diversity between treatment groups on a given day for each tissue.
#Files needed:
#phyloseq.nw2
#phyloseq.tt2
#Load library packages
library(vegan)
library(ggplot2)
library(dplyr)
library(tidyr)
library(tidyverse)
library(phyloseq)
library(scales)
library(RColorBrewer)
library(philentropy)
library(cowplot)
library('wesanderson')
#Nasal
#Calculating alpha diversity metrics: Shannon, Inverse Simpson
nw2.meta$shannon <- diversity(nw2.otu) #"diversity" is a vegan function. The default index is set at "shannon". I added a shannon index column in 'nw2.meta'
nw2.meta$invsimpson <- diversity(nw2.otu,index = 'invsimpson') #We used 'invsimpson' since it is easier to interpret than Simpson values and won't need to "inverse" the Simpson values to understand (With Simpson values, the lower the number, the higher the diversity)
levels(sample_data(nw2.meta)$Day) # Set the level order of values in "Day" column to "D0" "D4" "D7" "D11" "D14"
#Calculate the average shannon, invsimpson, numOTUs for each "All" subtype within nw2.meta
nw.average.shannon.invsimpson.numOTUs <- aggregate(nw2.meta[, 5:7], list(nw2.meta$All), mean)
print(nw.average.shannon.invsimpson.numOTUs)
#Output:
# Group.1 shannon invsimpson numOTUS
#1 D0 Control 2.487904 6.992742 80.83333
#2 D0 Infeed 2.623665 5.260101 93.21053
#3 D0 Injected 2.493518 4.865172 74.84211
#4 D11 Control 3.567874 12.635943 130.62500
#5 D11 Infeed 1.796345 3.615875 27.71429
#6 D11 Injected 2.833157 8.368972 87.85714
#7 D14 Control 1.974733 3.377601 44.37500
#8 D14 Infeed 2.293317 7.677236 54.14286
#9 D14 Injected 1.890397 2.921545 49.83333
#10 D4 Control 3.574778 15.639098 123.27273
#11 D4 Infeed 3.065280 8.812949 101.40000
#12 D4 Injected 3.455888 12.502168 116.86364
#13 D7 Control 3.520855 13.793185 114.00000
#14 D7 Infeed 2.819091 6.328819 93.07143
#15 D7 Injected 2.822564 6.958273 93.80000
write.csv(nw.average.shannon.invsimpson.numOTUs, file="nw.average.shannon.invsimpson.num.OTUs.txt", row.names=TRUE)
nw2.pairwise.wilcox.shannon.test <- pairwise.wilcox.test(nw2.meta$shannon, nw2.meta$All, p.adjust.method = 'none') #Calculate pairwise comparisons by "All" column of the shannon indices in "Shannon" column
print(nw2.pairwise.wilcox.shannon.test) #Look at the results of 'nw2.pairwise.wilcox.shannon.test'
nw2.pairwise.wilcox.invsimpson.test <- pairwise.wilcox.test(nw2.meta$invsimpson, nw2.meta$All, p.adjust.method = 'none')
print(nw2.pairwise.wilcox.invsimpson.test)
#Change level names of 'nw2.meta' "Day" column from "D#" to "#"
levels(nw2.meta$Day)
levels(nw2.meta$Day) <- c("0", "4", "7", "11", "14")
#Renaming treatment group names from Control, Injected, and Infeed to NON, IM, and IF, respectively
nw2.meta$Treatment2 = nw2.meta$Treatment #Duplicate "Treatment" column and name as "Treatment2" column
nw2.meta$Treatment2 <- as.character(nw2.meta$Treatment2) #Covert values within "Treatment2" column to characters
nw2.meta$Treatment2[nw2.meta$Treatment2 == 'Control'] <- "NON" #Rename all "NON" characters in "Treatment2" column as "Control"
nw2.meta$Treatment2[nw2.meta$Treatment2 == 'Injected'] <- "IM"
nw2.meta$Treatment2[nw2.meta$Treatment2 == 'Infeed'] <- "IF"
#Generate a box and whisker plot of shannon (both shannon and inverse simpson diversity indices showed same trends; I chose to make a plot using shannon indices)
(nw2.shan <- ggplot(data = nw2.meta, aes(x=Treatment2, y=shannon, group=All, fill=Treatment2)) +
geom_boxplot(position = position_dodge2(preserve = 'total')) +
facet_wrap(~Day, scales = 'free') +
scale_y_continuous(name = "Shannon diversity") +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
strip.text.x = element_text(size=14),
axis.title.y = element_text(size=15)) +
theme(legend.position = "none"))
# "free" within "facet_wrap" allows each plot to customize the scale to the specific data set (no forced scaling applied to all plots)
# "position = position_dodge2(preserve = 'total')" fixes the ggplot box width, making them wider, prevents narrow boxes from forming in the plot
nw2.shan <- nw2.shan + scale_fill_manual(values=c("#E69F00", "#56B4E9", "#999999"))
nw2.shan
#Save 'nw2.shan' as a .tiff for publication, 500dpi
ggsave("Nasal_Shannon.tiff", plot=nw2.shan, width = 10, height = 5, dpi = 500, units =c("in"))
#Tonsil
#Calculating alpha diversity metrics: Shannon, Inverse Simpson
tt2.meta$shannon <- diversity(tt2.otu)
tt2.meta$invsimpson <- diversity(tt2.otu,index = 'invsimpson')
levels(sample_data(tt2.meta)$Day) #Levels are: "D4" "D7" "D14"
#Calculate the average shannon, invsimpson, numOTUs for each "All" subtype within 'tt2.meta'
tt.average.shannon.invsimpson.numOTUs <- aggregate(tt2.meta[, 6:8], list(tt2.meta$All), mean)
print(tt.average.shannon.invsimpson.numOTUs)
#Output:
# Group.1 numOTUS shannon invsimpson
#1 D14 Control 53.50000 2.900513 10.421222
#2 D14 Infeed 47.66667 2.840736 10.343016
#3 D14 Injected 51.00000 2.881732 10.383069
#4 D4 Control 60.40000 2.934585 10.827341
#5 D4 Infeed 52.80000 2.745635 9.454290
#6 D4 Injected 47.00000 2.432809 6.287067
#7 D7 Control 54.66667 2.776998 11.085440
#8 D7 Infeed 54.33333 3.031819 11.932736
#9 D7 Injected 45.60000 2.684307 7.741800
write.csv(tt.average.shannon.invsimpson.numOTUs, file="tt.average.shannon.invsimpson.num.OTUs.txt", row.names=TRUE)
tt2.pairwise.wilcox.shannon.test <- pairwise.wilcox.test(tt2.meta$shannon, tt2.meta$All, p.adjust.method = 'none')
print(tt2.pairwise.wilcox.shannon.test)
tt2.pairwise.wilcox.invsimpson.test <- pairwise.wilcox.test(tt2.meta$invsimpson, tt2.meta$All, p.adjust.method = 'none')
print(tt2.pairwise.wilcox.invsimpson.test)
#################################################################################################################################################################################################
#SIXTH SECTION: Magnitude of Change in Nasal Microbiota
#Purpose: This code plots the F-statistic from PERMANOVA pairwise comparisons of NON and
#either IM or IF groups, over time (displays the magnitude of change in the nasal bacterial community structure
#of the two antibiotic treatment groups relative to control)
#Files needed:
#Nasal_MagnitudeOfChange.csv
#Load library packages
library(vegan)
library(ggplot2)
library(dplyr)
library(tidyr)
library(tidyverse)
library(phyloseq)
library(RColorBrewer)
library(philentropy)
library(cowplot)
library(ggrepel)
#The F-values were obtained from nw.adon.good.txt data generated in the "Beta diversity" section
#Nasal_MagnitudeOfChange.csv was created from nw.adon.good.txt data that was rearranged
#To make the Nasal_MagnitudeOfChange.csv file, open nw.adon.good.txt file
#(created from FS1b_beta_diversity_each_tissue_phyloseq.R) in excel,
#copy columns "F. Model" through "p.adjusted2" and paste in a separate spreadsheet.
#Add "Day" and "Treatment" columns and save as "Nasal_MagnitudeOfChange.csv".
nasal <- read.csv("Nasal_MagnitudeOfChange.csv")
class(nasal)
nasal$Day <- factor(nasal$Day) #Encode "Day" as a factor
nasal2 <- ggplot(data=nasal, aes(x=Day, y=F.Model, group=Treatment)) +
geom_line(aes(color=Treatment)) + geom_point(aes(color=Treatment)) +
ylab("PERMANOVA F vs control \n(difference relative to control)") +
scale_fill_manual(values=c("#E69F00", "#56B4E9")) +
scale_color_manual(values=c("#E69F00", "#56B4E9")) +
geom_label_repel(aes(label=p.adjusted2), box.padding = 0.35, point.padding=0.5,segment.color = 'grey50') +
theme_classic(base_size = 12) +
theme(axis.text.y = element_text(size = 14), axis.text.x = element_text(size=14), axis.title.x = element_text(size=14), axis.title.y = element_text(size=14), legend.text=element_text(size=14), legend.title=element_text(size=14)) +
labs(color="Treatment group")
nasal2
#Save 'nasal2' as a .tiff for publication, 500dpi
ggsave("Nasal_Magnitude.tiff", plot=nasal2, width = 10, height = 5, dpi = 500, units =c("in"))
#################################################################################################################################################################################################
#SEVENTH SECTION: Differential Abundance of Genera in Nasal Microbiota using DESeq2
#Purpose: This code uses DESeq2 package to identify nasal microbial genera that were differentially abundant
#between treatment groups and control group
#Files needed:
#Mothur shared file: FS1bfinal.outsingletons.abund.opti_mcc.shared
#Mothur constaxonomy file: FS1bfinal.outsingletons.abund.opti_mcc.0.03.cons.taxonomy
#Metadata: FS1babundsingleton2000metadata.csv
#FS1b_FinalDiffAbundNasalGenus_IC.csv
#FS1b_FinalDiffAbundNasalGenus_FCnoRoseburia_final.csv
#Load library packages
library(DESeq2)
library(phyloseq)
library(ggplot2)
library(tidyr)
library("wesanderson")
library(plotly)
library(gapminder)
library(cowplot)
#Annotations
#fc = infeed, control
#ic = injected, control
#fi = infeed, injected
#if = injected, infeed
#Preparing objects for DESeq2: load files
otu2 <- import_mothur(mothur_shared_file = 'FS1bfinal.outsingletons.abund.opti_mcc.shared')
taxo2 <- import_mothur(mothur_constaxonomy_file = 'FS1bfinal.outsingletons.abund.opti_mcc.0.03.cons.taxonomy')
taxo2
meta2 <- read.table(file = 'FS1babundsingleton2000metadata.csv', sep = ',', header = TRUE)
#Organize 'meta2' meta file
rownames(meta2) <- meta2$Sample #Make names in "Sample" become rownames for 'meta2'
meta2$Day <- gsub("D", "", meta2$Day) # remove "D"
meta2$Treatment2 = meta2$Treatment
meta2$Treatment2 <- as.character(meta2$Treatment2)
meta2$Treatment2[meta2$Treatment2 == 'Control'] <- "NON"
meta2$Treatment2[meta2$Treatment2 == 'Injected'] <- "IM"
meta2$Treatment2[meta2$Treatment2 == 'Infeed'] <- "IF"
meta2$Set <- paste(meta2$Day, meta2$Tissue, meta2$Treatment2, sep = '_')
#Make phyloseq object 'FS1b' (combines taxonomy, OTU, and metadata)
phy_meta2 <- sample_data(meta2)
FS1b <- phyloseq(otu2, taxo2)
FS1b <- merge_phyloseq(FS1b, phy_meta2) #Combines the 'phy_meta2' metadata with 'FS1b' phyloseq object
colnames(tax_table(FS1b)) <- c('Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus')
FS1b
#Prune samples from 'FS1b'
FS1b <- prune_samples(sample_sums(FS1b) > 2000, FS1b) #This removes samples that have fewer than 2000 sequences associated with them.
FS1b <- prune_taxa(taxa_sums(FS1b) > 10, FS1b) #This removes OTUs that occur less than 10 times globally
tax_table(FS1b) [1:5, 1:6] #Checking what's in 'tax_table' first 5 rows, first 6 columns
#Grouping OTUs by Genus using the tax_glom function
FS1b.genus <- tax_glom(FS1b, taxrank = "Genus")
#This method merges species that have the same taxonomy at a certain taxanomic rank (in this case, by "Genus").
#Its approach is analogous to tip_glom, but uses categorical data instead of a tree.
######################################## PRIMARY COMPARISONS TO MAKE ####################################################
# NMDS plot showed that disperion is different between days, so I subsetted by day and tissue
# Important comparisons to make (significant changes in beta diversity between treatments)
# Compare Days 4, 7, 11 NON compared to each of the two treatments
# Compare Days 4, 7, 11 IM and IF
# Compare Day 14 NON and IF
# Other comparisons to make (no significant changes in beta diversity between treatments)
# Compare Days 0, 14 IM and IF
# Compare Day 0 NON compared to each of the two treatments
# Compare Day 14 NON and IM
##################################################### Day 0 ############################################################
############## Day 0 Nasal #########################
sample_data(FS1b.genus)
#Convert Phyloseq Data 'FS1b.genus' to DESeq2 object 'FS1b.D0.nw.De'
FS1b.D0.nw <- subset_samples(FS1b.genus, Day == 0 & Tissue == 'NW')
sample_sums(FS1b.D0.nw) #Returns the total number of individuals observed from each sample
FS1b.D0.nw <- prune_taxa(taxa_sums(FS1b.D0.nw) > 1, FS1b.D0.nw) #If taxa_sums is >1, then it will print that out in 'FS1b.D0.nw' object and not include any samples with taxa of sums <1.
rowSums(FS1b.D0.nw@otu_table)
FS1b.D0.nw.De <- phyloseq_to_deseq2(FS1b.D0.nw, ~ Set) # "~Set" : define the major sample covariate as the study design factor. This will be whatever you want to group data by, whatever column you used to designate ellipses with
FS1b.D0.nw.De <- DESeq(FS1b.D0.nw.De, test = "Wald", fitType = "parametric") #Differential expression analysis based on the negative binomial (aka Gamma-Poisson) distribution
######### Day 0 Nasal IF vs NON ###################
#Determine number of pigs per group from "Set" column in 'meta2' dataframe:
sum(meta2$Set == "0_NW_IF")
#Output:
#IF = 19
sum(meta2$Set == "0_NW_NON")
#Output:
#NON = 18
#Extract results from 'FS1b.D0.nw.De' DESeq object and organize into 'sigtab.D0.fc' table
res.D0.fc = results(FS1b.D0.nw.De, contrast=c("Set","0_NW_IF","0_NW_NON"),cooksCutoff = FALSE, pAdjustMethod = 'BH')
res.D0.fc
sigtab.D0.fc = res.D0.fc[which(res.D0.fc$padj < .05), ]
sigtab.D0.fc = cbind(as(sigtab.D0.fc, "data.frame"), as(tax_table(FS1b.D0.nw)[rownames(sigtab.D0.fc), ], "matrix")) #"cbind" combines all columns together, regardless of rownames (if you want to match by rownames, use merge function)
format(sigtab.D0.fc$padj, scientific = TRUE)
sigtab.D0.fc$newp <- format(round(sigtab.D0.fc$padj, digits = 3), scientific = TRUE)
sigtab.D0.fc$Treatment <- ifelse(sigtab.D0.fc$log2FoldChange >=0, "IF", "NON") #Assigning "IF" = yes, "NON" = no; it's important to make sure you have the correct group names in the "yes" and "no" position for "ifelse" function
sigtab.D0.fc
#Summarize 'sigtab.D0.fc' table
sum.sigtab.D0.fc <- summary(sigtab.D0.fc)
sum.sigtab.D0.fc
#plot 'sigtab.D0.fc'
deseq.D0.fc <- ggplot(sigtab.D0.fc, aes(x=reorder(rownames(sigtab.D0.fc), log2FoldChange), y=log2FoldChange, fill = Treatment)) +
geom_bar(stat='identity') + geom_text(aes(x=rownames(sigtab.D0.fc), y=0, label = paste(Family,Genus, sep = ' ')), size=5)+ labs(x="Family Genus")+
theme(axis.text.x=element_text(color = 'black', size = 13),
axis.text.y=element_text(color = 'black', size=13, face = 'italic'),
axis.title.x=element_text(size = 15),
axis.title.y=element_text(size = 15))+ ggtitle('Differentially Abundant OTUs in IF Relative to NON at Nasal Site on Day 0')+ coord_flip() +
theme(plot.title = element_text(size = 20, hjust=0.5), legend.text = element_text(size=12), legend.title = element_text(size=13)) +
scale_fill_manual(labels = c("IF", "NON"), values = c('#E69F00', '#999999'))
# "reorder" makes the logfold changes appear in numerical order (largest logfold value at the top and at the bottom of the plot), making it easier to see
deseq.D0.fc
#Add OTU and treatment group comparisons columns to 'sigtab.D0.fc'
sigtab.D0.fc$OTU <- rownames(sigtab.D0.fc)
sigtab.D0.fc$comp <- 'D0_nasal_IFvsNON'
#Create a final table ('final.nonsigtab') that lists all genera that were differentially abundant
#between IF and NON treatment groups from 'sigtab.D0.fc'.
#Other within-day comparisons that had no significant changes in beta diversity between two treatment groups
#will be added to 'final.nonsigtab'.
final.nonsigtab <- sigtab.D0.fc
########## Day 0 Nasal IM vs NON ####################
#Determine number of pigs per group from "Set" column in 'meta2' dataframe:
sum(meta2$Set == "0_NW_IM")
#Output:
#IM = 19
sum(meta2$Set == "0_NW_NON")
#Output:
#NON = 18
#Extract results from 'FS1b.D0.nw.De' DESeq object and organize into 'sigtab.D0.ic' table
FS1b.D0.nw.De$Set
res.D0.ic = results(FS1b.D0.nw.De, contrast=c("Set","0_NW_IM","0_NW_NON"),cooksCutoff = FALSE, pAdjustMethod = 'BH')
sigtab.D0.ic = res.D0.ic[which(res.D0.ic$padj < .05), ]
sigtab.D0.ic = cbind(as(sigtab.D0.ic, "data.frame"), as(tax_table(FS1b.D0.nw)[rownames(sigtab.D0.ic), ], "matrix"))
format(sigtab.D0.ic$padj, scientific = TRUE)
sigtab.D0.ic$newp <- format(round(sigtab.D0.ic$padj, digits = 3), scientific = TRUE)
sigtab.D0.ic$Treatment <- ifelse(sigtab.D0.ic$log2FoldChange >=0, "IM", "NON")
#Summarize 'sigtab.D0.ic' table
sum.sigtab.D0.ic <- summary(sigtab.D0.ic)
sum.sigtab.D0.ic
#Plot 'sigtab.D0.ic'
deseq.D0.ic <- ggplot(sigtab.D0.ic, aes(x=reorder(rownames(sigtab.D0.ic), log2FoldChange), y=log2FoldChange, fill = Treatment)) +
geom_bar(stat='identity') + geom_text(aes(x=rownames(sigtab.D0.ic), y=0, label = paste(Family,Genus, sep = ' ')), size=5)+ labs(x="Family genus")+
theme(axis.text.x=element_text(color = 'black', size = 13),
axis.text.y=element_text(color = 'black', size=13, face = 'italic'),
axis.title.x=element_text(size = 15),
axis.title.y=element_text(size = 15))+ ggtitle('Differentially Abundant OTUs in IM Relative to NON at Nasal Site on Day 0')+ coord_flip() +
theme(plot.title = element_text(size = 20, hjust=0.5), legend.text = element_text(size=12), legend.title = element_text(size=13)) +
scale_fill_manual(labels = c("IM", "NON"), values = c('#56B4E9', '#999999'))
deseq.D0.ic
#Add OTU and treatment group comparisons columns to 'sigtab.D0.ic'
sigtab.D0.ic$OTU <- rownames(sigtab.D0.ic)
sigtab.D0.ic
sigtab.D0.ic$comp <- 'D0_nasal_IMvsNON'