-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path16S.rmd
101 lines (90 loc) · 2.81 KB
/
16S.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
---
title: "Existing data"
output: html_notebook
---
```{r}
library(arivale.data.interface)
library(mbtools)
```
```{r}
classes <- c(public_client_id = "character", microbiome_id = "character")
cohort <- rbind(
fread("no_weight_loss.csv", colClasses=classes),
fread("successful_weight_loss.csv", colClasses=classes)
)[since_baseline == 0]
cohort[, "subset" := "controls"]
cohort[weight_change_relative < 0, "subset" := "weight loss"]
cohort[, subset := factor(subset)]
cohort[, "vendor_observation_id" := microbiome_id]
```
```{r}
ps <- readRDS("/proj/arivale/microbiome/16S_processed/phyloseq.rds")
ps <- subset_samples(ps, vendor_observation_id %in% cohort[, vendor_observation_id])
ps
```
```{r}
genus <- speedyseq::tax_glom(ps, "Genus")
sdata <- merge(
as(sample_data(genus), "data.frame"),
as.data.frame(cohort),
by = c("vendor_observation_id", "sex")
)
rownames(sdata) <- sdata$id
sdata$age <- sdata$age.y
sdata$subset <- factor(sdata$subset)
sdata$sex <- factor(sdata$sex)
sample_data(genus) <- sdata
```
```{r}
tests <- association(
genus,
presence_threshold = 1,
min_abundance = 10,
in_samples = 0.5,
confounders = c("age", "bmi", "sex"),
variables = "subset",
taxa_rank = "Genus",
method = "deseq2",
shrink=F
)
bmi <- association(
genus,
presence_threshold = 1,
min_abundance = 10,
in_samples = 0.5,
confounders = c("age", "sex"),
variables = "bmi",
taxa_rank = "Genus",
method = "deseq2",
shrink=F
)
tests <- rbind(tests, bmi)
tests <- merge.data.table(tests, as.data.frame(as(tax_table(genus), "matrix")), on="Genus")
fwrite(tests[order(padj)], "data/tests_16S_genus.csv")
tests
```
```{r, fig.width=5, fig.height=3}
tests[, "t" := log2FoldChange / lfcSE]
wide <- dcast(tests, Genus ~ variable, value.var=c("t", "padj"), fill=0)
wide[, "sig" := "none"]
wide[padj_bmi < 0.05, "sig" := "BMI"]
wide[padj_subset < 0.05, "sig" := "weight loss"]
wide[padj_bmi < 0.05 & padj_subset < 0.05, "sig" := "both"]
wide[, "sig" := factor(sig, levels=c("none", "BMI", "weight loss", "both"))]
ggplot(wide, aes(x=t_bmi, y=t_subset, color=sig)) +
geom_vline(xintercept = 0, color="gray30", lty="dashed") +
geom_hline(yintercept = 0, color="gray30", lty="dashed") +
geom_point() + stat_smooth(method="glm", aes(group = 1)) +
labs(x = "t statistic BMI", y = "t statistic weight loss") +
scale_color_manual(values=c(none = "black", BMI = "royalblue", `weight loss`="orange", both = "red"))
ggsave("figures/16S_genus_t.png", dpi=300, width=5, height=3)
cor.test(~ t_bmi + t_subset, data=wide, method="spearman")
```
## Overall explained variance
```{r}
library(vegan)
m <- as(otu_table(genus), "matrix")
m[is.na(m)] <- min(as.numeric(m), na.rm=T)
perm <- adonis(m ~ age + sex + bmi + subset, data=as(sample_data(genus), "data.frame"), method="bray")
perm
```