-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathresponse_vs_features.R
127 lines (112 loc) · 3.32 KB
/
response_vs_features.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# Univariate analysis for drug response
suppressPackageStartupMessages({
library(coin)
library(dplyr)
library(optparse)
library(readr)
})
source("univariate_common.R")
option_list <- list(
make_option("--response_pdata",
action = "store",
default = "data/response_pdata.rds",
help = "Drug response data"
),
make_option("--kraken_meta",
action = "store",
default = "data/knight_kraken_meta.rds",
help = "Metadata for the microbial features"
),
make_option("--microbial_features",
action = "store",
type = "character",
help = "List of nominally significant microbial features"
),
make_option("--feature_type",
action = "store",
default = "kraken",
help = "Perform analysis for which type of feature"
)
)
parser <- OptionParser(
usage = "%prog [options] file",
option_list = option_list
)
args <- parse_args(parser, positional_arguments = FALSE)
response_pdata <- args[["response_pdata"]]
kraken_meta <- args[["kraken_meta"]]
microbial_features <- args[["microbial_features"]]
feature_type <- args[["feature_type"]]
how <- args[["how"]]
features <- read_tsv(microbial_features, col_types = cols()) %>%
filter(features == feature_type & what == "resp") %>%
group_by(cancer, what, versus, features, genera) %>%
tally(name = "count") %>%
filter(count >= 2) %>%
select(cancer, what, versus, features, genera)
set.seed(98765)
# A seed for each iteration
eff_seeds <- sample(1:2^15, 10000)
res <- readRDS(response_pdata) %>%
select(
sample_barcode = case_submitter_id, cancer,
versus = drug.name, start_time = start.time,
response
) %>%
mutate(
response = as.factor(
ifelse(response %in% c("Complete Response", "Partial Response"),
"yes", "no"
)
),
cancer = sub("TCGA-", "", cancer)
)
kraken <- readRDS("data/knight_kraken_data.rds")
aliquots <- read_kraken_aliquots(kraken_meta)
results <- vector("list", nrow(features))
for (k in seq_len(nrow(features))) {
# For each (cancer, drug, genus) triplets
set.seed(eff_seeds[k])
microbial_sample <- find_microbial_sample(
kraken, aliquots, features[k, "genera", drop = TRUE]
)
data <- res %>%
semi_join(features[k, , drop = FALSE], by = c("cancer", "versus")) %>%
join_response_with_microbial(microbial_sample)
if (length(unique(data$response)) != 2) {
next # Don't test if there is only one response
}
p_value <- NA
direction <- NA
try({
wc <- wilcox_test(
formula = abundance ~ response, data = data,
distribution = "approximate"
)
if (TRUE || pvalue(wc) < 0.05) {
wc <- wilcox_test(
formula = abundance ~ response, data = data,
distribution = "exact"
)
}
if (pvalue(wc) < 0.05) {
ranks <- rank(data$abundance)
mean_rank_affected <- mean(ranks[data$response == "yes"], na.rm = TRUE)
mean_rank_unaffected <- mean(ranks[data$response == "no"], na.rm = TRUE)
direction <- ifelse(mean_rank_affected > mean_rank_unaffected, 1, -1)
}
p_value <- pvalue(wc)
})
results[[k]] <-
tibble(
features[k, , drop = FALSE],
univariate_direction = !!direction,
univariate_p_value = !!p_value,
seed = eff_seeds[k]
)
}
bind_rows(results) %>%
group_by(cancer, versus) %>%
mutate(univariate_fdr = p.adjust(univariate_p_value, "BH")) %>%
format_tsv() %>%
cat()