Skip to content

Commit 1465315

Browse files
committed
hyp_dots
2 parents 1e26e67 + 1aad26a commit 1465315

File tree

1 file changed

+93
-106
lines changed

1 file changed

+93
-106
lines changed

R/hyp_dots.R

Lines changed: 93 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -17,113 +17,100 @@
1717
#' @importFrom ggplot2 ggplot aes geom_point labs scale_color_continuous scale_size_continuous guides theme element_text element_blank
1818
#'
1919
#' @keywords internal
20-
.dots_multi_plot <- function(multihyp_data,
21-
top=20,
22-
abrv=50,
23-
size_by=c("genesets", "significance", "overlap", "none"),
24-
pval_cutoff=1,
25-
fdr_cutoff=1,
26-
val=c("fdr", "pval"),
27-
title="")
20+
.dots_multi_plot <- function(
21+
multihyp_data,
22+
top = 20,
23+
abrv = 50,
24+
size_by = c("genesets", "significance", "none"),
25+
pval_cutoff = 1,
26+
fdr_cutoff = 1,
27+
val = c("fdr", "pval"),
28+
title = "")
2829
{
29-
# Default arguments
30-
val <- match.arg(val)
31-
size_by <- match.arg(size_by)
32-
33-
# Count significant genesets across signatures
34-
multihyp_dfs <- lapply(multihyp_data, function(hyp_obj) {
35-
hyp_obj$data %>%
36-
dplyr::filter(pval <= pval_cutoff) %>%
37-
dplyr::filter(fdr <= fdr_cutoff) %>%
38-
dplyr::select(label)
39-
})
40-
# Take top genesets
41-
labels <- names(sort(table(unlist(multihyp_dfs)), decreasing=TRUE))
42-
if (!is.null(top)) labels <- head(labels, top)
43-
44-
# Handle empty dataframes
45-
if (length(labels) == 0) return(ggempty())
46-
47-
# Create a multihyp dataframe
48-
dfs <- lapply(multihyp_data, function(hyp_obj) {
49-
hyp_df <- hyp_obj$data
50-
hyp_df[hyp_df$label %in% labels, c("label", val), drop=FALSE]
51-
})
52-
# merge dataframes ('cbind')
53-
df <- suppressWarnings(Reduce(function(x, y) merge(x, y, by="label", all=TRUE), dfs))
54-
colnames(df) <- c("label", names(dfs))
55-
rownames(df) <- df$label
56-
df <- df[rev(labels), names(dfs)]
57-
58-
# Abbreviate labels
59-
label.abrv <- substr(rownames(df), 1, abrv)
60-
if (any(duplicated(label.abrv))) {
61-
stop("Non-unique labels after abbreviating")
62-
} else {
63-
rownames(df) <- factor(label.abrv, levels=label.abrv)
64-
}
65-
if (val == "pval") {
66-
cutoff <- pval_cutoff
67-
color.label <- "P-Value"
68-
}
69-
if (val == "fdr") {
70-
cutoff <- fdr_cutoff
71-
color.label <- "FDR"
72-
}
73-
df.melted <- reshape2::melt(as.matrix(df))
74-
colnames(df.melted) <- c("label", "signature", "significance")
75-
df.melted$size <- 1
76-
77-
if (size_by == "significance") {
78-
df.melted$size <- df.melted$significance
79-
} else if (size_by == "genesets") {
80-
geneset.sizes <- lapply(multihyp_data, function(hyp_obj) {
81-
hyp_obj$data[, c("label", "geneset")]
82-
}) %>%
83-
do.call(rbind, .) %>%
84-
dplyr::distinct(label, .keep_all=TRUE) %>%
85-
dplyr::pull(geneset, label)
86-
#df.melted$size <- geneset.sizes[df.melted$label]
87-
names(geneset.sizes) <- substr(names(geneset.sizes), 1, abrv)
88-
stopifnot( all(!is.na(match_idx <- match(df.melted$label,names(geneset.sizes)))) )
89-
df.melted$size <- geneset.sizes[match_idx]
90-
} else if (size == "overlap") {
91-
stop( "size_by overlap not implemented yet")
92-
overlap.sizes <- lapply(multihyp_data, function(hyp_obj) {
93-
hyp_obj$data[, c("label", "overlap")]
94-
}) %>%
95-
do.call(rbind, .) %>%
96-
dplyr::distinct(label, .keep_all=TRUE) %>%
97-
dplyr::pull(overlap, label)
98-
#df.melted$size <- overlap.sizes[df.melted$label]
99-
names(overlap.sizes) <- substr(names(overlap.sizes), 1, abrv)
100-
stopifnot( all(!is.na(match_idx <- match(df.melted$label,names(overlap.sizes)))) )
101-
df.melted$size <- overlap.sizes[match_idx]
102-
}
103-
p <- df.melted %>%
104-
dplyr::filter(significance <= cutoff) %>%
105-
ggplot(aes(x = signature, y = label, color = significance, size = size)) +
106-
geom_point() +
107-
scale_color_continuous(low = "#114357", high = "#E53935", trans = .reverselog_trans(10)) +
108-
labs(title = title, color = color.label) +
109-
theme(
110-
plot.title = element_text(hjust = 0.5),
111-
axis.title.y = element_blank(),
112-
axis.title.x = element_blank(),
113-
axis.text.x = element_text(angle = 45, hjust = 1)
114-
)
115-
if (size_by == "none") {
116-
p <- p + guides(size="none")
117-
} else if (size_by == "significance") {
118-
p <- p + scale_size_continuous(trans=.reverselog_trans(10)) + labs(size="Significance")
119-
} else if (size_by == "genesets" ) {
120-
p <- p + scale_size_continuous(trans=scales::log10_trans()) + labs(size="Genesets\nSize")
121-
} else if (size_by == "overlap" ) {
122-
p <- p + scale_size_continuous(trans=scales::log10_trans()) + labs(size="Overlap\nSize")
123-
} else {
124-
stop("unrecognized 'size_by':", size_by)
125-
}
126-
return(p)
30+
# Default arguments
31+
val <- match.arg(val)
32+
size_by <- match.arg(size_by)
33+
34+
# Count significant genesets across signatures
35+
multihyp_dfs <- lapply(multihyp_data, function(hyp_obj) {
36+
hyp_obj$data %>%
37+
dplyr::filter(pval <= pval_cutoff) %>%
38+
dplyr::filter(fdr <= fdr_cutoff) %>%
39+
dplyr::select(label)
40+
})
41+
# Take top genesets
42+
labels <- names(sort(table(unlist(multihyp_dfs)), decreasing = TRUE))
43+
if (!is.null(top)) labels <- head(labels, top)
44+
45+
# Handle empty dataframes
46+
if (length(labels) == 0) {
47+
return(ggempty())
48+
}
49+
# Create a multihyp dataframe
50+
dfs <- lapply(multihyp_data, function(hyp_obj) {
51+
hyp_df <- hyp_obj$data
52+
hyp_df[hyp_df$label %in% labels, c("label", val), drop = FALSE]
53+
})
54+
df <- suppressWarnings(Reduce(function(x, y) merge(x, y, by = "label", all = TRUE), dfs))
55+
colnames(df) <- c("label", names(dfs))
56+
rownames(df) <- df$label
57+
df <- df[rev(labels), names(dfs)]
58+
59+
# Abbreviate labels
60+
label.abrv <- substr(rownames(df), 1, abrv)
61+
if (any(duplicated(label.abrv))) {
62+
stop("Non-unique labels after abbreviating")
63+
} else {
64+
rownames(df) <- factor(label.abrv, levels = label.abrv)
65+
}
66+
if (val == "pval") {
67+
cutoff <- pval_cutoff
68+
color.label <- "P-Value"
69+
}
70+
if (val == "fdr") {
71+
cutoff <- fdr_cutoff
72+
color.label <- "FDR"
73+
}
74+
df.melted <- reshape2::melt(as.matrix(df))
75+
colnames(df.melted) <- c("label", "signature", "significance")
76+
df.melted$size <- 1
77+
78+
if (size_by == "significance") {
79+
df.melted$size <- df.melted$significance
80+
}
81+
if (size_by == "genesets") {
82+
geneset.sizes <- lapply(multihyp_data, function(hyp_obj) {
83+
hyp_obj$data[, c("label", "geneset")]
84+
}) %>%
85+
do.call(rbind, .) %>%
86+
dplyr::distinct(label, .keep_all = TRUE) %>%
87+
dplyr::pull(geneset, label)
88+
df.melted$size <- geneset.sizes[df.melted$label]
89+
}
90+
p <- df.melted %>%
91+
dplyr::filter(significance <= cutoff) %>%
92+
ggplot(aes(x = signature, y = label, color = significance, size = size)) +
93+
geom_point() +
94+
scale_color_continuous(low = "#114357", high = "#E53935", trans = .reverselog_trans(10)) +
95+
labs(title = title, color = color.label) +
96+
theme(
97+
plot.title = element_text(hjust = 0.5),
98+
axis.title.y = element_blank(),
99+
axis.title.x = element_blank(),
100+
axis.text.x = element_text(angle = 45, hjust = 1)
101+
)
102+
if (size_by == "none") {
103+
p <- p + guides(size = "none")
104+
} else if (size_by == "significance") {
105+
p <- p + scale_size_continuous(trans = .reverselog_trans(10)) + labs(size = "Significance")
106+
} else if (size_by == "genesets") {
107+
p <- p + scale_size_continuous(trans = scales::log10_trans()) + labs(size = "Genesets\nSize")
108+
# p <- p + scale_color_continuous(
109+
# high = "#114357", low = "#E53935", trans = scales::log10_trans(),
110+
# guide = guide_colorbar(reverse = TRUE)
111+
#)
112+
}
113+
return(p)
127114
}
128115

129116
#' Plot top enriched genesets

0 commit comments

Comments
 (0)