|
17 | 17 | #' @importFrom ggplot2 ggplot aes geom_point labs scale_color_continuous scale_size_continuous guides theme element_text element_blank
|
18 | 18 | #'
|
19 | 19 | #' @keywords internal
|
20 |
| -.dots_multi_plot <- function(multihyp_data, |
21 |
| - top=20, |
22 |
| - abrv=50, |
23 |
| - size_by=c("genesets", "significance", "overlap", "none"), |
24 |
| - pval_cutoff=1, |
25 |
| - fdr_cutoff=1, |
26 |
| - val=c("fdr", "pval"), |
27 |
| - title="") |
| 20 | +.dots_multi_plot <- function( |
| 21 | + multihyp_data, |
| 22 | + top = 20, |
| 23 | + abrv = 50, |
| 24 | + size_by = c("genesets", "significance", "none"), |
| 25 | + pval_cutoff = 1, |
| 26 | + fdr_cutoff = 1, |
| 27 | + val = c("fdr", "pval"), |
| 28 | + title = "") |
28 | 29 | {
|
29 |
| - # Default arguments |
30 |
| - val <- match.arg(val) |
31 |
| - size_by <- match.arg(size_by) |
32 |
| - |
33 |
| - # Count significant genesets across signatures |
34 |
| - multihyp_dfs <- lapply(multihyp_data, function(hyp_obj) { |
35 |
| - hyp_obj$data %>% |
36 |
| - dplyr::filter(pval <= pval_cutoff) %>% |
37 |
| - dplyr::filter(fdr <= fdr_cutoff) %>% |
38 |
| - dplyr::select(label) |
39 |
| - }) |
40 |
| - # Take top genesets |
41 |
| - labels <- names(sort(table(unlist(multihyp_dfs)), decreasing=TRUE)) |
42 |
| - if (!is.null(top)) labels <- head(labels, top) |
43 |
| - |
44 |
| - # Handle empty dataframes |
45 |
| - if (length(labels) == 0) return(ggempty()) |
46 |
| - |
47 |
| - # Create a multihyp dataframe |
48 |
| - dfs <- lapply(multihyp_data, function(hyp_obj) { |
49 |
| - hyp_df <- hyp_obj$data |
50 |
| - hyp_df[hyp_df$label %in% labels, c("label", val), drop=FALSE] |
51 |
| - }) |
52 |
| - # merge dataframes ('cbind') |
53 |
| - df <- suppressWarnings(Reduce(function(x, y) merge(x, y, by="label", all=TRUE), dfs)) |
54 |
| - colnames(df) <- c("label", names(dfs)) |
55 |
| - rownames(df) <- df$label |
56 |
| - df <- df[rev(labels), names(dfs)] |
57 |
| - |
58 |
| - # Abbreviate labels |
59 |
| - label.abrv <- substr(rownames(df), 1, abrv) |
60 |
| - if (any(duplicated(label.abrv))) { |
61 |
| - stop("Non-unique labels after abbreviating") |
62 |
| - } else { |
63 |
| - rownames(df) <- factor(label.abrv, levels=label.abrv) |
64 |
| - } |
65 |
| - if (val == "pval") { |
66 |
| - cutoff <- pval_cutoff |
67 |
| - color.label <- "P-Value" |
68 |
| - } |
69 |
| - if (val == "fdr") { |
70 |
| - cutoff <- fdr_cutoff |
71 |
| - color.label <- "FDR" |
72 |
| - } |
73 |
| - df.melted <- reshape2::melt(as.matrix(df)) |
74 |
| - colnames(df.melted) <- c("label", "signature", "significance") |
75 |
| - df.melted$size <- 1 |
76 |
| - |
77 |
| - if (size_by == "significance") { |
78 |
| - df.melted$size <- df.melted$significance |
79 |
| - } else if (size_by == "genesets") { |
80 |
| - geneset.sizes <- lapply(multihyp_data, function(hyp_obj) { |
81 |
| - hyp_obj$data[, c("label", "geneset")] |
82 |
| - }) %>% |
83 |
| - do.call(rbind, .) %>% |
84 |
| - dplyr::distinct(label, .keep_all=TRUE) %>% |
85 |
| - dplyr::pull(geneset, label) |
86 |
| - #df.melted$size <- geneset.sizes[df.melted$label] |
87 |
| - names(geneset.sizes) <- substr(names(geneset.sizes), 1, abrv) |
88 |
| - stopifnot( all(!is.na(match_idx <- match(df.melted$label,names(geneset.sizes)))) ) |
89 |
| - df.melted$size <- geneset.sizes[match_idx] |
90 |
| - } else if (size == "overlap") { |
91 |
| - stop( "size_by overlap not implemented yet") |
92 |
| - overlap.sizes <- lapply(multihyp_data, function(hyp_obj) { |
93 |
| - hyp_obj$data[, c("label", "overlap")] |
94 |
| - }) %>% |
95 |
| - do.call(rbind, .) %>% |
96 |
| - dplyr::distinct(label, .keep_all=TRUE) %>% |
97 |
| - dplyr::pull(overlap, label) |
98 |
| - #df.melted$size <- overlap.sizes[df.melted$label] |
99 |
| - names(overlap.sizes) <- substr(names(overlap.sizes), 1, abrv) |
100 |
| - stopifnot( all(!is.na(match_idx <- match(df.melted$label,names(overlap.sizes)))) ) |
101 |
| - df.melted$size <- overlap.sizes[match_idx] |
102 |
| - } |
103 |
| - p <- df.melted %>% |
104 |
| - dplyr::filter(significance <= cutoff) %>% |
105 |
| - ggplot(aes(x = signature, y = label, color = significance, size = size)) + |
106 |
| - geom_point() + |
107 |
| - scale_color_continuous(low = "#114357", high = "#E53935", trans = .reverselog_trans(10)) + |
108 |
| - labs(title = title, color = color.label) + |
109 |
| - theme( |
110 |
| - plot.title = element_text(hjust = 0.5), |
111 |
| - axis.title.y = element_blank(), |
112 |
| - axis.title.x = element_blank(), |
113 |
| - axis.text.x = element_text(angle = 45, hjust = 1) |
114 |
| - ) |
115 |
| - if (size_by == "none") { |
116 |
| - p <- p + guides(size="none") |
117 |
| - } else if (size_by == "significance") { |
118 |
| - p <- p + scale_size_continuous(trans=.reverselog_trans(10)) + labs(size="Significance") |
119 |
| - } else if (size_by == "genesets" ) { |
120 |
| - p <- p + scale_size_continuous(trans=scales::log10_trans()) + labs(size="Genesets\nSize") |
121 |
| - } else if (size_by == "overlap" ) { |
122 |
| - p <- p + scale_size_continuous(trans=scales::log10_trans()) + labs(size="Overlap\nSize") |
123 |
| - } else { |
124 |
| - stop("unrecognized 'size_by':", size_by) |
125 |
| - } |
126 |
| - return(p) |
| 30 | + # Default arguments |
| 31 | + val <- match.arg(val) |
| 32 | + size_by <- match.arg(size_by) |
| 33 | + |
| 34 | + # Count significant genesets across signatures |
| 35 | + multihyp_dfs <- lapply(multihyp_data, function(hyp_obj) { |
| 36 | + hyp_obj$data %>% |
| 37 | + dplyr::filter(pval <= pval_cutoff) %>% |
| 38 | + dplyr::filter(fdr <= fdr_cutoff) %>% |
| 39 | + dplyr::select(label) |
| 40 | + }) |
| 41 | + # Take top genesets |
| 42 | + labels <- names(sort(table(unlist(multihyp_dfs)), decreasing = TRUE)) |
| 43 | + if (!is.null(top)) labels <- head(labels, top) |
| 44 | + |
| 45 | + # Handle empty dataframes |
| 46 | + if (length(labels) == 0) { |
| 47 | + return(ggempty()) |
| 48 | + } |
| 49 | + # Create a multihyp dataframe |
| 50 | + dfs <- lapply(multihyp_data, function(hyp_obj) { |
| 51 | + hyp_df <- hyp_obj$data |
| 52 | + hyp_df[hyp_df$label %in% labels, c("label", val), drop = FALSE] |
| 53 | + }) |
| 54 | + df <- suppressWarnings(Reduce(function(x, y) merge(x, y, by = "label", all = TRUE), dfs)) |
| 55 | + colnames(df) <- c("label", names(dfs)) |
| 56 | + rownames(df) <- df$label |
| 57 | + df <- df[rev(labels), names(dfs)] |
| 58 | + |
| 59 | + # Abbreviate labels |
| 60 | + label.abrv <- substr(rownames(df), 1, abrv) |
| 61 | + if (any(duplicated(label.abrv))) { |
| 62 | + stop("Non-unique labels after abbreviating") |
| 63 | + } else { |
| 64 | + rownames(df) <- factor(label.abrv, levels = label.abrv) |
| 65 | + } |
| 66 | + if (val == "pval") { |
| 67 | + cutoff <- pval_cutoff |
| 68 | + color.label <- "P-Value" |
| 69 | + } |
| 70 | + if (val == "fdr") { |
| 71 | + cutoff <- fdr_cutoff |
| 72 | + color.label <- "FDR" |
| 73 | + } |
| 74 | + df.melted <- reshape2::melt(as.matrix(df)) |
| 75 | + colnames(df.melted) <- c("label", "signature", "significance") |
| 76 | + df.melted$size <- 1 |
| 77 | + |
| 78 | + if (size_by == "significance") { |
| 79 | + df.melted$size <- df.melted$significance |
| 80 | + } |
| 81 | + if (size_by == "genesets") { |
| 82 | + geneset.sizes <- lapply(multihyp_data, function(hyp_obj) { |
| 83 | + hyp_obj$data[, c("label", "geneset")] |
| 84 | + }) %>% |
| 85 | + do.call(rbind, .) %>% |
| 86 | + dplyr::distinct(label, .keep_all = TRUE) %>% |
| 87 | + dplyr::pull(geneset, label) |
| 88 | + df.melted$size <- geneset.sizes[df.melted$label] |
| 89 | + } |
| 90 | + p <- df.melted %>% |
| 91 | + dplyr::filter(significance <= cutoff) %>% |
| 92 | + ggplot(aes(x = signature, y = label, color = significance, size = size)) + |
| 93 | + geom_point() + |
| 94 | + scale_color_continuous(low = "#114357", high = "#E53935", trans = .reverselog_trans(10)) + |
| 95 | + labs(title = title, color = color.label) + |
| 96 | + theme( |
| 97 | + plot.title = element_text(hjust = 0.5), |
| 98 | + axis.title.y = element_blank(), |
| 99 | + axis.title.x = element_blank(), |
| 100 | + axis.text.x = element_text(angle = 45, hjust = 1) |
| 101 | + ) |
| 102 | + if (size_by == "none") { |
| 103 | + p <- p + guides(size = "none") |
| 104 | + } else if (size_by == "significance") { |
| 105 | + p <- p + scale_size_continuous(trans = .reverselog_trans(10)) + labs(size = "Significance") |
| 106 | + } else if (size_by == "genesets") { |
| 107 | + p <- p + scale_size_continuous(trans = scales::log10_trans()) + labs(size = "Genesets\nSize") |
| 108 | + # p <- p + scale_color_continuous( |
| 109 | + # high = "#114357", low = "#E53935", trans = scales::log10_trans(), |
| 110 | + # guide = guide_colorbar(reverse = TRUE) |
| 111 | + #) |
| 112 | + } |
| 113 | + return(p) |
127 | 114 | }
|
128 | 115 |
|
129 | 116 | #' Plot top enriched genesets
|
|
0 commit comments