Skip to content

Deseq plot functions #83

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion elsasserlib/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: elsasserlib
Type: Package
Title: General utilities used within Elsasser lab
Version: 1.1.1
Version: 1.1.2
Authors@R: c(
person("Carmen", "Navarro",
email = "carmen.navarro@scilifelab.se",
Expand Down
2 changes: 2 additions & 0 deletions elsasserlib/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ export(bw_profile)
export(mean_ratio_norm)
export(palette_categorical)
export(plot_bw_bed_summary_heatmap)
export(plot_bw_bins_diff_scatter)
export(plot_bw_bins_diff_volcano)
export(plot_bw_bins_scatter)
export(plot_bw_bins_violin)
export(plot_bw_profile)
Expand Down
168 changes: 168 additions & 0 deletions elsasserlib/R/bwplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,174 @@ plot_bw_profile <- function(bwfiles,
}


#' Scatter plot with significance testing.
#'
#' Make a scatterplot where bins that pass a specific statistical significance
#' threshold across replicates are highlighted.
#'
#' @param bwfiles_c1 Condition 1 bigwig files.
#' @param bwfiles_c2 Condition 2 bigwig files.
#' @param label_c1 Label for condition 1.
#' @param label_c2 Label for condition 2.
#' @param bg_bwfiles_c1 Optional condition 1 bigwig input (background) files.
#' @param bg_bwfiles_c2 Optional condition 2 bigwig input (background) files.
#' @param bin_size Bin size.
#' @param genome Genome used. Must be one of supported.
#' @param estimate_size_factors If True, the estimateSizeFactors step from
#' DESeq2 is not skipped.
#' @param pval_cutoff P-value cutoff.
#' @param logfc_cutoff Log fold change cutoff. If logfc_cutoff is negative,
#' bins under logfc_cutoff will be highlighted.
#' @inheritParams plot_bw_bins_scatter
#' @return ggplot object
#' @export
plot_bw_bins_diff_scatter <- function(bwfiles_c1,
bwfiles_c2,
label_c1,
label_c2,
bg_bwfiles_c1 = NULL,
bg_bwfiles_c2 = NULL,
bin_size = 10000,
genome = "mm9",
estimate_size_factors = FALSE,
pval_cutoff = NULL,
logfc_cutoff = NULL,
norm_func_x = identity,
norm_func_y = identity) {

stats <- bw_bins_diff_analysis(bwfiles_c1,
bwfiles_c2,
label_c1,
label_c2,
bin_size = bin_size,
genome = genome,
estimate_size_factors = estimate_size_factors,
keep_values = TRUE
)

# Mean replicates
stats$mean_c1 <- rowMeans(data.frame(stats[, basename(bwfiles_c1)]))
stats$mean_c2 <- rowMeans(data.frame(stats[, basename(bwfiles_c2)]))

if (! is.null(bg_bwfiles_c1) ) {
bg_values_x <- bw_bins(bg_bwfiles_c1,
bin_size = bin_size,
genome = genome)

bg_values_x$mean <- rowMeans(data.frame(bg_values_x)[, basename(bg_bwfiles_c1)])

stats$mean_c1 <- norm_func_x(stats$mean_c1 / bg_values_x$mean)
}

if (! is.null(bg_bwfiles_c2) ) {
bg_values_y <- bw_bins(bg_bwfiles_c2,
bin_size = bin_size,
genome = genome)
bg_values_y$mean <- rowMeans(data.frame(bg_values_y)[, basename(bg_bwfiles_c2)])

stats$mean_c2 <- norm_func_x(stats$mean_c2 / bg_values_y$mean)
}

# Replace NAs with 1s so rows are not removed from matrix
stats_filtered <- stats
stats_filtered$padj <- ifelse(is.na(stats$padj), 1, stats$padj)

if (!is.null(pval_cutoff)) {
stats_filtered <- stats_filtered[stats_filtered$padj < pval_cutoff, ]
}
if (!is.null(logfc_cutoff)) {
if (logfc_cutoff > 0) {
stats_filtered <- stats_filtered[stats_filtered$log2FoldChange > logfc_cutoff, ]
}
else {
stats_filtered <- stats_filtered[stats_filtered$log2FoldChange < logfc_cutoff, ]
}
}

stats_df <- data.frame(stats)
stats_filtered_df <- data.frame(stats_filtered)

ggplot(stats_df, aes_string(x = "mean_c1", y = "mean_c2")) +
geom_point(color = "#cccccc", alpha = 0.6) +
theme_elsasserlab_screen(base_size = 18) +
geom_point(data = stats_filtered_df, aes_string(x = "mean_c1", y = "mean_c2"), color = "#F08080") +
xlab(label_c1) +
ylab(label_c2) +
ggtitle(paste("Bin coverage (bin_size = ", bin_size, ")", sep = ""))
}


#' Volcano plot for differential analysis of genome-wide bins
#'
#' Make a volcano plot where bins that pass a specific statistical significance
#' threshold across replicates are highlighted.
#'
#' @param bwfiles_c1 Condition 1 bigwig files.
#' @param bwfiles_c2 Condition 2 bigwig files.
#' @param label_c1 Label for condition 1.
#' @param label_c2 Label for condition 2.
#' @param bin_size Bin size.
#' @param genome Genome used. Must be one of supported.
#' @param estimate_size_factors If True, the estimateSizeFactors step from
#' DESeq2 is not skipped.
#' @param pval_cutoff P-value cutoff.
#' @param logfc_cutoff Log fold change cutoff. In volcano plot this cutoff is
#' used as absolute value (-logfc cutoff and + logfc cutoff are selected).
#' @return ggplot object
#' @export
plot_bw_bins_diff_volcano <- function(bwfiles_c1,
bwfiles_c2,
label_c1,
label_c2,
bin_size = 10000,
genome = "mm9",
estimate_size_factors = FALSE,
pval_cutoff = NULL,
logfc_cutoff = NULL) {

stats <- bw_bins_diff_analysis(bwfiles_c1,
bwfiles_c2,
label_c1,
label_c2,
bin_size = bin_size,
genome = genome,
estimate_size_factors = estimate_size_factors,
keep_values = TRUE
)

# Replace NAs with 1s so rows are not removed from matrix
stats$padj <- ifelse(is.na(stats$padj), 1, stats$padj)
stats$log10padj <- -log10(stats$padj)

stats_filtered <- stats
extra_lines_pval <- NULL
extra_lines_fc <- NULL

if (!is.null(pval_cutoff)) {
stats_filtered <- stats_filtered[stats_filtered$padj < pval_cutoff, ]
extra_lines_pval <- geom_hline(yintercept = -log10(pval_cutoff), linetype = "dashed")
}
if (!is.null(logfc_cutoff)) {
stats_filtered <- stats_filtered[abs(stats_filtered$log2FoldChange) > logfc_cutoff, ]
extra_lines_fc <- geom_vline(xintercept = c(-logfc_cutoff, logfc_cutoff), linetype = "dashed")
}

stats_df <- data.frame(stats)
stats_filtered_df <- data.frame(stats_filtered)

ggplot(stats_df, aes_string(x = "log2FoldChange", y = "log10padj")) +
geom_point(color = "#cccccc", alpha = 0.6) +
theme_elsasserlab_screen(base_size = 18) +
geom_point(data = stats_filtered_df, aes_string(x = "log2FoldChange", y = "log10padj"), color = "#F08080") +
xlab("log2 fold change") +
ylab("-log10 p-value") +
geom_vline(xintercept = 0, linetype = "dashed") +
ggtitle(paste("Volcano plot - ", label_c1, "vs", label_c2, "(bin_size = ", bin_size, ")", sep = "")) +
extra_lines_pval + extra_lines_fc

}


#' Plot a pretty heatmap using pheatmap library
#'
#' This function ignores NA values to calculate min and max values.
Expand Down
27 changes: 21 additions & 6 deletions elsasserlib/R/bwstats.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' @param bwfiles_c2 Path or array of paths to the bigWig files for second condition.
#' @param genome Genome. Available choices are mm9, hg38.
#' @param bin_size Bin size.
#' @param keep_values Keep individually calculated values.
#' @inheritParams bw_granges_diff_analysis
#' @return a DESeqResults object as returned by DESeq2::results function
#' @export
Expand All @@ -18,13 +19,15 @@ bw_bins_diff_analysis <- function(bwfiles_c1,
label_c2,
bin_size = 10000,
genome = "mm9",
estimate_size_factors = FALSE) {
estimate_size_factors = FALSE,
keep_values = FALSE) {

bins_c1 <- bw_bins(bwfiles_c1, genome = genome, bin_size = bin_size)
bins_c2 <- bw_bins(bwfiles_c2, genome = genome, bin_size = bin_size)

bw_granges_diff_analysis(bins_c1, bins_c2, label_c1, label_c2,
estimate_size_factors = estimate_size_factors)
estimate_size_factors = estimate_size_factors,
keep_values = keep_values)
}

#' Run DESeq2 analysis on bed file
Expand All @@ -45,13 +48,15 @@ bw_bed_diff_analysis <- function(bwfiles_c1,
bedfile,
label_c1,
label_c2,
estimate_size_factors = FALSE) {
estimate_size_factors = FALSE,
keep_values = FALSE) {

loci_c1 <- bw_bed(bwfiles_c1, bedfile = bedfile)
loci_c2 <- bw_bed(bwfiles_c2, bedfile = bedfile)

bw_granges_diff_analysis(loci_c1, loci_c2, label_c1, label_c2,
estimate_size_factors = estimate_size_factors)
estimate_size_factors = estimate_size_factors,
keep_values = keep_values)
}


Expand All @@ -69,14 +74,18 @@ bw_bed_diff_analysis <- function(bwfiles_c1,
#' @param label_c2 Condition name for condition 2.
#' @param estimate_size_factors If TRUE, normal DESeq2 procedure is done. Set it
#' to true to analyze non-MINUTE data.
#' @param keep_values If true, results also include the separate bin values.
#' This is used to avoid recalculation in plotting functions, but set to
#' false by default.
#' @importFrom DESeq2 DESeqDataSetFromMatrix estimateDispersions nbinomWaldTest `sizeFactors<-` results estimateSizeFactors
#' @return a DESeqResults object as returned by DESeq2::results function
#' @export
bw_granges_diff_analysis <- function(granges_c1,
granges_c2,
label_c1,
label_c2,
estimate_size_factors = FALSE) {
estimate_size_factors = FALSE,
keep_values = FALSE) {

# Bind first, get numbers after (drop complete cases separately could cause error)
granges_c1 <- sortSeqlevels(granges_c1)
Expand Down Expand Up @@ -111,7 +120,13 @@ bw_granges_diff_analysis <- function(granges_c1,
dds <- estimateDispersions(dds)
dds <- nbinomWaldTest(dds)

results(dds)
stats_results <- results(dds)

if (keep_values == TRUE) {
stats_results[, colnames(cts_df)] <- cts_df
}

stats_results
}


Expand Down
7 changes: 6 additions & 1 deletion elsasserlib/man/bw_bed_diff_analysis.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion elsasserlib/man/bw_bins_diff_analysis.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion elsasserlib/man/bw_granges_diff_analysis.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 58 additions & 0 deletions elsasserlib/man/plot_bw_bins_diff_scatter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading