Skip to content

Commit

Permalink
fixed smaller bugs about the display of the limma result category 2 a…
Browse files Browse the repository at this point in the history
…nd 3 in the cluster_hits report, and fixed bug aboug sorting the feature names for those categories
  • Loading branch information
Thomas-Rauter committed Oct 2, 2024
1 parent 1e907b7 commit 3a4b58f
Show file tree
Hide file tree
Showing 21 changed files with 291 additions and 107 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
^docs$
^tools$
^doc$
^SplineOmics\.BiocCheck$
^Meta$
^docker$
^dev$
Expand Down
40 changes: 31 additions & 9 deletions R/cluster_hits.R
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,9 @@ cluster_hits <- function(
genes = genes,
spline_params = spline_params,
adj_pthresholds = adj_pthresholds,
adj_pthresh_avrg_diff_conditions = adj_pthresh_avrg_diff_conditions,
adj_pthresh_interaction_condition_time =
adj_pthresh_interaction_condition_time,
report_dir = report_dir,
mode = mode,
report_info = report_info,
Expand Down Expand Up @@ -438,6 +441,8 @@ perform_clustering <- function(
#' @param adj_pthresholds Numeric vector, containing a float < 1 > 0 as each
#' value. There is one float for every level, and this is
#' the adj. p-value threshold.
#' @param adj_pthresh_avrg_diff_conditions Float
#' @param adj_pthresh_interaction_condition_time Float
#' @param report_dir A character string specifying the report directory.
#' @param mode A character string specifying the mode
#' ('isolated' or 'integrated').
Expand Down Expand Up @@ -489,6 +494,8 @@ make_clustering_report <- function(
genes,
spline_params,
adj_pthresholds,
adj_pthresh_avrg_diff_conditions,
adj_pthresh_interaction_condition_time,
report_dir,
mode,
report_info,
Expand Down Expand Up @@ -727,6 +734,9 @@ make_clustering_report <- function(
topTables = topTables,
enrichr_format = enrichr_format,
adj_pthresholds = adj_pthresholds,
adj_pthresh_avrg_diff_conditions = adj_pthresh_avrg_diff_conditions,
adj_pthresh_interaction_condition_time =
adj_pthresh_interaction_condition_time,
report_type = "cluster_hits",
feature_name_columns = feature_name_columns,
mode = mode,
Expand Down Expand Up @@ -1194,7 +1204,7 @@ remove_batch_effect_cluster_hits <- function(
level <- unique_levels[level_index]
level_columns <- which(meta[[condition]] == level)
data_copy <- data[, level_columns]
meta_copy <- subset(meta, meta[[condition]] == level)
meta_copy <- meta[meta[[condition]] == level, , drop = FALSE]
} else {
data_copy <- data # Take the full data for mode == "integrated"
meta_copy <- meta
Expand Down Expand Up @@ -1229,7 +1239,7 @@ remove_batch_effect_cluster_hits <- function(
if (mode == "isolated") {

level <- unique_levels[level_index]
meta_copy <- subset(meta, meta[[condition]] == level)
meta_copy <- meta[meta[[condition]] == level, , drop = FALSE]

if (!is.null(meta_batch2_column) &&
length(unique(meta_copy[[meta_batch2_column]])) > 1) {
Expand All @@ -1243,10 +1253,16 @@ remove_batch_effect_cluster_hits <- function(
}
}

data_copy <- do.call(limma::removeBatchEffect, args)
data_copy <- do.call(
limma::removeBatchEffect,
args
)

# For mode == "integrated", all elements are identical
datas <- c(datas, list(data_copy))
datas <- c(
datas,
list(data_copy)
)
}

} else { # no meta batch column specified, just return right data
Expand Down Expand Up @@ -2071,9 +2087,13 @@ plot_spline_comparisons <- function(
adj_pthresh_interaction
) {

# Sort and prepare data
# Sort and prepare data (sorting based on feature name for easy navigation)
time_effect_1 <- time_effect_1 |> dplyr::arrange(.data$feature_names)
time_effect_2 <- time_effect_2 |> dplyr::arrange(.data$feature_names)
avrg_diff_conditions <-
avrg_diff_conditions |> dplyr::arrange(.data$feature_names)
interaction_condition_time <-
interaction_condition_time |> dplyr::arrange(.data$feature_names)

# Get relevant parameters
DoF <- which(names(time_effect_1) == "AveExpr") - 1
Expand Down Expand Up @@ -2350,6 +2370,8 @@ prepare_gene_lists_for_enrichr <- function(
#' @param level_headers_info A list of header information for each level.
#' @param spline_params A list of spline parameters.
#' @param adj_pthresholds Float vector with values for any level for adj.p.tresh
#' @param adj_pthresh_avrg_diff_conditions Float
#' @param adj_pthresh_interaction_condition_time Float
#' @param mode A character string specifying the mode
#' ('isolated' or 'integrated').
#' @param report_info A named list containg the report info fields. Here used
Expand All @@ -2370,6 +2392,8 @@ build_cluster_hits_report <- function(
level_headers_info,
spline_params,
adj_pthresholds,
adj_pthresh_avrg_diff_conditions,
adj_pthresh_interaction_condition_time,
mode,
report_info,
output_file_path
Expand Down Expand Up @@ -2578,11 +2602,9 @@ build_cluster_hits_report <- function(

pb$tick()
}

# Add sections for limma_result_2_and_3_plots
if (!is.null(limma_result_2_and_3_plots)) {
adj_pthresh_avrg_diff_conditions <- 0.05
adj_pthresh_interaction_condition_time <- 0.05
if (length(limma_result_2_and_3_plots) > 0) {
# Create a new main header for the limma result plots
header_index <- header_index + 1

Expand Down
8 changes: 5 additions & 3 deletions R/run_limma_splines.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ between_level <- function(
condition,
compared_levels[2]
)

condition_only <- limma::topTable(
fit,
coef = factor_only_contrast_coeff,
Expand All @@ -258,6 +259,7 @@ between_level <- function(
top_table = condition_only,
fit = fit
)

top_table_condition_only <- process_top_table(
condition_only_resuls,
feature_names
Expand All @@ -277,7 +279,7 @@ between_level <- function(
":X",
seq_len(num_matching_columns)
)

condition_time <- limma::topTable(
fit,
coef = factor_time_contrast_coeffs,
Expand Down Expand Up @@ -342,11 +344,11 @@ within_level <- function(
padjust_method,
mode
) {

if (mode == "isolated") {
samples <- which(meta[[condition]] == level)
data_copy <- data[, samples]
meta_copy <- subset(meta, meta[[condition]] == level)
meta_copy <- meta[meta[[condition]] == level, , drop = FALSE]
} else { # mode == "integrated"
data_copy <- data
meta_copy <- meta
Expand Down
52 changes: 43 additions & 9 deletions R/utils_input_validation.R
Original file line number Diff line number Diff line change
Expand Up @@ -422,10 +422,17 @@ InputControl <- R6::R6Class("InputControl",
check_design_formula = function() {

formula <- self$args[["design"]]
meta <- self$args$meta
meta_index <- self$args$meta_index
meta <- self$args[["meta"]]
meta_index <- self$args[["meta_index"]]

# Not strictly required
meta_batch_column <- self$args[["meta_batch_column"]]
meta_batch2_column <- self$args[["meta_batch2_column"]]

required_args <- list(formula, meta)
required_args <- list(
formula,
meta
)

if (any(sapply(required_args, is.null))) {
return(NULL)
Expand Down Expand Up @@ -475,16 +482,43 @@ InputControl <- R6::R6Class("InputControl",
missing_columns <- setdiff(formula_terms, names(meta))
if (length(missing_columns) > 0) {
if (!is.null(meta_index)) {
stop(sprintf("%s (data/meta pair index: %s): %s",
stop_call_false(sprintf("%s (data/meta pair index: %s): %s",
"The following design columns are missing in meta",
meta_index,
paste(missing_columns, collapse = ", ")),
call. = FALSE)
paste(missing_columns, collapse = ", ")))

} else {
stop(paste("The following design columns are missing in meta:",
paste(missing_columns, collapse = ", ")),
call. = FALSE)
stop_call_false(paste("The following design columns are missing in meta:",
paste(missing_columns, collapse = ", ")))
}
}

# Convert formula to string for regex checking
formula_str <- as.character(formula)

# Check if batch column is provided and validate its presence in the formula
if (!is.null(meta_batch_column)) {
if (!grepl(meta_batch_column, formula_str)) {
stop_call_false(
paste("The batch effect column", meta_batch_column,
"is provided but not present in the design formula. ",
"Please ensure that if you specify a batch column, ",
"it is included in the design formula to",
"remove batch effects.")
)
}
}

# Check if the second batch column is provided and validate its presence
if (!is.null(meta_batch2_column)) {
if (!grepl(meta_batch2_column, formula_str)) {
stop_call_false(
paste("The second batch effect column", meta_batch2_column,
"is provided but not present in the design formula.",
"Please ensure that if you specify a second batch column,",
"it is included in the design formula",
"to remove batch effects.")
)
}
}

Expand Down
27 changes: 22 additions & 5 deletions R/utils_report_generation.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
#' @param spline_params A list of spline parameters, such as dof and type.
#' @param adj_pthresholds Numeric vector with the values for the adj.p.tresholds
#' for each level.
#' @param adj_pthresh_avrg_diff_conditions Float, only for cluster_hits()
#' @param adj_pthresh_interaction_condition_time Float, only for cluster_hits()
#' @param report_type A character string specifying the report type
#' ('screen_limma_hyperparams' or 'cluster_hits').
#' @param feature_name_columns Character vector with the column names of the
Expand Down Expand Up @@ -69,6 +71,8 @@ generate_report_html <- function(
level_headers_info = NA,
spline_params = NA,
adj_pthresholds = NA,
adj_pthresh_avrg_diff_conditions = NA, # only for cluster_hits()
adj_pthresh_interaction_condition_time = NA, # only for cluster_hits()
report_type = "explore_data",
feature_name_columns = NA, # only for cluster_hits()
mode = NA,
Expand All @@ -79,7 +83,7 @@ generate_report_html <- function(
) {

feature_names_formula <- ""

if (report_type == "explore_data") {
if (filename == "explore_data") {
title <- "explore data"
Expand Down Expand Up @@ -142,6 +146,7 @@ generate_report_html <- function(
"meta_condition",
"meta_batch",
"limma_design",
"mode",
"analyst_name",
"contact_info",
"project_name",
Expand Down Expand Up @@ -187,7 +192,9 @@ generate_report_html <- function(
'<hr><h2 style="font-size: 48px;">',
'Downloads \U0001F4E5</h2><table>'
)


report_info[["mode"]] <- mode # Because mode is not part of report_info

for (field in report_info_fields) {
base64_df <- process_field(
field,
Expand All @@ -200,7 +207,12 @@ generate_report_html <- function(
enrichr_format
)

field_display <- sprintf("%-*s", max_field_length, gsub("_", " ", field))
field_display <- sprintf(
"%-*s",
max_field_length,
gsub("_", " ", field)
)

report_info_section <- paste(
report_info_section,
sprintf('<tr><td style="text-align: right;
Expand Down Expand Up @@ -320,6 +332,9 @@ generate_report_html <- function(
level_headers_info = level_headers_info,
spline_params = spline_params,
adj_pthresholds = adj_pthresholds,
adj_pthresh_avrg_diff_conditions = adj_pthresh_avrg_diff_conditions,
adj_pthresh_interaction_condition_time =
adj_pthresh_interaction_condition_time,
mode = mode,
report_info = report_info,
output_file_path = output_file_path
Expand Down Expand Up @@ -1189,7 +1204,8 @@ process_field <- function(
# Create ZIP file for Enrichr_clustered_genes
zip_base64 <- create_enrichr_zip(enrichr_format)
base64_df <- sprintf(
'<a href="data:application/zip;base64,%s" download="Enrichr_clustered_genes.zip" class="embedded-file">
'<a href="data:application/zip;base64,%s"
download="Enrichr_clustered_genes.zip" class="embedded-file">
<button>Download Enrichr_clustered_genes.zip</button></a>',
zip_base64
)
Expand All @@ -1199,7 +1215,8 @@ process_field <- function(
!is.null(enrichr_format$background)) {

base64_df <- sprintf(
'<a href="data:text/plain;base64,%s" download="Enrichr_background.txt" class="embedded-file">
'<a href="data:text/plain;base64,%s" download="Enrichr_background.txt"
class="embedded-file">
<button>Download Enrichr_background.txt</button></a>',
base64enc::base64encode(charToRaw(enrichr_format$background))
)
Expand Down
12 changes: 4 additions & 8 deletions dev/function_testing_ground.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ splineomics <- create_splineomics(
data = data,
# rna_seq_data = voom_obj,
meta = meta,
mode = "integrated",
mode = "isolated",
# annotation = annotation,
# feature_name_columns = feature_name_columns,
report_info = report_info,
Expand Down Expand Up @@ -190,17 +190,13 @@ spline_test_configs <- data.frame(

splineomics <- update_splineomics(
splineomics = splineomics,
design = "~ 1 + Phase*X + Reactor",
design = "~ 1 + X + Reactor",
# data = data2,
# meta = meta2,
spline_params = list(spline_type = c("n"), # Chosen spline parameters
dof = c(2L))
spline_params = list(spline_type = c("n", "n"), # Chosen spline parameters
dof = c(2L, 2L))
)

# methods(print)
# getS3method("print", "SplineOmics")

class(splineomics)
print(splineomics)


Expand Down
7 changes: 6 additions & 1 deletion doc/get-started.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ designs <- c(
"~ 1 + X + Reactor"
)

# 'Integrated means' limma will use the full dataset to generate the results for
# each condition. 'Isolated' means limma will use only the respective part of
# the dataset for each condition. Designs that contain the condition column
# (here Phase) must have mode 'integrated', because the full data is needed to
# include the different conditions into the design formula.
modes <- c(
"integrated",
"isolated"
Expand Down Expand Up @@ -174,7 +179,7 @@ print(spline_test_configs)
splineomics <- SplineOmics::update_splineomics(
splineomics = splineomics,
design = "~ 1 + Phase*X + Reactor", # best design formula
mode = "integrated",
mode = "integrated", # means limma uses the full data for each condition.
data = data2, # data without "outliers" was better
meta = meta2,
spline_params = list(
Expand Down
Loading

0 comments on commit 3a4b58f

Please sign in to comment.