Merge branch 'assay_dev' into 9000

akoyabio · Aug 18, 2021 · 3d8c12b · 3d8c12b
2 parents 97e32b9 + 939810d
commit 3d8c12b
Show file tree

Hide file tree

Showing 77 changed files with 40,333 additions and 5 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -6,7 +6,7 @@ Authors@R: c(
     person("Kent S", "Johnson", role = c("aut", "cre"),
            email = "kjohnson@akoyabio.com"),
     person("Carla", "Coltharp", role="ctb"),
-    person("Akoya Biosciences", role="cph"))
+    person("Akoya Biosciences", role=c("cph", "fnd")))
 Copyright: Akoya Biosciences
 Description: Creates diagnostic and summary reports from inForm data.
 Language: en-US
@@ -21,6 +21,7 @@ Imports:
     forcats,
     fs,
     ggplot2 (>= 3.3.2),
+    glue,
     jpeg,
     knitr,
     magrittr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -6,6 +6,8 @@ export(addin_30_analysis_app)
 export(addin_35_spatial_map_viewer)
 export(addin_40_unmixing_quality)
 export(addin_50_component_levels)
+export(addin_60_mean_of_top_and_bottom_cells)
+export(addin_70_staining_consistency_report)
 export(choose_directory)
 export(choose_files)
 export(component_levels_report)
@@ -22,19 +24,23 @@ export(count_phenotypes)
 export(count_within_summary)
 export(counts_to_percents)
 export(discrete_colors)
+export(mean_of_top_and_bottom_cells)
+export(mean_of_top_and_bottom_cells_report)
 export(merge_cell_seg_files)
 export(nearest_neighbor_map)
 export(nearest_neighbor_summary)
 export(order_by_slide_and_tissue_category)
 export(spatial_map_viewer)
 export(split_phenotypes)
+export(staining_consistency_report)
 export(unmixing_quality_report)
 export(upset_plot)
 export(write_count_within_sheet)
 export(write_counts_sheet)
 export(write_density_sheet)
 export(write_expression_sheet)
 export(write_h_score_sheet)
+export(write_mean_of_top_and_bottom_charts)
 export(write_nearest_neighbor_summary_sheet)
 export(write_percents_sheet)
 export(write_plot_sheet)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,15 @@
 # phenoptrReports 0.2.13.9000
 
+Staining consistency report:
+- New addin creates a report showing mean expression of a selected marker
+  across multiple samples.
+
+Mean of top 20 / bottom 10 addin:
+- New addin helps to evaluate whether the staining quality is
+  likely to produce good unmixing. See 
+  https://akoyabio.github.io/phenoptrReports/articles/top_20_bottom_10_report.html
+  for details.
+
 Analysis addin:
 - Fix problem computing mean expression for phenotypes with formulas 
   in their definition #47

diff --git a/R/mean_of_top_and_bottom_cells.R b/R/mean_of_top_and_bottom_cells.R
@@ -0,0 +1,226 @@
+# Top 20 and bottom 10%ile
+
+#' Create reports showing mean expression of top and bottom cells
+#' @param merge_file Path to a merged or consolidated cell seg data file
+#' @param config_file Path to a configuration file containing
+#' column names in the merge file, one name per line.
+#' @param by Column to aggregate by, e.g. "Slide ID" or "Annotation ID".
+#' @param tissue_categories Tissue categories to report, or NULL to use all.
+#' @param adjacent_max The maximum ratio between expression of adjacent fluors.
+#' @return None
+#' @export
+mean_of_top_and_bottom_cells_report =
+  function(merge_file, config_file, by, tissue_categories, adjacent_max) {
+    # Read the configuration. Each line should be a column name
+    cols = readLines(config_file) %>%
+      purrr::map_chr(stringr::str_trim) %>%
+      purrr::discard(~.x=="")
+
+    # Create the Excel file
+    excel_path = mean_of_top_and_bottom_cells(
+      merge_file, cols, adjacent_max=adjacent_max,
+      tissue_categories=tissue_categories,
+      .by=by)
+
+    # Create the charts
+    charts_path = stringr::str_replace(excel_path, 'xlsx$', 'docx')
+    write_mean_of_top_and_bottom_charts(excel_path, charts_path, .by=by)
+
+    cat('Reports written to\n', excel_path, '\n', charts_path, '\n')
+}
+
+#' Compute mean expression levels of the top and bottom expressing cells,
+#' per Slide ID or field. This creates a quality report.
+#'
+#' Report the results in an Excel workbook in the same directory
+#'   as `csd_path`.
+#' @param csd_path Path to a merged cell seg data, or NULL. If NULL, a file
+#'   chooser is opened to allow file selection.
+#' @param expression_cols Vector of column names to report.
+#' @param top_count The number of top-expressing cells to select for
+#'   averaging.
+#' @param bottom_percentile The cutoff for the bottom percentile cells.
+#' @param adjacent_max The maximum ratio between expression of adjacent fluors
+#' @param tissue_categories Tissue categories to include, or NULL for all
+#' @param .by The column to aggregate by
+#' @param out_path The path to the output file; if `NULL`, a date-stamped file
+#' will be written to the same directory as `csd_path`.
+#' @return The path to the generated file.
+#' @importFrom magrittr %>%
+#' @importFrom rlang .data
+#' @export
+mean_of_top_and_bottom_cells =
+  function(csd_path=NULL, expression_cols,
+           top_count=20, bottom_percentile=0.1, adjacent_max = 3,
+           tissue_categories=NULL, .by='Slide ID', out_path=NULL)
+{
+  if (is.null(csd_path))
+    csd_path = file.choose()
+
+  # Read the merged cell seg data file
+  csd = phenoptr::read_cell_seg_data(csd_path)
+
+  if (!is.null(tissue_categories) && length(tissue_categories) > 0)
+    csd = csd %>% dplyr::filter(.data$`Tissue Category` %in% tissue_categories)
+
+  # Define phenotypes and expression
+  phenotypes = phenoptr::parse_phenotypes('Total Cells')
+
+  # Check that all the expression columns exist
+  missing = !(expression_cols %in% names(csd))
+  if (any(missing)) {
+    missing = expression_cols[missing]
+    stop('Columns missing from cell seg data: ', paste(missing, collapse=', '))
+  }
+
+
+  params = rlang::set_names(expression_cols,
+                            rep('Total Cells', length(expression_cols))) %>%
+    as.list()
+
+  # Compute top and bottom means
+  top20 = phenoptrReports::compute_mean_expression_many(
+    csd, phenotypes, params, count=top_count, .by=.by) %>%
+    dplyr::select(!!.by, dplyr::everything())
+  bottom10 = phenoptrReports::compute_mean_expression_many(
+    csd, phenotypes, params,  percentile=-bottom_percentile, .by=.by) %>%
+    dplyr::select(!!.by, dplyr::everything())
+
+  # Clean up column names, remove everything before the compartment and ' Mean'
+  clean_names = function(n) {
+    n %>% stringr::str_remove('^.*(?=Nucleus|Cytoplasm|Membrane)') %>%
+      stringr::str_remove(' Mean')
+  }
+
+  names(top20) = clean_names(names(top20))
+  names(bottom10) = clean_names(names(bottom10))
+
+  # Ratio of means, top / bottom
+  ratio_top_to_bottom = purrr::map2(top20, bottom10, function(t, b) {
+    if (!is.numeric(t)) return(t)
+    t / b
+  }) %>% tibble::as_tibble()
+
+  # Ratio of means of top cells, adjacent fluors
+  ratio_data = top20 %>%
+    dplyr::select(-!!.by)
+  first_fluors = ratio_data %>% dplyr::select(-ncol(ratio_data))
+  second_fluors = ratio_data %>% dplyr::select(-1)
+  ratio_adjacent = purrr::map2(first_fluors, second_fluors, ~.x/.y) %>%
+    purrr::set_names(purrr::map2(names(first_fluors), names(second_fluors),
+                                 ~paste0(.x, ' / ', .y))) %>%
+    tibble::as_tibble() %>%
+    tibble::add_column(top20[.by], .before=1)
+
+  # Write it out
+  red_style = openxlsx::createStyle(fontColour='red')
+
+  # How many non-data columns are there?
+  # The main header starts in the next column
+  header_col = 2
+
+  if (is.null(out_path))
+    out_path = file.path(dirname(csd_path),
+      stringr::str_glue(
+        "{format(Sys.Date(), '%Y%m%d')}_Top{top_count}_Bottom{bottom_percentile*100}_Data.xlsx"))
+  wb = openxlsx::createWorkbook()
+
+  write_and_style_sheet(wb, top20,
+            stringr::str_glue('Top {top_count} data'),
+            stringr::str_glue('Mean expression of top {top_count} cells'),
+            header_col)
+
+  write_and_style_sheet(wb, bottom10,
+    stringr::str_glue('Bottom {bottom_percentile*100}%ile data'),
+    stringr::str_glue(
+      'Mean expression of bottom {bottom_percentile*100}%ile cells'),
+    header_col)
+
+  sheet_name = 'Ratio top to bottom'
+  write_and_style_sheet(wb, ratio_top_to_bottom,
+    sheet_name,
+    stringr::str_glue(
+      'Ratio of means, top {top_count} cells / bottom {bottom_percentile*100}%ile cells'),
+    header_col)
+
+  # Mark out-of-range rows for Opal 780 ratio if present
+  col_780 = stringr::str_subset(names(ratio_top_to_bottom), '780')
+  if (length(col_780)==1) {
+    top_to_bottom_min = 30 # Opal 780 ratio should be greater than this
+
+    red_rows = which(ratio_top_to_bottom[[col_780]] < top_to_bottom_min)
+    if (length(red_rows) > 0) {
+      red_rows = red_rows + 2 # skip header rows
+      red_cols = which(names(ratio_top_to_bottom) == col_780)
+      openxlsx::addStyle(wb, sheet_name, red_style,
+                         rows=red_rows, cols=red_cols,
+                         gridExpand=TRUE, stack=TRUE)
+    }
+  }
+
+  sheet_name = 'Ratio adjacent fluors'
+  write_and_style_sheet(wb, ratio_adjacent,
+     sheet_name,
+     stringr::str_glue(
+       'Ratio of means, top {top_count} cells of adjacent fluors'),
+     header_col)
+
+  red_cells = (ratio_adjacent[-1] > adjacent_max
+               | ratio_adjacent[-1] < 1/adjacent_max)
+  red_cells = which(as.matrix(red_cells), arr.ind=TRUE)
+  if (dim(red_cells)[1] > 0) {
+    openxlsx::addStyle(wb, sheet_name, red_style,
+                       rows=red_cells[,'row']+2, cols=red_cells[,'col']+1,
+                       gridExpand=FALSE, stack=TRUE)
+  }
+
+  openxlsx::saveWorkbook(wb, out_path, overwrite = TRUE)
+
+  return(out_path)
+}
+
+write_and_style_sheet =
+    function(wb, d, sheet_name, sheet_title, header_col) {
+  phenoptrReports::write_sheet(wb, d, sheet_name, sheet_title, header_col)
+  data_cols = header_col:ncol(d)
+  openxlsx::setColWidths(wb, sheet_name, data_cols, 14)
+
+  number_style=openxlsx::createStyle(numFmt='0.0000')
+  openxlsx::addStyle(wb, sheet_name, number_style,
+                     rows=seq_len(nrow(d))+2, cols=data_cols,
+                     gridExpand=TRUE, stack=TRUE)
+}
+
+#' Create summary charts from the results of `mean_of_top_and_bottom_cells`
+#'
+#' Create a Microsoft Word file or HTML document containing summary charts
+#' derived from the output of [mean_of_top_and_bottom_cells]
+#' with default parameters.
+#' The file type is determined by the file extension
+#' of `output_path`, which must be either `.docx` or `.html`.
+#' @param worksheet_path Path to an Excel file containing sheets written
+#'   by [mean_of_top_and_bottom_cells].
+#' @param output_path Path to write the resulting file.
+#' @param .by The aggregation column name
+#' @export
+write_mean_of_top_and_bottom_charts =
+    function(worksheet_path, output_path, .by='Slide ID') {
+  stopifnot(file.exists(worksheet_path))
+
+  if (is.null(output_path))
+    stop('You must provide an output path.')
+
+  output_format = switch(tools::file_ext(output_path),
+                         docx='word_document',
+                         html='html_vignette')
+
+  if (is.null(output_format))
+    stop('Unsupported output format')
+
+  rmd_path = system.file("rmd", "Mean_of_top_and_bottom_charts.Rmd",
+                         package="phenoptrReports")
+
+  rmarkdown::render(rmd_path, output_file=output_path, quiet=TRUE,
+                    output_format=output_format,
+                    params=list(data_path=worksheet_path, .by=.by))
+}