-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
65 changed files
with
6,578 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
Package: Klebanoff0606T1 | ||
Type: Package | ||
Title: Single-Cell-Sequencing Data Generated by The Klebanoff Lab for Sample 0606T1 | ||
Version: 0.1.3 | ||
Author: c(person("Friederike", "Dündar", email = "frd2007@med.cornell.edu", role = c("aut","cre")), | ||
person("Paul","Zumbo", email="paz2005@med.cornell.edu", role = c("aut","ctb"))) | ||
Description: SingleCellExperiment object and list of differentially expressed genes | ||
as determined using 5'-DGE and V(D)J sequencing of tumor-antigen-specific T cells | ||
and corresponding control cells; all obtained from donor 0606T1. The tumor- | ||
specific antigen is a mutant form of PIK3CA. | ||
Depends: | ||
R (>= 3.5.0) | ||
Imports: | ||
data.table, | ||
EnsDb.Hsapiens.v86, | ||
ggplot2, | ||
magrittr, | ||
scater, | ||
SingleCellExperiment | ||
Suggests: | ||
stringr, | ||
usethis | ||
License: MIT | ||
Encoding: UTF-8 | ||
LazyData: true | ||
RoxygenNote: 7.1.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(check_columns) | ||
export(extract_markers) | ||
export(load_0606T1filt) | ||
export(load_0606T1merged) | ||
export(load_0606T1shared) | ||
export(load_DE_results) | ||
export(load_RDSdata_from_Box) | ||
export(load_data_from_Box) | ||
export(load_sce) | ||
export(make_long_dt) | ||
export(my_table) | ||
export(plot_tgram) | ||
export(prep_data4tgram) | ||
export(run_DE) | ||
import(data.table) | ||
import(kableExtra) | ||
import(magrittr) | ||
import(scater) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
Klebanoff0606T1 v.0.1.3 (2022-02-09) | ||
==================================== | ||
|
||
* removed dependencies on `ABCUtilities` and `scABC2` | ||
|
||
Klebanoff0606T1 v.0.1.2 (2021-07-14) | ||
==================================== | ||
|
||
* updated use of `scABC2::make_long_dt()` in `tgrams.R` to reflect that function's change of parameter names (genes --> features) | ||
|
||
Klebanoff0606T1 v.0.1.1 (2020-04-15) | ||
==================================== | ||
|
||
* added list of clonotypes of interest (`cdrs0606T1`) | ||
* re-worked the addition of the clonotype ID and clonotype frequency calculation | ||
because I had formerly neglected to take into account the doublet removal | ||
from sce.shared, i.e. the clonotype counts weren't reflecting the cell numbers | ||
after that filtering step | ||
* added additional entry to colData: `fit_for_test` that assesses whether a | ||
given clonotype has sufficient cells in *both* conditions to be used for a | ||
t-test | ||
* `add_frequencies()` now allows specifying which type of feature should be counted | ||
(sensibly, cdr3s_aa or id) | ||
* `abbreviations_of_clonotypes.Rmd` has been removed as it is now part of the first | ||
vignette (01_FilteringAndProcessing.Rmd) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#' Load the list of DE test results (any direction) for all clonotypes present
#' in both conditions ('shared' clonotypes)
#'
#' @details The p- and q-values here represent a two-tailed test for any direction
#' of the logFC.
#' For details on how the DE analysis was done, see the vignette "DE_genes"
#' and the wrapper function \code{\link{run_DE}}.
#'
#' The function reads the file \code{extdata/delist.both} shipped with the
#' package \code{Klebanoff<sample>} and hands the first column of that file to
#' \code{\link{load_data_from_Box}} (with \code{load_rds = FALSE}).
#'
#' @param sample Sample identifier that is appended to "Klebanoff" to form the
#' name of the package whose \code{extdata/delist.both} file is read.
#' Default: "0606T1".
#'
#' @format Nested list where names correspond to the abbreviated clonotype IDs.
#' For every clonotype, there's a list that contains:
#' \describe{
#' \item{findMarkers_results:}{A SimpleDataFrameList, i.e. the original result
#' of \code{scran::findMarkers()}, but only for the MUT comparison}
#' \item{marker_IDs:}{a data.table with the genes that passed the FDR
#' threshold; if that is NULL, this implies that there were no DEG for that
#' particular clonotype comparing MUT vs WT}
#' }
#'
#' @return Whatever \code{\link{load_data_from_Box}} returns for the stored
#' entry; presumably the nested list described under "Format" (to be confirmed
#' against \code{load_data_from_Box}).
#'
#' @usage load_DE_results(sample = "0606T1")
#' @examples \dontrun{
#' library(Klebanoff0606T1)
#'
#' sce.shared <- load_0606T1shared()
#' sce.shared$antigen <- factor(gsub("\\..*","",sce.shared$Sample),
#'    levels = c("WT", "MUT"), ordered = TRUE)
#'
#' delist.both <- lapply( unique(sce.shared$id), function(x){
#'    run_DE(
#'    sce.shared[, sce.shared$id == x],
#'    group_identifier = "antigen",
#'    direction = "any",
#'    FDR = 0.05, rank = Inf,
#'    comp_name = paste0(x, "_"))
#'    })
#' names(delist.both) <- unique(sce.shared$id)
#' }
#'
#' @export
#'
load_DE_results <- function(sample = "0606T1"){
  ## locate the text file inside the installed package; mustWork = TRUE makes
  ## system.file() raise an error if the file (or package) cannot be found
  fn <- system.file("extdata", "delist.both",
                    package = paste0("Klebanoff", sample), mustWork = TRUE)

  ## only the first column of the file is used; the argument name is spelled
  ## out in full instead of relying on partial argument matching
  fin <- read.table(fn, stringsAsFactors = FALSE)[[1]]
  load_data_from_Box(fin, load_rds = FALSE)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#' List of the winning and unspecifically reactive clonotypes for 0606T1 | ||
#' | ||
#' @format Each level of the list contains the CDR3s_aa entry for the clonotype | ||
#' that showed the highest reactivity towards the MUT antigen ("winner") as | ||
#' well as a non-reactive clonotype as well as a clonotype with stronger IFNG | ||
#' expression in the WT compared to the MUT situation. | ||
"cdrs0606T1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
#' Load the filtered SCE for sample 0606T1 (all assays)
#'
#' @description Load the SingleCellExperiment object that holds the batch-corrected
#' reduced dimensionality results (MUT/WT = batch). This is a thin wrapper
#' around \code{\link{load_sce}} called with \code{which_assays = "all"} and
#' \code{sample = "0606T1"}.
#'
#' @usage sce.filt <- load_0606T1filt()
#'
#' @seealso \code{\link{load_0606T1shared}}, \code{\link{load_0606T1merged}}
#' @return an SCE object that needs to be assigned to an object in the environment
#'
#' @export
#'
load_0606T1filt <- function(){
  ## return the object visibly: ending the function on an assignment would
  ## hand the value back invisibly, unlike the sibling load_0606T1*() functions
  load_sce(which_assays = "all", sample = "0606T1")
}
|
||
#' Load the SCE for the batch-corrected merged data set
#'
#' @description Convenience wrapper around \code{\link{load_sce}} that
#' retrieves the SingleCellExperiment object holding the batch-corrected
#' reduced dimensionality results (MUT/WT = batch). Only the "reconstructed"
#' assay of the sample "0606T1Merged" is requested.
#'
#' @usage sce.merged <- load_0606T1merged()
#' @return an SCE object that needs to be assigned to an object in the environment
#'
#' @export
#'
load_0606T1merged <- function(){
  load_sce(which_assays = "reconstructed", sample = "0606T1Merged")
}
|
||
#' Load the SCE restricted to clonotypes shared between both conditions
#'
#' @description Load the SingleCellExperiment object that holds the SCE
#' representing cells with clonotypes that are present in both conditions.
#' The UMAP coordinates were re-calculated on the reduced subset after removal
#' of suspected doublets (see the vignette about the filtering).
#' Internally, \code{\link{load_sce}} is called for the sample "0606T1Shared"
#' and only the "logcounts" assay is requested.
#'
#' @return an SCE object that needs to be assigned to an object in the environment
#'
#' @usage sce.shared <- load_0606T1shared()
#'
#' @export
load_0606T1shared <- function(){
  load_sce(which_assays = "logcounts", sample = "0606T1Shared")
}
|
||
|
||
#' Filtered cells | ||
#' | ||
#' @format Named list of cell numbers following the filtering steps | ||
#' described in this vignette. | ||
"cell_filt" | ||
|
||
#' Filtered genes | ||
#' | ||
#' @format Named list of gene numbers following the filtering steps | ||
#' described in this vignette. | ||
"gene_filt" | ||
|
||
|
||
#' Load the filtered and processed SingleCellExperiment object
#'
#' @description Use this function to load the processed and filtered gene expression
#' data plus the clonotype information stored within one SingleCellExperiment
#' object.
#'
#'
#' @param which_assays can be "all" (default) or individual assays, e.g. c("logcounts",
#' "counts") etc. If space and memory are problematic, definitely limit the selection here!
#' The assays that are available are those listed as \code{assay:<name>} in the
#' storage file of the respective sample, e.g. "counts", "logcounts".
#' @param sample Name of the sample. It determines which storage file
#' (\code{sce_storage_<sample>.txt} in \code{extdata} of the package
#' 'Klebanoff0606T1') is read, it must match the entries of the third column
#' of that file, and it is appended to the \code{data_name} values passed on
#' to \code{load_RDSdata_from_Box}.
#' @param ... Additional parameters passed on to \code{load_RDSdata_from_Box}, e.g.
#' \code{check_for_updates = TRUE}
#'
#' @details
#' For the entire code of the filtering and processing, see the vignette
#' \code{01_processing.Rmd}.
#'
#' The resulting SCE object contains the usual content: colData with information
#' about individual cells, rowData with info about individual genes, reducedDims,
#' etc.
#'
#' The \code{colData} includes:
#'
#' \describe{
#' \item{Barcode:}{Each cell's barcode used for keeping track of its identity during sequencing.}
#' \item{Sample:}{'WT' or 'MUT'}
#' \item{raw_clonotype_id:}{e.g. 'clonotype94'}
#' \item{cdr3s_aa:}{The amino acid sequence of the CDR3 portion, e.g. "TRA:CIARGGGGADGLTF;TRA:CGADRNGNEKLTF;TRB:CASSLTTDREPYEQYF"}
#' \item{multiTRA:}{TRUE/FALSE entries based on whether \code{cdr3s_aa} contained more than one entry for TRA}
#' \item{multiTRB:}{TRUE/FALSE entries based on whether \code{cdr3s_aa} contained more than one entry for TRB}
#' \item{numTRA:}{Number of TRA sequences within \code{cdr3s_aa}}
#' \item{numTRB:}{Number of TRB sequences within \code{cdr3s_aa}}
#' \item{cluster:}{clustering results of all cells}
#' }
#'
#' The object also contains the coordinates from dimensionality reductions (see
#' examples for more details).
#'
#' An error is raised if the storage file for \code{sample} does not exist or
#' if its third column does not hold the sample name. A warning is issued if
#' none of the requested assays can be found in the storage file.
#'
#' @usage load_sce(which_assays = "all", sample = "Sample", ...)
#'
#' @return A SingleCellExperiment object with cells from 'mutant' samples
#' (stimulation with tumor antigen) and from the 'wt' sample (stimulation
#' with an irrelevant antigen).
#'
#'
#' @examples \dontrun{
#'
#' library(SingleCellExperiment)
#' sce.0606T1 <- load_sce(which_assays = "all", sample = "0606T1")
#'
#' reducedDimNames(sce.0606T1)
#' ## e.g. "corrected" "TSNE" "UMAP"
#'
#' assayNames(sce.0606T1)
#' ## e.g. "counts" "logcounts"
#' }
#'
#'
#' @export
#'
load_sce <- function(which_assays = "all", sample = "Sample", ...){

  ## the Box links are noted in the text file
  fl <- system.file("extdata", paste0("sce_storage_", sample, ".txt"),
                    package = "Klebanoff0606T1")
  if (fl == "") {
    ## stop() concatenates its arguments without separators, so the file name
    ## in the message is not broken up by spaces
    stop("sce_storage_", sample,
         ".txt does not exist in package 'Klebanoff0606T1'.")
  }

  inf <- read.table(fl, stringsAsFactors = FALSE)

  ## every row must carry the requested sample name in the third column;
  ## the check also covers a missing third column and mixed/NA entries, which
  ## would otherwise produce a condition that if() cannot evaluate
  sample_col <- inf[["V3"]]
  if (is.null(sample_col) || !all(sample_col %in% sample)) {
    stop("The sce_storage.txt file must contain a third column holding the sample name. Which should be the same as the one specified via sample = .")
  }

  ## helper: entries of the second column (the links) for a given first-column key
  get_link <- function(key) inf[inf$V1 == key, ]$V2

  ## DOWNLOAD AND CACHE THE FILES FROM THE BOX ==============================
  ## note: using the default cache of BioC here, we may want to change that
  ## to something more specific via the `cache_path` option of `load_RDSdata_fromBox()`

  ## load colData
  cold <- load_RDSdata_from_Box(
    shared_link = get_link("colData"),
    data_name = paste0("KlebColData", sample), ...)

  ## load rowData
  rowd <- load_RDSdata_from_Box(
    shared_link = get_link("rowData"),
    data_name = paste0("KlebRowData", sample), ...)

  ## get reducedDims
  rdms <- load_RDSdata_from_Box(
    shared_link = get_link("reducedDims"),
    data_name = paste0("KlebRedDims", sample), ...)

  ## metadata
  metd <- load_RDSdata_from_Box(
    shared_link = get_link("metadata"),
    data_name = paste0("KlebMetadata", sample), ...)

  ## get assayData
  ## which_assays may be a vector of several assay names, so the comparison
  ## with "all" must be reduced to a single TRUE/FALSE before it goes into if()
  load_all <- length(which_assays) == 1L && isTRUE(which_assays == "all")
  if (load_all) {
    ## extract corresponding assay entry from the text file
    asss <- grep("^assay:", unique(inf$V1), value = TRUE)
  } else {
    asss <- unlist(lapply(which_assays, function(x) {
      grep(paste0(":", x, "$"), unique(inf$V1), ignore.case = TRUE, value = TRUE)
    }))
    ## warn if none of the requested names matched an entry of the file
    if (length(asss) == 0) {
      warning("None of the assays you specified are part of the file stored in inst/extdata, i.e. we can't find the links.")
    }
  }

  assl <- list()
  for (i in asss) {
    ## strip the "assay:" prefix to obtain the assay name
    j <- gsub("^assay:", "", i)
    assl[[j]] <- load_RDSdata_from_Box(
      shared_link = get_link(i), data_name = paste0("Kleb", j, sample), ...)
  }

  ## construct the SCE object =============================================
  return(SingleCellExperiment::SingleCellExperiment(assays = assl,
                                                    colData = cold, rowData = rowd,
                                                    metadata = metd, reducedDims = rdms))
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#' Shared clonotypes | ||
#' | ||
#' @description \code{data.table} of the clonotypes that are found in both | ||
#' conditions MUT and WT in the 0606T1 data set. | ||
#' | ||
#' @usage data(shared_clonotypes) | ||
#' | ||
#' @examples \dontrun{ | ||
#' | ||
#' ## count TRA/TRB | ||
#' clono_freq <- colData(sce.filt)[, c("Sample" ,"cdr3s_aa")] %>% | ||
#' as.data.frame %>% data.table(., keep.rownames = TRUE) %>% | ||
#' .[!is.na(cdr3s_aa), .N, c("cdr3s_aa","Sample")] | ||
#' setorder(clono_freq, N) | ||
#' | ||
#' ## formatting the TRA/TRB notations | ||
#' ## will only work if there's just one TRA | ||
#' ct <- dcast(clono_freq, cdr3s_aa ~ Sample, value.var = "N") %>% | ||
#' .[!is.na(MUT.0606T1) & !is.na(WT.0606T1)] | ||
#' | ||
#' ct[, TRA := gsub(";*TRB:[A-Z]+", "", cdr3s_aa)] | ||
#' ct[, TRA := ifelse(TRA == "", NA, TRA)] | ||
#' ct[, TRB := gsub(".*(TRB:[A-Z]+)", "\\1", cdr3s_aa)] | ||
#' ct[, TRB := ifelse(grepl("^TRA", TRB), NA, TRB)] # if only TRA was present, | ||
#' # I need to fill in the NA | ||
#' setorder(ct, -MUT.0606T1, -WT.0606T1 ) | ||
#' shared_clonotypes <- copy(ct) | ||
#' } | ||
#' | ||
#' @seealso \code{clonotype_ids} | ||
"shared_clonotypes" | ||
|
||
|
||
|
||
#' Table of customized clonotype IDs for sample 0606T1 | ||
#' | ||
#' @description \code{data.table} with our customized clonotype IDs for ease of | ||
#' visualization and comparison. I.e., the CDR3s amino acid sequences are re- | ||
#' placed with arbitrary IDs. Note that these clonotypes are those that are | ||
#' found in both conditions of patient 0606T1, i.e. MUT and WT. | ||
#' | ||
#' @details See the section "Adding the clonotype IDs" in the vignette "Filtering and Processing" | ||
#' about how the consolidation and clean up of the TRA/TRB sequences was done. | ||
#' | ||
#' @seealso \code{shared_clonotypes} | ||
"clonotype_ids" |
Oops, something went wrong.