Skip to content

Commit

Permalink
move to github
Browse files Browse the repository at this point in the history
  • Loading branch information
friedue committed Mar 3, 2022
1 parent 369f267 commit 1d8f25e
Show file tree
Hide file tree
Showing 65 changed files with 6,578 additions and 2 deletions.
26 changes: 26 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Package: Klebanoff0606T1
Type: Package
Title: Single-Cell-Sequencing Data Generated by The Klebanoff Lab for Sample 0606T1
Version: 0.1.3
Author: c(person("Friederike", "Dündar", email = "frd2007@med.cornell.edu", role = c("aut","cre")),
person("Paul","Zumbo", email="paz2005@med.cornell.edu", role = c("aut","ctb")))
Description: SingleCellExperiment object and list of differentially expressed genes
as determined using 5'-DGE and V(D)J sequencing of tumor-antigen-specific T cells
and corresponding control cells; all obtained from donor 0606T1. The
tumor-specific antigen is a mutant form of PIK3CA.
Depends:
R (>= 3.5.0)
Imports:
data.table,
EnsDb.Hsapiens.v86,
ggplot2,
magrittr,
scater,
SingleCellExperiment
Suggests:
stringr,
usethis
License: MIT
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.1
20 changes: 20 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by roxygen2: do not edit by hand

export(check_columns)
export(extract_markers)
export(load_0606T1filt)
export(load_0606T1merged)
export(load_0606T1shared)
export(load_DE_results)
export(load_RDSdata_from_Box)
export(load_data_from_Box)
export(load_sce)
export(make_long_dt)
export(my_table)
export(plot_tgram)
export(prep_data4tgram)
export(run_DE)
import(data.table)
import(kableExtra)
import(magrittr)
import(scater)
25 changes: 25 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Klebanoff0606T1 v.0.1.3 (2022-02-09)
====================================

* removed dependencies on `ABCUtilities` and `scABC2`

Klebanoff0606T1 v.0.1.2 (2021-07-14)
====================================

* updated use of `scABC2::make_long_dt()` in `tgrams.R` to reflect that function's change of parameter names (genes --> features)

Klebanoff0606T1 v.0.1.1 (2020-04-15)
====================================

* added list of clonotypes of interest (`cdrs0606T1`)
* re-worked the addition of the clonotype ID and clonotype frequency calculation
because I had formerly neglected to take into account the doublet removal
from sce.shared, i.e. the clonotype counts weren't reflecting the cell numbers
after that filtering step
* added additional entry to colData: `fit_for_test` that assesses whether a
given clonotype has sufficient cells in *both* conditions to be used for a
t-test
* `add_frequencies()` now allows to specify what type of feature should be counted
(sensibly, cdr3s_aa or id)
* `abbreviations_of_clonotypes.Rmd` has been removed as it is now part of the first
vignette (01_FilteringAndProcessing.Rmd)
43 changes: 43 additions & 0 deletions R/data_DEresults.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#' Load the list of DE test results (any direction) for all clonotypes present
#' in both conditions ('shared' clonotypes)
#'
#' @details The p- and q-values here represent a two-tailed test for any direction
#' of the logFC.
#' For details on how the DE analysis was done, see the vignette "DE_genes"
#' and the wrapper function \code{\link{run_DE}}.
#'
#' @param sample Sample name (default: "0606T1"). It is appended to "Klebanoff"
#' to form the name of the package whose \code{extdata} folder is searched for
#' the file \code{delist.both}.
#'
#' @return Whatever \code{load_data_from_Box(..., load_rds = FALSE)} returns for
#' the first column of \code{delist.both}; presumably the nested list described
#' under Format.
#'
#' @format Nested list where names correspond to the abbreviated clonotype IDs.
#' For every clonotype, there's a list that contains:
#' \describe{
#' \item{findMarkers_results:}{A SimpleDataFrameList, i.e. the original result
#' of \code{scran::findMarkers()}, but only for the MUT comparison}
#' \item{marker_IDs:}{a data.table with the genes that passed the FDR threshold;
#' if that is NULL, this implies that there were no DEG for that particular
#' clonotype comparing MUT vs WT}
#' }
#' @usage load_DE_results("0606T1")
#' @examples \dontrun{
#' library(Klebanoff0606T1)
#'
#' sce.shared <- load_0606T1shared()
#' sce.shared$antigen <- factor(gsub("\\..*","",sce.shared$Sample),
#' levels = c("WT", "MUT"), ordered = TRUE)
#'
#' delist.both <- lapply( unique(sce.shared$id), function(x){
#' run_DE(
#' sce.shared[, sce.shared$id == x],
#' group_identifier = "antigen",
#' direction = "any",
#' FDR = 0.05, rank = Inf,
#' comp_name = paste0(x, "_"))
#' })
#' names(delist.both) <- unique(sce.shared$id)
#' }
#'
#' @export
#'
load_DE_results <- function(sample = "0606T1") {
  ## mustWork = TRUE: fail right away if the package or the file is missing
  fn <- system.file("extdata", "delist.both",
                    package = paste0("Klebanoff", sample), mustWork = TRUE)

  ## only the first column of the text file is used (presumably the Box link)
  fin <- read.table(fn, stringsAsFactors = FALSE)[[1]]
  load_data_from_Box(fin, load_rds = FALSE)
}
7 changes: 7 additions & 0 deletions R/data_clonotypelist.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#' List of the winning and unspecifically reactive clonotypes for 0606T1
#'
#' @format Each level of the list contains the CDR3s_aa entry for the clonotype
#' that showed the highest reactivity towards the MUT antigen ("winner") as
#' well as a non-reactive clonotype as well as a clonotype with stronger IFNG
#' expression in the WT compared to the MUT situation.
"cdrs0606T1"
177 changes: 177 additions & 0 deletions R/data_sce.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#' Load the filtered SCE for sample 0606T1
#'
#' @description Load the SingleCellExperiment object that holds the batch-corrected
#' reduced dimensionality results (MUT/WT = batch). This is a thin wrapper
#' around \code{\link{load_sce}} that requests all assays for sample "0606T1".
#'
#' @usage sce.filt <- load_0606T1filt()
#'
#' @seealso \code{\link{load_0606T1shared}}, \code{\link{load_0606T1merged}}
#' @return an SCE object that needs to be assigned to an object in the environment
#'
#' @export
#'
load_0606T1filt <- function() {
  ## Return the loader's result directly: ending the body on an assignment
  ## would hand the object back invisibly, unlike the sibling loaders.
  load_sce(which_assays = "all", sample = "0606T1")
}

#' Load the SCE for the batch-corrected merged data set
#'
#' @description Retrieves the SingleCellExperiment object holding the
#' batch-corrected reduced dimensionality results (MUT/WT = batch). Internally,
#' \code{\link{load_sce}} is called for sample "0606T1Merged", requesting the
#' "reconstructed" assay.
#'
#' @usage sce.merged <- load_0606T1merged()
#' @return an SCE object that needs to be assigned to an object in the environment
#'
#' @export
#'
load_0606T1merged <- function() {
  ## delegate to the generic loader; its result is the return value
  load_sce(sample = "0606T1Merged", which_assays = "reconstructed")
}

#' Load the SCE for the clonotypes shared between both conditions
#'
#' @description Retrieves the SingleCellExperiment object that represents the
#' cells with clonotypes that are present in both conditions.
#' The UMAP coordinates were re-calculated on the reduced subset after removal
#' of suspected doublets (see the vignette about the filtering). Internally,
#' \code{\link{load_sce}} is called for sample "0606T1Shared", requesting the
#' "logcounts" assay.
#'
#' @return an SCE object that needs to be assigned to an object in the environment
#'
#' @usage sce.shared <- load_0606T1shared()
#'
#' @export
load_0606T1shared <- function() {
  ## delegate to the generic loader; its result is the return value
  load_sce(sample = "0606T1Shared", which_assays = "logcounts")
}


#' Filtered cells
#'
#' @format Named list of cell numbers following the filtering steps
#' described in the filtering and processing vignette.
"cell_filt"

#' Filtered genes
#'
#' @format Named list of gene numbers following the filtering steps
#' described in the filtering and processing vignette.
"gene_filt"


#' Load the filtered and processed SingleCellExperiment object
#'
#' @description Use this function to load the processed and filtered gene expression
#' data plus the clonotype information stored within one SingleCellExperiment
#' object.
#'
#'
#' @param which_assays can be "all" (default) or individual assays, e.g. c("logcounts",
#' "counts") etc. If space and memory are problematic, definitely limit the selection here!
#' Assays that are available are, e.g., "counts", "logcounts". Matching of
#' individual assay names is case-insensitive.
#' @param sample Name of the sample. It determines which link file is read
#' (\code{sce_storage_<sample>.txt} in the \code{extdata} folder of the package
#' 'Klebanoff0606T1') and must match the third column of that file.
#' @param ... Additional parameters passed on to \code{load_RDSdata_from_Box}, e.g.
#' \code{check_for_updates = TRUE}
#'
#' @details
#' For the entire code of the filtering and processing, see the vignette
#' \code{01_processing.Rmd}.
#'
#' The resulting SCE object contains the usual content: colData with information
#' about individual cells, rowData with info about individual genes, reducedDims,
#' etc.
#'
#' The \code{colData} includes:
#'
#' \describe{
#' \item{Barcode:}{Each cell's barcode used for keeping track of its identity during sequencing.}
#' \item{Sample:}{'WT' or 'MUT'}
#' \item{raw_clonotype_id:}{e.g. 'clonotype94'}
#' \item{cdr3s_aa:}{The amino acid sequence of the CDR3 portion, e.g. "TRA:CIARGGGGADGLTF;TRA:CGADRNGNEKLTF;TRB:CASSLTTDREPYEQYF"}
#' \item{multiTRA:}{TRUE/FALSE entries based on whether \code{cdr3s_aa} contained more than one entry for TRA}
#' \item{multiTRB:}{TRUE/FALSE entries based on whether \code{cdr3s_aa} contained more than one entry for TRB}
#' \item{numTRA:}{Number of TRA sequences within \code{cdr3s_aa}}
#' \item{numTRB:}{Number of TRB sequences within \code{cdr3s_aa}}
#' \item{cluster:}{clustering results of all cells}
#' }
#'
#' The object also contains the coordinates from dimensionality reductions (see
#' examples for more details).
#'
#'
#' @usage sce.filt <- load_sce(which_assays = "logcounts", sample = "0606T1")
#'
#' @return A SingleCellExperiment object with cells from 'mutant' samples
#' (stimulation with tumor antigen) and from the 'wt' sample (stimulation
#' with an irrelevant antigen).
#'
#'
#' @examples \dontrun{
#'
#' library(SingleCellExperiment)
#' sce.0606T1 <- load_sce(which_assays = "all", sample = "0606T1")
#'
#' reducedDimNames(sce.0606T1)
#' # e.g. "corrected" "TSNE" "UMAP"
#'
#' assayNames(sce.0606T1)
#' # e.g. "counts" "logcounts"
#' }
#'
#'
#' @export
#'
load_sce <- function(which_assays = "all", sample = "Sample", ...) {

  ## the Box links are noted in the text file
  storage_file <- paste0("sce_storage_", sample, ".txt")
  fl <- system.file("extdata", storage_file, package = "Klebanoff0606T1")
  if (fl == "") {
    stop(storage_file, " does not exist in package 'Klebanoff0606T1'.",
         call. = FALSE)
  }

  inf <- read.table(fl, stringsAsFactors = FALSE)

  ## Every row has to carry the requested sample name in the third column.
  ## A missing column, NA entries or several different names all end up here
  ## instead of tripping over a zero-length or multi-element `if` condition.
  if (is.null(inf$V3) || !isTRUE(all(inf$V3 == sample))) {
    stop("The sce_storage.txt file must contain a third column holding the sample name. Which should be the same as the one specified via sample = .",
         call. = FALSE)
  }

  ## DOWNLOAD AND CACHE THE FILES FROM THE BOX ==============================
  ## note: using the default cache of BioC here, we may want to change that
  ## to something more specific via the `cache_path` option of `load_RDSdata_fromBox()`

  ## load colData
  cold <- load_RDSdata_from_Box(
    shared_link = inf[inf$V1 == "colData", ]$V2,
    data_name = paste0("KlebColData", sample), ...)

  ## load rowData
  rowd <- load_RDSdata_from_Box(
    shared_link = inf[inf$V1 == "rowData", ]$V2,
    data_name = paste0("KlebRowData", sample), ...)

  ## get reducedDims
  rdms <- load_RDSdata_from_Box(
    shared_link = inf[inf$V1 == "reducedDims", ]$V2,
    data_name = paste0("KlebRedDims", sample), ...)

  ## metadata
  metd <- load_RDSdata_from_Box(
    shared_link = inf[inf$V1 == "metadata", ]$V2,
    data_name = paste0("KlebMetadata", sample), ...)

  ## get assayData
  entries <- unique(inf$V1)
  ## `%in%` keeps the condition scalar even if a vector of assays is supplied
  if ("all" %in% which_assays) {
    ## extract corresponding assay entry from the text file
    asss <- grep("^assay:", entries, value = TRUE)
  } else {
    asss <- unlist(lapply(which_assays, function(x) {
      grep(paste0(":", x, "$"), entries, ignore.case = TRUE, value = TRUE)
    }))
    ## warn when none of the requested assays has an entry in the link file
    if (length(asss) == 0) {
      warning("None of the assays you specified are part of the file stored in inst/extdata, i.e. we can't find the links.")
    }
  }

  assl <- list()
  for (i in asss) {
    ## strip the "assay:" prefix to obtain the name of the assay
    j <- gsub("^assay:", "", i)
    assl[[j]] <- load_RDSdata_from_Box(
      shared_link = inf[inf$V1 == i, ]$V2,
      data_name = paste0("Kleb", j, sample), ...)
  }

  ## construct the SCE object =============================================
  SingleCellExperiment::SingleCellExperiment(
    assays = assl,
    colData = cold, rowData = rowd,
    metadata = metd, reducedDims = rdms)
}

46 changes: 46 additions & 0 deletions R/data_sharedClonotypes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#' Shared clonotypes
#'
#' @description \code{data.table} of the clonotypes that are found in both
#' conditions MUT and WT in the 0606T1 data set.
#'
#' @usage data(shared_clonotypes)
#'
#' @examples \dontrun{
#'
#' ## count TRA/TRB
#' clono_freq <- colData(sce.filt)[, c("Sample" ,"cdr3s_aa")] %>%
#' as.data.frame %>% data.table(., keep.rownames = TRUE) %>%
#' .[!is.na(cdr3s_aa), .N, c("cdr3s_aa","Sample")]
#' setorder(clono_freq, N)
#'
#' ## formatting the TRA/TRB notations
#' ## will only work if there's just one TRA
#' ct <- dcast(clono_freq, cdr3s_aa ~ Sample, value.var = "N") %>%
#' .[!is.na(MUT.0606T1) & !is.na(WT.0606T1)]
#'
#' ct[, TRA := gsub(";*TRB:[A-Z]+", "", cdr3s_aa)]
#' ct[, TRA := ifelse(TRA == "", NA, TRA)]
#' ct[, TRB := gsub(".*(TRB:[A-Z]+)", "\\1", cdr3s_aa)]
#' # if only TRA was present, TRB needs to be set to NA
#' ct[, TRB := ifelse(grepl("^TRA", TRB), NA, TRB)]
#' setorder(ct, -MUT.0606T1, -WT.0606T1 )
#' shared_clonotypes <- copy(ct)
#' }
#'
#' @seealso \code{clonotype_ids}
"shared_clonotypes"



#' Table of customized clonotype IDs for sample 0606T1
#'
#' @description \code{data.table} with our customized clonotype IDs for ease of
#' visualization and comparison. I.e., the CDR3s amino acid sequences are
#' replaced with arbitrary IDs. Note that these clonotypes are those that are
#' found in both conditions of patient 0606T1, i.e. MUT and WT.
#'
#' @details See the section "Adding the clonotype IDs" in the vignette "Filtering and Processing"
#' about how the consolidation and clean up of the TRA/TRB sequences was done.
#'
#' @seealso \code{shared_clonotypes}
"clonotype_ids"
Loading

0 comments on commit 1d8f25e

Please sign in to comment.