Nesvilab · hsiaoyi0504 · Apr 30, 2024 · Apr 6, 2024 · Apr 7, 2024 · Apr 15, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: FragPipeAnalystR
 Type: Package
 Title: FragPipe downstream analysis in R
-Version: 0.1.0
+Version: 0.1.2
 Author: Who wrote it
 Maintainer: Yi Hsiao <yihsiao@umich.edu>
 Description: More about what it does (maybe more than one line)
@@ -36,7 +36,8 @@ Imports:
     stringr,
     SummarizedExperiment,
     tibble,
-    tidyr
+    tidyr,
+    vsn
 Suggests: 
     clusterProfiler,
     devtools,

diff --git a/NAMESPACE b/NAMESPACE
@@ -11,6 +11,7 @@ export(HeatmapAnnotation)
 export(MD_normalization)
 export(PSM_barplot)
 export(PTM_normalization)
+export(VSN_normalization)
 export(add_rejections)
 export(assay)
 export(average_samples)
@@ -31,6 +32,7 @@ export(plot_GSEA)
 export(plot_correlation_heatmap)
 export(plot_cvs)
 export(plot_feature)
+export(plot_glycan_distribution)
 export(plot_missval_heatmap)
 export(plot_or)
 export(plot_pca)
@@ -128,3 +130,5 @@ importFrom(tibble,rownames_to_column)
 importFrom(tidyr,gather)
 importFrom(tidyr,spread)
 importFrom(tidyr,unite)
+importFrom(vsn,predict)
+importFrom(vsn,vsnMatrix)
diff --git a/R/FragPipeAnalystR-package.R b/R/FragPipeAnalystR-package.R
@@ -95,5 +95,7 @@
 #' @importFrom tidyr gather
 #' @importFrom tidyr spread
 #' @importFrom tidyr unite
+#' @importFrom vsn predict
+#' @importFrom vsn vsnMatrix
 ## usethis namespace: end
 NULL
diff --git a/R/glyco_QC.R b/R/glyco_QC.R
@@ -0,0 +1,58 @@
+# Classify an N-glycan composition string into one of five glycan categories.
+#
+# The categories follow Table II of
+# https://www.sciencedirect.com/science/article/pii/S1535947620351070
+#
+# @param glycan_string A single composition string in which every
+#   monosaccharide name is immediately followed by its count, e.g.
+#   "HexNAc2Hex5Fuc1". The monosaccharides may appear in any order; one that
+#   does not appear at all is counted as 0.
+# @return One of "oligomannose", "fuco-sialylated", "fucosylated",
+#   "sialylated" or "neutral".
+N_glycan_property <- function(glycan_string) {
+  monosaccharides <- c("HexNAc", "Hex", "Fuc", "NeuAc", "NeuGc")
+
+  # Extract each "<name><digits>" token directly instead of coercing whatever
+  # is left of the string after a substitution: that approach only worked when
+  # the sugars were listed in one fixed order and emitted coercion warnings
+  # for every absent sugar.
+  glycan_composition <- vapply(monosaccharides, function(sugar) {
+    # The look-behind keeps a name from matching inside a longer one
+    # (e.g. "Hex" inside "dHex"); "Hex\\d+" cannot match "HexNAc2" because
+    # the name must be followed by a digit.
+    pattern <- paste0("(?<![A-Za-z])", sugar, "\\d+")
+    token <- regmatches(glycan_string,
+                        regexpr(pattern, glycan_string, perl = TRUE))
+    if (length(token) == 0) {
+      return(0)
+    }
+    # Drop the leading name, keep the count.
+    as.numeric(sub("^\\D+", "", token))
+  }, numeric(1))
+
+  has_fucose <- glycan_composition[["Fuc"]] > 0
+  has_sialic_acid <- glycan_composition[["NeuAc"]] > 0 ||
+    glycan_composition[["NeuGc"]] > 0
+
+  # Oligomannose is tested first, so it wins over the fucose/sialic-acid rules.
+  if (glycan_composition[["Hex"]] >= 5 &&
+      glycan_composition[["HexNAc"]] <= 2 &&
+      glycan_composition[["Fuc"]] <= 1) {
+    return('oligomannose')
+  }
+  if (has_fucose) {
+    if (has_sialic_acid) {
+      return('fuco-sialylated')
+    }
+    return('fucosylated')
+  }
+  if (has_sialic_acid) {
+    return('sialylated')
+  }
+  return('neutral')
+}
+
+#' Bar plot of the number of glycoforms per N-glycan category
+#'
+#' The glycan composition is cut out of every row name of `se`: everything up
+#' to and including the last "_Hex" is replaced by "Hex", then everything from
+#' the first " _" onwards is dropped. Each composition is classified with
+#' `N_glycan_property()` and the number of rows per category is plotted.
+#'
+#' @param se An object whose `rownames()` encode glycan compositions
+#'   (presumably a glycan-level SummarizedExperiment; only the row names are
+#'   read here).
+#' @return A ggplot object with one bar per category present in `se`.
+#' @export
+plot_glycan_distribution <- function(se) {
+  glycans <- gsub(".*_Hex", "Hex", rownames(se))
+  glycans <- gsub(" _.*", "", glycans)
+  # vapply() guarantees a character vector, whatever the number of rows.
+  categories <- vapply(glycans, N_glycan_property, character(1),
+                       USE.NAMES = FALSE)
+
+  df <- as.data.frame(table(categories))
+  colnames(df)[1] <- "Category"
+  # Fix the bar order explicitly rather than relying on alphabetical order.
+  df$Category <- factor(df$Category, levels = c("sialylated",
+                                                "fuco-sialylated",
+                                                "fucosylated",
+                                                "neutral",
+                                                "oligomannose"))
+  p <- ggplot(df, aes(x=Category, y=Freq, fill=Category)) +
+    geom_bar(stat = "identity") +
+    scale_fill_manual(values = c("sialylated"="#BC8867",
+                                 "fuco-sialylated"="#5C7099",
+                                 "fucosylated"="#699870",
+                                 "neutral"="#A45F61",
+                                 "oligomannose"="#8077A1")) +
+    theme_bw() +
+    theme(panel.border = element_blank(), panel.grid.major = element_blank(),
+          panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
+
+  return(p)
+}
+
diff --git a/R/io.R b/R/io.R
@@ -28,7 +28,7 @@ make.unique.2 <- function(x, sep = ".") {
 }
 
 # internal function to read quantification table
-readQuantTable <- function(quant_table_path, type = "TMT", level=NULL, log2transform = F) {
+readQuantTable <- function(quant_table_path, type = "TMT", level=NULL, log2transform = F, exp_type=NULL) {
   temp_data <- read.table(quant_table_path,
     header = TRUE,
     fill = TRUE, # to fill any missing data
@@ -53,7 +53,9 @@ readQuantTable <- function(quant_table_path, type = "TMT", level=NULL, log2trans
       # validate(fragpipe_input_test(temp_data))
       # remove contam
       temp_data <- temp_data[!grepl("contam", temp_data$Protein),]
-      temp_data$Index <- paste0(temp_data$`Protein ID`, "_", temp_data$`Peptide Sequence`)
+      if (is.null(exp_type)) {
+        temp_data$Index <- paste0(temp_data$`Protein ID`, "_", temp_data$`Peptide Sequence`)
+      }
     } else {
       # handle - (dash) in experiment column
       colnames(temp_data) <- gsub("-", ".", colnames(temp_data))
@@ -170,12 +172,12 @@ make_se_from_files <- function(quant_table_path, exp_anno_path, type = "TMT", le
     llog2transform <- F
   }
 
-  if (!level %in% c("gene", "protein", "peptide")) {
+  if (!level %in% c("gene", "protein", "peptide", "glycan")) {
     cat(paste0("The specified level: ", level, " is not a valid level. Available levels are gene, protein, and peptide.\n"))
     return(NULL)
   }
 
-  quant_table <- readQuantTable(quant_table_path, type = type, level=level)
+  quant_table <- readQuantTable(quant_table_path, type = type, level=level, exp_type=exp_type)
   exp_design <- readExpDesign(exp_anno_path, type = type, lfq_type = lfq_type)
   if (type == "LFQ") {
     if (level != "peptide") {
@@ -215,12 +217,12 @@ make_se_from_files <- function(quant_table_path, exp_anno_path, type = "TMT", le
         lfq_columns <- setdiff(lfq_columns, grep("Total Intensity", colnames(data_unique)))
         lfq_columns <- setdiff(lfq_columns, grep("Unique Intensity", colnames(data_unique)))
       } else if (lfq_type == "MaxLFQ") {
-        lfq_columns<-grep("MaxLFQ", colnames(data_unique))
+        lfq_columns <- grep("MaxLFQ", colnames(data_unique))
         if (length(lfq_columns) == 0) {
           stop(safeError("No MaxLFQ column available. Please make sure your files have MaxLFQ intensity columns."))
         }
       } else if (lfq_type == "Spectral Count") {
-        lfq_columns<-grep("Spectral", colnames(data_unique))
+        lfq_columns <- grep("Spectral", colnames(data_unique))
         lfq_columns <- setdiff(lfq_columns, grep("Total Spectral Count", colnames(data_unique)))
         lfq_columns <- setdiff(lfq_columns, grep("Unique Spectral Count", colnames(data_unique)))
       }
@@ -286,6 +288,12 @@ make_se_from_files <- function(quant_table_path, exp_anno_path, type = "TMT", le
       temp_exp_design <- temp_exp_design[temp_exp_design$label %in% overlapped_samples, ]
       cols <- colnames(data_unique)
       selected_cols <- which(!(cols %in% interest_cols))
+    } else {
+      interest_cols <- c("Index", "Gene", "ProteinID", "Peptide", "SequenceWindow", "Start", "End", "MaxPepProb", "ReferenceIntensity", "name", "ID")
+      data_unique <- data_unique[, colnames(data_unique) %in% c(interest_cols, overlapped_samples)]
+      temp_exp_design <- temp_exp_design[temp_exp_design$label %in% overlapped_samples, ]
+      cols <- colnames(data_unique)
+      selected_cols <- which(!(cols %in% interest_cols))
     }
     data_unique[selected_cols] <- apply(data_unique[selected_cols], 2, as.numeric)
 

diff --git a/R/normalization.R b/R/normalization.R
@@ -15,6 +15,17 @@ MD_normalization <- function(se) {
   return(se)
 }
 
+#' Variance-stabilizing normalization (VSN) of a SummarizedExperiment assay
+#'
+#' For LFQ and DIA experiments the assay values are exponentiated with base 2,
+#' a VSN model is fitted to them with `vsn::vsnMatrix()`, and the assay is
+#' replaced by the model's prediction for the same values. Objects of any
+#' other experiment type are returned unchanged.
+#'
+#' @param se A SummarizedExperiment. Its assay is assumed to hold
+#'   log2-transformed intensities (hence the `2 ^ assay(se)`) -- TODO confirm
+#'   against the code that builds `se`.
+#' @return `se`, with a VSN-transformed assay for LFQ/DIA experiments.
+#' @export
+VSN_normalization <- function(se) {
+  assertthat::assert_that(inherits(se, "SummarizedExperiment"))
+  # Gate on the experiment type (`exp`), not on `level`: `level` holds
+  # gene/protein/peptide/glycan and can never equal "LFQ" or "DIA", which
+  # turned this function into a silent no-op.
+  if (metadata(se)$exp %in% c("LFQ", "DIA")) {
+    # Compute the back-transformed matrix once and reuse it for fit + predict.
+    raw_intensity <- 2 ^ assay(se)
+    vsn.fit <- vsn::vsnMatrix(raw_intensity)
+    assay(se) <- vsn::predict(vsn.fit, raw_intensity)
+  }
+  return(se)
+}
+
 #' @export
 PTM_normalization <- function(ptm_se, se, print_progress=F) {
   pprot <- gsub("_.*", "", rowData(ptm_se)$Index)

diff --git a/R/pca.R b/R/pca.R
@@ -1,6 +1,10 @@
 #' @export
 plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"),
-                     label = FALSE, n = 500, point_size = 8, label_size = 3, plot = TRUE, ID_col = "ID", exp = "LFQ", scale=F, interactive = F) {
+                     label = FALSE, n = 500, point_size = 8, label_size = 3, plot = TRUE, ID_col = "label", exp = NULL, scale=F, interactive = F) {
+  if (is.null(exp)) {
+    exp <- metadata(dep)$exp
+  }
+
   if (is.integer(x)) x <- as.numeric(x)
   if (is.integer(y)) y <- as.numeric(y)
   if (is.integer(n)) n <- as.numeric(n)

diff --git a/README.md b/README.md
@@ -14,6 +14,9 @@ renv::install("bioc::SummarizedExperiment")
 renv::install("bioc::cmapR")
 renv::install("bioc::ConsensusClusterPlus")
 renv::install("Nesvilab/FragPipeAnalystR")
+
+# optional
+renv::install("nicolerg/ssGSEA2")
 ```
 
 ## Example