Skip to content

Commit 76f0a02

Browse files
authored
Merge pull request #3 from Nesvilab/dev
functions for glyco QC and combined site report
2 parents e5d49ff + a2a6be4 commit 76f0a02

File tree

8 files changed

+100
-9
lines changed

8 files changed

+100
-9
lines changed

DESCRIPTION

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: FragPipeAnalystR
22
Type: Package
33
Title: FragPipe downstream analysis in R
4-
Version: 0.1.0
4+
Version: 0.1.2
55
Author: Who wrote it
66
Maintainer: Yi Hsiao <yihsiao@umich.edu>
77
Description: More about what it does (maybe more than one line)
@@ -36,7 +36,8 @@ Imports:
3636
stringr,
3737
SummarizedExperiment,
3838
tibble,
39-
tidyr
39+
tidyr,
40+
vsn
4041
Suggests:
4142
clusterProfiler,
4243
devtools,

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ export(HeatmapAnnotation)
1111
export(MD_normalization)
1212
export(PSM_barplot)
1313
export(PTM_normalization)
14+
export(VSN_normalization)
1415
export(add_rejections)
1516
export(assay)
1617
export(average_samples)
@@ -31,6 +32,7 @@ export(plot_GSEA)
3132
export(plot_correlation_heatmap)
3233
export(plot_cvs)
3334
export(plot_feature)
35+
export(plot_glycan_distribution)
3436
export(plot_missval_heatmap)
3537
export(plot_or)
3638
export(plot_pca)
@@ -128,3 +130,5 @@ importFrom(tibble,rownames_to_column)
128130
importFrom(tidyr,gather)
129131
importFrom(tidyr,spread)
130132
importFrom(tidyr,unite)
133+
importFrom(vsn,predict)
134+
importFrom(vsn,vsnMatrix)

R/FragPipeAnalystR-package.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,5 +95,7 @@
9595
#' @importFrom tidyr gather
9696
#' @importFrom tidyr spread
9797
#' @importFrom tidyr unite
98+
#' @importFrom vsn predict
99+
#' @importFrom vsn vsnMatrix
98100
## usethis namespace: end
99101
NULL

R/glyco_QC.R

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
N_glycan_property <- function(glycan_string){
2+
# from https://www.sciencedirect.com/science/article/pii/S1535947620351070, Table II
3+
4+
glycan_composition <- list()
5+
monosaccharides <- c('HexNAc', 'Hex', 'Fuc', 'NeuAc', 'NeuGc')
6+
for (i in 1:length(monosaccharides)){
7+
8+
temp <- as.numeric(gsub(paste0(monosaccharides[i],"(\\d+)(\\w+)?"), "\\1", glycan_string))
9+
if (is.na(temp)) {
10+
glycan_composition[monosaccharides[i]] <- 0
11+
} else {
12+
glycan_composition[monosaccharides[i]] <- temp
13+
}
14+
glycan_string <- gsub(paste0(monosaccharides[i],"\\d+"), "", glycan_string)
15+
}
16+
17+
if (glycan_composition$Hex >= 5 & glycan_composition$HexNAc <= 2 & glycan_composition$Fuc <= 1){
18+
return('oligomannose')
19+
}
20+
if (glycan_composition$Fuc > 0){
21+
if (glycan_composition$NeuAc > 0 | glycan_composition$NeuGc > 0){
22+
return('fuco-sialylated')
23+
} else {
24+
return('fucosylated')
25+
}
26+
} else {
27+
if (glycan_composition$NeuAc > 0 | glycan_composition$NeuGc > 0)
28+
return('sialylated')
29+
else{
30+
return('neutral')
31+
}
32+
}
33+
}
34+
35+
# generate a barplot for number of glycoforms based on categories
36+
#' @export
37+
plot_glycan_distribution <- function(se) {
38+
df <- as.data.frame(table(sapply(gsub(" _.*", "", gsub(".*_Hex", "Hex", rownames(se))), N_glycan_property)))
39+
colnames(df)[1] <- "Category"
40+
df$Category <- factor(df$Category, levels = c("sialylated",
41+
"fuco-sialylated",
42+
"fucosylated",
43+
"neutral",
44+
"oligomannose"))
45+
p <- ggplot(df, aes(x=Category, y=Freq, fill=Category)) +
46+
geom_bar(stat = "identity") +
47+
scale_fill_manual(values = c("sialylated"="#BC8867",
48+
"fuco-sialylated"="#5C7099",
49+
"fucosylated"="#699870",
50+
"neutral"="#A45F61",
51+
"oligomannose"="#8077A1")) +
52+
theme_bw() +
53+
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
54+
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
55+
56+
return(p)
57+
}
58+

R/io.R

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ make.unique.2 <- function(x, sep = ".") {
2828
}
2929

3030
# internal function to read quantification table
31-
readQuantTable <- function(quant_table_path, type = "TMT", level=NULL, log2transform = F) {
31+
readQuantTable <- function(quant_table_path, type = "TMT", level=NULL, log2transform = F, exp_type=NULL) {
3232
temp_data <- read.table(quant_table_path,
3333
header = TRUE,
3434
fill = TRUE, # to fill any missing data
@@ -53,7 +53,9 @@ readQuantTable <- function(quant_table_path, type = "TMT", level=NULL, log2trans
5353
# validate(fragpipe_input_test(temp_data))
5454
# remove contam
5555
temp_data <- temp_data[!grepl("contam", temp_data$Protein),]
56-
temp_data$Index <- paste0(temp_data$`Protein ID`, "_", temp_data$`Peptide Sequence`)
56+
if (is.null(exp_type)) {
57+
temp_data$Index <- paste0(temp_data$`Protein ID`, "_", temp_data$`Peptide Sequence`)
58+
}
5759
} else {
5860
# handle - (dash) in experiment column
5961
colnames(temp_data) <- gsub("-", ".", colnames(temp_data))
@@ -170,12 +172,12 @@ make_se_from_files <- function(quant_table_path, exp_anno_path, type = "TMT", le
170172
llog2transform <- F
171173
}
172174

173-
if (!level %in% c("gene", "protein", "peptide")) {
175+
if (!level %in% c("gene", "protein", "peptide", "glycan")) {
174176
cat(paste0("The specified level: ", level, " is not a valid level. Available levels are gene, protein, and peptide.\n"))
175177
return(NULL)
176178
}
177179

178-
quant_table <- readQuantTable(quant_table_path, type = type, level=level)
180+
quant_table <- readQuantTable(quant_table_path, type = type, level=level, exp_type=exp_type)
179181
exp_design <- readExpDesign(exp_anno_path, type = type, lfq_type = lfq_type)
180182
if (type == "LFQ") {
181183
if (level != "peptide") {
@@ -215,12 +217,12 @@ make_se_from_files <- function(quant_table_path, exp_anno_path, type = "TMT", le
215217
lfq_columns <- setdiff(lfq_columns, grep("Total Intensity", colnames(data_unique)))
216218
lfq_columns <- setdiff(lfq_columns, grep("Unique Intensity", colnames(data_unique)))
217219
} else if (lfq_type == "MaxLFQ") {
218-
lfq_columns<-grep("MaxLFQ", colnames(data_unique))
220+
lfq_columns <- grep("MaxLFQ", colnames(data_unique))
219221
if (length(lfq_columns) == 0) {
220222
stop(safeError("No MaxLFQ column available. Please make sure your files have MaxLFQ intensity columns."))
221223
}
222224
} else if (lfq_type == "Spectral Count") {
223-
lfq_columns<-grep("Spectral", colnames(data_unique))
225+
lfq_columns <- grep("Spectral", colnames(data_unique))
224226
lfq_columns <- setdiff(lfq_columns, grep("Total Spectral Count", colnames(data_unique)))
225227
lfq_columns <- setdiff(lfq_columns, grep("Unique Spectral Count", colnames(data_unique)))
226228
}
@@ -286,6 +288,12 @@ make_se_from_files <- function(quant_table_path, exp_anno_path, type = "TMT", le
286288
temp_exp_design <- temp_exp_design[temp_exp_design$label %in% overlapped_samples, ]
287289
cols <- colnames(data_unique)
288290
selected_cols <- which(!(cols %in% interest_cols))
291+
} else {
292+
interest_cols <- c("Index", "Gene", "ProteinID", "Peptide", "SequenceWindow", "Start", "End", "MaxPepProb", "ReferenceIntensity", "name", "ID")
293+
data_unique <- data_unique[, colnames(data_unique) %in% c(interest_cols, overlapped_samples)]
294+
temp_exp_design <- temp_exp_design[temp_exp_design$label %in% overlapped_samples, ]
295+
cols <- colnames(data_unique)
296+
selected_cols <- which(!(cols %in% interest_cols))
289297
}
290298
data_unique[selected_cols] <- apply(data_unique[selected_cols], 2, as.numeric)
291299

R/normalization.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,17 @@ MD_normalization <- function(se) {
1515
return(se)
1616
}
1717

18+
#' @export
19+
VSN_normalization <- function(se) {
20+
assertthat::assert_that(inherits(se, "SummarizedExperiment"))
21+
data <- assay(se)
22+
if (metadata(se)$level %in% c("LFQ", "DIA")) {
23+
vsn.fit <- vsn::vsnMatrix(2 ^ assay(se))
24+
assay(se) <- vsn::predict(vsn.fit, 2 ^ assay(se))
25+
}
26+
return(se)
27+
}
28+
1829
#' @export
1930
PTM_normalization <- function(ptm_se, se, print_progress=F) {
2031
pprot <- gsub("_.*", "", rowData(ptm_se)$Index)

R/pca.R

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
#' @export
22
plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"),
3-
label = FALSE, n = 500, point_size = 8, label_size = 3, plot = TRUE, ID_col = "ID", exp = "LFQ", scale=F, interactive = F) {
3+
label = FALSE, n = 500, point_size = 8, label_size = 3, plot = TRUE, ID_col = "label", exp = NULL, scale=F, interactive = F) {
4+
if (is.null(exp)) {
5+
exp <- metadata(dep)$exp
6+
}
7+
48
if (is.integer(x)) x <- as.numeric(x)
59
if (is.integer(y)) y <- as.numeric(y)
610
if (is.integer(n)) n <- as.numeric(n)

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ renv::install("bioc::SummarizedExperiment")
1414
renv::install("bioc::cmapR")
1515
renv::install("bioc::ConsensusClusterPlus")
1616
renv::install("Nesvilab/FragPipeAnalystR")
17+
18+
# optional
19+
renv::install("nicolerg/ssGSEA2")
1720
```
1821

1922
## Example

0 commit comments

Comments
 (0)