vignettes/lfq.Rmd

---
title: "Label Free Quantification (LFQ)"
author: "Christian Panse / Jonas Grossmann"
date: "2016/01/24"
output: 
  - slidy_presentation
  - rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{LabelFreeQuantification}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---


# load required R libraries

```{r}
library(ISCB2017)
library(knitr)
library(lattice)
```

# focus on proteins (peptides)

```{r}
# List the distinct proteins and the distinct peptides present in the XIC table.
unique(XIC$protein)
unique(XIC$peptide)
```

# How to extract XIC data?

https://biblio.ugent.be/publication/8502633

```{r fig.retina=3, fig.height=4}
# Size of the XIC table and number of rows per raw file, protein, and peptide.
dim(XIC)
table(XIC$filename)
table(XIC$protein)
table(XIC$peptide)

# Row selector: a single peptide measured in a single raw file.
xic.filter <- (XIC$filename == '20161115_13_G1_b.raw') &
  (XIC$peptide == 'IFTVPTETLQAVTK')

# Selected rows over the full rt range, conditioned on charge and filename.
lattice::xyplot(count ~ rt | as.character(charge) * filename,
                data = XIC,
                subset = xic.filter,
                type = 'h')

# Same rows, with the x axis restricted to rt between 62 and 65.
lattice::xyplot(count ~ rt | as.character(charge) * filename,
                data = XIC,
                subset = xic.filter,
                type = 'h',
                xlim = c(62, 65))
```

# plot filtered LC-MS map 

```{r fig.retina=3, fig.height=20}
# Add column `pim`: the value parentIonMass() returns for each peptide
# sequence (parentIonMass is not defined in this file).
peptide.rt$pim <- sapply(as.character(peptide.rt$peptide), parentIonMass)

# Plot pim against rt, grouped by raw file.
# The lattice argument is `groups`; the former `group =` only worked through
# partial argument matching, so it is spelled out here.
lattice::xyplot(pim ~ rt,
       groups = filename,
       data = peptide.rt,
       auto.key = TRUE)
```


# XIC

Requirement: query the RT for every peptide and every LC-MS run.

```{r fig.retina=3, fig.width=16, fig.height=12}
# One panel per raw file: peptide against rt, grouped by whether the file
# name contains 'GE'.
# `groups` is spelled out; `group =` relied on partial argument matching.
lattice::xyplot(peptide ~ rt | filename,
       groups = grepl('GE', filename),
       data = peptide.rt)
```


# Find for each LC-MS run the RT of the corresponding peptide

```{r}
# Highest score observed for every (filename, peptide) combination.
pmax <- aggregate(score ~ filename + peptide, data = peptide.rt, FUN = max)

# Keep the peptide.rt rows whose score equals that maximum, i.e. the
# best-scoring entry per peptide and raw file (ties keep all tied rows).
peptide.rt.max <- merge(peptide.rt, pmax,
                        by = c('peptide', 'filename', 'score'))
```

```{r eval=FALSE}
# Interactive inspection of the best-scoring rows; this chunk is not evaluated.
View(peptide.rt.max)
```

# merge and filter XIC and RT

```{r}
# Table sizes before the join.
dim(XIC)
dim(peptide.rt.max)

# Join on peptide and filename. Both inputs carry an `rt` column, which
# merge() renames to rt.x (from XIC) and rt.y (from peptide.rt.max).
XIC.rt <- merge(XIC, peptide.rt.max, by = c('peptide', 'filename'))
dim(XIC.rt)

# Keep only XIC points whose rt lies strictly within +/- eps of the rt taken
# from peptide.rt.max.
eps <- 0.5
filter <- with(XIC.rt, rt.y - eps < rt.x & rt.x < rt.y + eps)
XIC.rt.filtered <- XIC.rt[filter, ]
dim(XIC.rt.filtered)

#library(lattice())
#xyplot(log(count,2) ~ rt|protein ,group=charge, data=XIC.f[grepl("20161115_12_GE1_b.raw", XIC.f$filename),], type='h')
```

# Determine the max peak of each peptide

```{r fig.retina=3}
# Largest count per protein, peptide, charge, and raw file within the
# filtered rt window.
XIC.max <- aggregate(count ~ protein + peptide + charge + filename,
                     FUN = max,
                     data = XIC.rt.filtered)

# Raw counts per raw file, panelled by protein and by whether the file name
# contains 'GE'.
lattice::bwplot(count ~ filename | protein * grepl('GE', filename),
                data = XIC.max,
                scales = list(x = list(rot = 45)))

# Same layout on the natural-log scale.
lattice::bwplot(log(count) ~ filename | protein * grepl('GE', filename),
                data = XIC.max,
                scales = list(x = list(rot = 45)))

# Same layout with the log counts centred and scaled by the mean and sd of
# the log counts.
lattice::bwplot((log(count) - mean(log(count))) / sd(log(count)) ~ filename | protein * grepl('GE', filename),
                data = XIC.max,
                scales = list(x = list(rot = 45)))
```


# long to wide

```{r}
head(XIC.max)

# Reshape to one row per protein/peptide/charge and one column per raw file.
# value.var is given explicitly so dcast() does not have to guess the value
# column (and does not print its "Using count as value column" message).
M <- reshape2::dcast(XIC.max,
                     formula = protein + peptide + charge ~ filename,
                     value.var = "count")
head(M)

# Move the three identifier columns into the row names so that M holds only
# the per-file values.
rownames(M) <- paste(M[, 1], M[, 2], M[, 3])
M[, 1:3] <- NULL

# Combinations without a value in a given file are set to 0.
M[is.na(M)] <- 0
```

```{r eval=FALSE}
# Interactive inspection of the wide table; this chunk is not evaluated.
View(M)
```


```{r}
# Collapse the values in XIC.max to one number per protein and raw file,
# using three alternative summaries: sum, max, and mean.
S.sum <- aggregate(count ~ protein + grepl('GE', filename) + filename,
                   FUN = sum, data = XIC.max)

S.max <- aggregate(count ~ protein + grepl('GE', filename) + filename,
                   FUN = max, data = XIC.max)

S.mean <- aggregate(count ~ protein + grepl('GE', filename) + filename,
                    FUN = mean, data = XIC.max)

# Log summary per protein; one panel for file names without 'GE' and one for
# file names with 'GE'.
lattice::bwplot(log(S.sum$count) ~ S.sum$protein | grepl('GE', S.sum$filename),
                layout = c(2, 1))
lattice::bwplot(log(S.mean$count) ~ S.mean$protein | grepl('GE', S.mean$filename),
                layout = c(2, 1))
lattice::bwplot(log(S.max$count) ~ S.max$protein | grepl('GE', S.max$filename),
                layout = c(2, 1))
```

```{r fig.retina=3}
# Image of the asinh-transformed wide table (transposed).
image(t(asinh(as.matrix(M))))
```

```{r fig.retina=3, fig.height=8}
library(gplots)
# Clustered heatmap of the asinh-transformed wide table; wide margins leave
# room for the row and column labels.
gplots::heatmap.2(asinh(as.matrix(M)),
                  margins = c(20, 15),
                  trace = "none")
```

# RT normalization

http://bioconductor.org/packages/release/data/experiment/vignettes/msqc1/inst/doc/chromatography.html

# Two Group

Let's switch to mq data ...


```{r fig.retina=3}
# Group label per column of prxmq: six 'G' followed by six 'GE'
# (assumes prxmq has 12 columns in that order -- TODO confirm).
gg <- c(rep('G', 6), rep('GE', 6))
dim(prxmq)
table(gg)

# Three boxplots side by side; the previous settings are kept in `op` and
# restored at the end of the chunk.
op <- par(mfrow=c(1,3))
# example 0
idx <- 2
x <- log(unlist(prxmq[idx, ]),2)
boxplot(x ~ gg, main=row.names(prxmq)[idx])
t.test(x ~ gg,  alternative = "two.sided")

# example 1
idx <- which(row.names(prxmq) == "sp|P00331|ADH2_YEAST")
x <- log(unlist(prxmq[idx, ]),2)
boxplot(x ~ gg, main=row.names(prxmq)[idx])
t.test(x ~ gg,  alternative = "two.sided")

#example 2
idx <- which(row.names(prxmq) == "sp|P33302|PDR5_YEAST")
x <- log(unlist(prxmq[idx, ]),2)
boxplot(x ~ gg, main=row.names(prxmq)[idx])
t.test(x ~ gg,  alternative = "two.sided")

# Restore the graphics parameters changed above.
par(op)
```


```{r}
#' Row-wise two-group t-test on log2-transformed values.
#'
#' For every row of `S`, zeros are treated as missing, and the row is tested
#' only if each group has at least `min_obs` non-missing values.
#'
#' @param S data.frame or matrix; one row per feature, one column per sample.
#' @param groups vector with one group label per column of `S`.
#' @param min_obs minimum number of non-missing values required in every
#'   group; the default of 6 matches the former hard-coded `> 5`.
#' @return data.frame with columns `FC` (difference of the two group means of
#'   the log2 values, second group minus first), `p.value` (two-sided t-test)
#'   and `idx` (row index in `S`); row names are taken from `S`. Rows that do
#'   not meet `min_obs` are omitted; if none qualifies, a zero-row data.frame
#'   with the same three columns is returned.
expression_analysis <- function(S, groups, min_obs = 6){
  stopifnot(length(groups) == NCOL(S))

  # seq_len() (unlike 1:nrow(S)) yields no iterations for a zero-row input.
  per_row <- lapply(seq_len(nrow(S)), function(idx){
    x <- unlist(S[idx, ])
    x[which(x == 0)] <- NA

    # Number of usable observations per group actually present in `groups`.
    n_valid <- vapply(split(!is.na(x), groups, drop = TRUE), sum, integer(1))
    if (min(n_valid) < min_obs) {
      return(NULL)
    }

    tt <- t.test(log(x, 2) ~ groups, alternative = "two.sided")
    r <- cbind(FC = diff(tt$estimate), p.value = tt$p.value, idx = idx)
    row.names(r) <- row.names(S)[idx]
    r
  })

  combined <- do.call('rbind', per_row)
  if (is.null(combined)) {
    # Nothing qualified: still return the documented columns so that callers
    # can rely on `FC` and `p.value` being present.
    return(data.frame(FC = numeric(0), p.value = numeric(0), idx = numeric(0)))
  }

  as.data.frame(combined)
}

# Text before the first '_' of every distinct protein name in the XIC table.
SwissProtID <- sapply(strsplit(as.character(unique(XIC$protein)), split = '_'),
                      "[", 1)

# Rows of prxmq whose name matches at least one of these identifiers.
sp_filter <- grepl(paste(SwissProtID, collapse = '|'), rownames(prxmq))

# Two-group test restricted to the matching rows.
S.t.test <- expression_analysis(prxmq[sp_filter, ], gg)
S.t.test
```

# volcano plot - p.value vs FC

```{r fig.retina=3}
#' Volcano plot: -log10(p.value) against FC.
#'
#' @param S.t.test data.frame with numeric columns `FC` and `p.value`; the
#'   row names are used for highlighting and for the optional labels.
#' @param label if TRUE, the row names are written above the points.
#' @param ... further arguments passed to the initial plot() call
#'   (e.g. `xlim`).
#' @param fc_cutoff absolute FC above which a point can be marked in red; also
#'   the position of the two vertical guide lines. Must be passed by name.
#' @param p_cutoff p-value below which a point can be marked in red; also the
#'   position of the horizontal guide line. Must be passed by name.
#' @return NULL, invisibly; the function is called for the plot it draws.
plot_volcano <- function(S.t.test, label=FALSE, ..., fc_cutoff=0.5, p_cutoff=0.025){
  stopifnot(all(c("FC", "p.value") %in% names(S.t.test)))

  plot(-log(p.value,10) ~ FC, data=S.t.test,
       main="volcano plot",
       sub="p1946 prx", ...)
  abline(v=c(-fc_cutoff, fc_cutoff), col='grey')
  abline(h=-log(p_cutoff, 10), col='grey')

  # Points passing both cut-offs. which() drops rows where FC or p.value is
  # NA instead of producing all-NA rows in the subset.
  significant <- which(abs(S.t.test$FC) > fc_cutoff & S.t.test$p.value < p_cutoff)
  points(-log(p.value,10) ~ FC,
         data=S.t.test[significant, ],
         col='red', cex=0.5)

  # Rows whose name contains 'ZZ' or 'REV' are highlighted separately
  # (NOTE(review): presumably contaminant / decoy entries -- confirm).
  points(-log(p.value,10) ~ FC,
         data=S.t.test[grepl('ZZ', rownames(S.t.test)),],
         col='cyan')

  points(-log(p.value,10) ~ FC,
         data=S.t.test[grepl('REV', rownames(S.t.test)),],
         cex=2, lwd=2,
         col='green')

  if (label) {
    text(S.t.test$FC, -log(S.t.test$p.value,10), rownames(S.t.test), cex=0.5, pos=3)
  }

  invisible(NULL)
}

# Volcano plot of the current test results on a fixed FC axis.
plot_volcano(S.t.test, xlim = c(-10, 10))
```

# take all proteins


```{r}
# Repeat the two-group test on every row of prxmq and plot the result
# without labels.
S.t.test <- expression_analysis(prxmq, gg)
plot_volcano(S.t.test, label = FALSE, xlim = c(-10, 10))
```

# What happens if you mess up your annotation?
```{r}
#' Swap two randomly chosen elements of a vector.
#'
#' @param x a vector (or list).
#' @return `x` with the elements at two distinct, randomly drawn positions
#'   exchanged; `x` itself when it has fewer than two elements, because there
#'   is nothing to swap.
random_swap <- function(x){
   # sample() cannot draw two distinct positions from fewer than two.
   if (length(x) < 2) {
     return(x)
   }

   a <- sample(length(x), 2)
   # The right-hand side is evaluated before assignment, so this exchanges
   # the two positions without a temporary variable.
   x[a] <- x[rev(a)]

   x
   }
```

```{r fig.retina=5}
# 2 x 2 grid: the current result first, then three results obtained after
# successively swapping two randomly chosen group labels. The previous
# graphics settings are kept in `op` and restored at the end of the chunk.
op<-par(mfrow=c(2,2))
plot_volcano(S.t.test)
(gg <- random_swap(gg)); S.t.test <- expression_analysis(prxmq, gg); plot_volcano(S.t.test)
(gg <- random_swap(gg)); S.t.test <- expression_analysis(prxmq, gg); plot_volcano(S.t.test)
(gg <- random_swap(gg)); S.t.test <- expression_analysis(prxmq, gg); plot_volcano(S.t.test)

# Restore the graphics parameters changed above.
par(op)
```

* A viewpoint on volcano plots:
"Uses and misuses of the fudge factor in quantitative discovery proteomics", Proteomics 2016, 16, 1955–1960
http://onlinelibrary.wiley.com/doi/10.1002/pmic.201600132/epdf