-
Notifications
You must be signed in to change notification settings - Fork 0
/
GSE54563.R
95 lines (66 loc) · 2.9 KB
/
GSE54563.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#GSE54563
library(GEOquery)
library(ggplot2)
library(sva)
library(limma)
setwd("~/data/MDD_review/metaanalysis")
# Run a principal component analysis over the samples of an expression matrix.
#
# exp_mat: numeric matrix (or object coercible with as.matrix) with features
#   in rows and samples in columns. It is transposed before the PCA so that
#   samples are the observations.
# Returns the `prcomp` object (centred and scaled to unit variance; sample
#   scores are in `$x`).
#
# Features containing non-finite values or having zero variance are dropped
# first: prcomp() refuses NA/NaN/Inf input and cannot rescale a constant
# column to unit variance. Input without such features is analysed unchanged.
run_pca <- function(exp_mat) {
  exp_mat <- as.matrix(exp_mat)
  usable <- apply(exp_mat, 1L, function(values) {
    all(is.finite(values)) && isTRUE(stats::var(values) > 0)
  })
  if (!any(usable)) {
    stop("run_pca: no feature with finite, non-constant values", call. = FALSE)
  }
  if (!all(usable)) {
    message(
      "run_pca: dropping ", sum(!usable),
      " feature(s) with non-finite or constant values"
    )
  }
  prcomp(
    x = t(exp_mat[usable, , drop = FALSE]),
    retx = TRUE,
    center = TRUE,
    scale. = TRUE
  )
}
# Download the series matrix (with GPL annotation) for the dataset.
gseid <- "GSE54563"
gse <- getGEO(gseid, GSEMatrix = TRUE, AnnotGPL = TRUE)

# When the series comes back as several ExpressionSets, pick the one whose
# name mentions GPL570. Fail with a clear message instead of letting
# `gse[[idx]]` break on a zero-length or multi-element index.
if (length(gse) > 1) {
  idx <- grep("GPL570", names(gse))
  if (length(idx) != 1L) {
    stop(
      "Expected exactly one GPL570 entry in ", gseid, ", found ", length(idx),
      call. = FALSE
    )
  }
} else {
  idx <- 1
}
gse <- gse[[idx]]

# make proper column names to match toptable
fvarLabels(gse) <- make.names(fvarLabels(gse))

# Decide from the value distribution whether the data still needs a log2
# transform: either the 99th percentile exceeds 100, or the full range
# exceeds 50 while the lower quartile is positive.
ex <- exprs(gse)
qx <- as.numeric(quantile(ex, c(0, 0.25, 0.5, 0.75, 0.99, 1), na.rm = TRUE))
LogC <- (qx[5] > 100) ||
  (qx[6] - qx[1] > 50 && qx[2] > 0)
if (LogC) {
  # log2 is undefined for non-positive values; mark them as missing first.
  ex[which(ex <= 0)] <- NaN
  exprs(gse) <- log2(ex)
}
# Build the sample sheet: keep control and MDD samples only, with accession,
# disease label and tissue. `%in%` is used instead of `==` so that samples
# with a missing disease state are dropped rather than turned into all-NA rows.
gse.pheno <- pData(gse)
keep_samples <- gse.pheno$`disease state:ch1` %in% c("Control", "MDD case")
gse.pheno <- gse.pheno[
  keep_samples,
  c("geo_accession", "characteristics_ch1.1", "characteristics_ch1")
]
colnames(gse.pheno) <- c("geo_accession", "label", "tissue")
gse.pheno$tissue <- gsub(
  "tissue: brain anterior cingulate cortex", "ACC", gse.pheno$tissue
)
gse.pheno$label <- gsub("disease state: Control", "control", gse.pheno$label)
gse.pheno$label <- gsub("disease state: MDD case", "depression", gse.pheno$label)

# PCA of the selected samples, used for batch assignment and outlier removal.
# It has to be computed here: the script only defines `pca_raw` further down,
# so referring to it at this point fails on a clean top-to-bottom run.
# Rows of `pca_qc$x` are in the same order as the rows of `gse.pheno`.
pca_qc <- run_pca(exprs(gse)[, gse.pheno$geo_accession])

# Samples with a negative PC1 score are assigned to batch "B", all others "A".
# NOTE(review): the PC1 sign split and the PC2 cut-off of -190 are
# dataset-specific constants, presumably read off a PCA plot -- confirm they
# still separate the same samples.
gse.pheno$batch <- "A"
gse.pheno$batch[pca_qc$x[, 1] < 0] <- "B"

# Drop samples whose PC2 score is not above -190, then subset the
# ExpressionSet to the remaining samples.
gse.pheno <- gse.pheno[pca_qc$x[, 2] > -190, ]
gse <- gse[, gse.pheno$geo_accession]
# Keep an untouched copy of the expression values before any correction.
gse.raw <- exprs(gse)

# Surrogate variable analysis: full model (disease label + batch) against an
# intercept-only null model.
mod <- model.matrix(~ as.factor(label) + batch, data = gse.pheno)
mod0 <- model.matrix(~ 1, data = gse.pheno)
svobj <- sva(exprs(gse), mod, mod0)

# Check how strongly the first surrogate variable tracks the batch assignment.
glm.sv1 <- glm(svobj$sv[, 1] ~ gse.pheno[, "batch"])
summary(glm.sv1)

# Remove the batch effect and the surrogate variables while protecting the
# first two columns of `mod` (intercept and disease label), then store the
# corrected values back into the ExpressionSet.
gse.sva <- removeBatchEffect(
  x = gse,
  batch = gse.pheno$batch,
  covariates = svobj$sv,
  design = mod[, 1:2]
)
exprs(gse) <- gse.sva
# Regress label and batch out of every probe and add the probe's intercept
# back, giving a probes x samples matrix on the original expression scale.
#
# All probes are fitted in a single call: lm() with a matrix response fits one
# least-squares model per response column. This replaces a foreach/%dopar%
# loop, which cannot run here because the foreach package is never attached
# and no parallel backend is registered.
# Assumes `gse.raw` has no missing values, so no sample is dropped by lm().
lm_fit <- lm(t(gse.raw) ~ label + batch, data = gse.pheno)

# Force matrix shape so that a single-probe input is handled as well:
# residuals are samples x probes, coefficients are terms x probes.
lm_resid <- matrix(residuals(lm_fit), nrow = ncol(gse.raw))
lm_intercept <- matrix(coef(lm_fit), ncol = nrow(gse.raw))[1, ]

# t(lm_resid) is probes x samples; the intercept vector (one value per probe)
# recycles down each column, so every row gets its own intercept added back.
dat_lm <- t(lm_resid) + lm_intercept
dimnames(dat_lm) <- dimnames(gse.raw)
# PCA of the expression data at three stages: uncorrected, after the
# per-probe linear-model adjustment, and after SVA/batch removal.
pca_raw <- run_pca(gse.raw)
pca_lm <- run_pca(dat_lm)
pca_sva <- run_pca(gse.sva)

# Plot PC1 against PC2 for each stage, coloured by disease label.
# - The plots are print()ed explicitly so they are also drawn when the script
#   is run through source(), where top-level values are not auto-printed.
# - The label is copied into the plotted data frame instead of referencing an
#   external vector inside aes().
# - The former par(mfrow = ...) call was dropped: ggplot2 draws with grid
#   graphics, which ignore base-graphics layout settings.
pca_stages <- list(raw = pca_raw, lm = pca_lm, sva = pca_sva)
for (stage in names(pca_stages)) {
  scores <- as.data.frame(pca_stages[[stage]]$x)
  scores$label <- gse.pheno$label
  print(
    ggplot(scores, aes(PC1, PC2, col = label)) +
      geom_point() +
      ggtitle(paste0("PCA: ", stage))
  )
}
# Differential expression (depression vs control) with limma.
# NOTE(review): at this point exprs(gse) holds the values already corrected by
# removeBatchEffect(), and `batch` is modelled again here. The limma
# documentation advises putting batch factors in the linear model rather than
# removing them beforehand -- confirm this double adjustment is intended.
design <- model.matrix(~ 0 + label + batch, gse.pheno)
fit <- lmFit(gse, design) # fit linear model

# set up contrasts of interest and recalculate model coefficients
cts <- paste("labeldepression", "labelcontrol", sep = "-")
cont.matrix <- makeContrasts(contrasts = cts, levels = design)
fit2 <- contrasts.fit(fit, cont.matrix)

# compute statistics and table of top significant genes
# (0.01 is the assumed proportion of differentially expressed genes)
fit2 <- eBayes(fit2, proportion = 0.01)
tT <- topTable(fit2, adjust.method = "fdr", sort.by = "B", number = Inf)

# Keep the reporting columns only and write a tab-separated table named after
# the GEO series id.
report_cols <- c(
  "ID", "adj.P.Val", "P.Value", "t", "B", "logFC", "Gene.symbol", "Gene.title"
)
tT <- tT[, report_cols]
write.table(tT, file = paste0(gseid, ".txt"), row.names = FALSE, sep = "\t")