-
Notifications
You must be signed in to change notification settings - Fork 0
/
_TopGO.R
89 lines (72 loc) · 2.74 KB
/
_TopGO.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
## usage: Rscript topGO.R [gene ids] [go annotation]
## for i in *.genes; do Rscript topGO.R $i [annotation file]; done
library("topGO")
# this is the file containing gene IDs
args = commandArgs(trailingOnly=TRUE)
ids <- read.table(args[1], header=F)
#ids <- read.table("pzq.genes", header=F)
myGenes <- as.character(ids$V1)
ndSize <- 5
goCat <- 'MF'
nTerms <-50
# GO annotation file
refGO <- read.table(file=args[2], sep=" ", stringsAsFactor=F)
#
# Smp_000030 GO:0000502,GO:0005488,GO:0030234,GO:0042176
# Smp_000040 GO:0003777,GO:0005515,GO:0005871
#
names(refGO) = c('id', 'go')
ref.vec = strsplit(refGO$go, split=',', fixed=T)
names(ref.vec) <- refGO$id
allAnnotated <- refGO$id
geneList <- factor(as.integer(allAnnotated %in% myGenes))
names(geneList) <- allAnnotated
myGOdata <- new("topGOdata",
description = "topGO",
ontology = goCat,
allGenes = geneList, # specifies all annotated genes and of which those are yours
annot = annFUN.gene2GO,
gene2GO = ref.vec,
nodeSize = ndSize, # can change this!!
)
myGOdata
resultClassic <- runTest(myGOdata, algorithm="classic", statistic="Fisher") # classic algorithm doesn't consider GO hierarchy
resultTopgo <- runTest(myGOdata,algorithm="weight01",statistic="Fisher") #
#resultElim <- runTest(myGOdata,algorithm="elim",statistic="Fisher") # even less true positives
#resultClassic
resultTopgo
#resultElim
allRes <- GenTable(
myGOdata,
# classic = resultClassic,
topGO = resultTopgo,
#elim = resultElim,
orderBy = "topGO",
ranksOf = "topGO",
topNodes = nTerms # check:--> resultClassic --> "711 GO terms scored"
)
# add column with genes; add to table
allRes$genes <- sapply(allRes$GO.ID, function(x)
{
genes<-genesInTerm(myGOdata, x)
genes[[1]][genes[[1]] %in% myGenes]
})
#allRes$genes[which(allRes$topGO<0.05)]
# convert the gene list to a character vector
allRes$genes <-vapply(allRes$genes, paste, collapse = ",", character(1L))
# only write significant terms
allRes<-subset(allRes, as.numeric(allRes[,"topGO"])<0.05 | grepl("e-", allRes[,"topGO"]))
#allRes<-allRes[ which(as.numeric(allRes[,"topGO"])<0.05 | grepl("e-", allRes[,"topGO"])),]
outfile <- paste("topgo_", args[1], "_", goCat, "_", ndSize, ".txt", sep="")
write.table(allRes, outfile,sep="\t", quote=F, row.names = F) #
## visualise
# showSigOfNodes(myGOdata, score(resultTopgo), firstSigNodes = 5, useInfo ='all')
# printGraph(myGOdata, resultTopgo, firstSigNodes = 5, fn.prefix = "tGO", useInfo = "all", pdfSW = TRUE)
## retrive genes in a GO term
#allGO = genesInTerm(myGOdata)
#allGO["GO:0006732"]
## all annotated genes in significantly enriched terms
#sel.go <- names(score(resultTopgo))[1:5]
#selcTerm <- allRes$GO.ID[which(allRes$topGO<0.05)]
#selcGenes <- genesInTerm(myGOdata, whichGO=selcTerm)
#selcGenes