-
Notifications
You must be signed in to change notification settings - Fork 0
/
ScanR_modified.R
120 lines (104 loc) · 5.37 KB
/
ScanR_modified.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Author: Layla Hosseini-Gerami
# This R code snipped runs a modified CausalR ScanR process, but instead of taking the top n nodes from each path length (1-5), will instead take nodes with a p.value <= 0.05 from each path length (consensus nodes).
# The consensus nodes are then connected to the input nodes via concordant interactions to form subnetworks which can be concatenated to form a consensus subnetwork
# The ranked table at path length 5 is also output
# import packages
library(CausalR)
library(dplyr)
# define functions
rank_hypotheses <- function(ccg,expdata,delta) {
hypothesis <- RankTheHypotheses(ccg, expdata,delta,correctPredictionsThreshold=1,doParallel=TRUE,numCores=32,writeFile=FALSE)
hypothesis <- hypothesis[complete.cases(hypothesis), ]
hypothesis <- data.frame(uniprot = row.names(hypothesis), hypothesis)
return(hypothesis[!(hypothesis$'p.value'>0.05),])
}
rank_hypotheses_5 <- function(ccg,expdata,delta,results_dir) {
hypothesis <- RankTheHypotheses(ccg, expdata,delta,correctPredictionsThreshold=1,doParallel=TRUE,numCores=32,writeFile=TRUE,outputDir=results_dir)
hypothesis <- hypothesis[complete.cases(hypothesis), ]
hypothesis <- data.frame(uniprot = row.names(hypothesis), hypothesis)
return(hypothesis[!(hypothesis$'p.value'>0.05),])
}
# load network and input data
ccg <- CreateCCG("path_to_network.sif")
expData <- ReadExperimentalData("path_to_experimental_data.txt",ccg)
# scan the path lengths 1-5
run1 <- rank_hypotheses(ccg,expData,1)
colnames(run1)[3:9] <- paste("PL1_", colnames(run1[,c(3:9)]), sep = "")
run2 <- rank_hypotheses(ccg,expData,2)
colnames(run2)[3:9] <- paste("PL2_", colnames(run2[,c(3:9)]), sep = "")
run3 <- rank_hypotheses(ccg,expData,3)
colnames(run3)[3:9] <- paste("PL3_", colnames(run3[,c(3:9)]), sep = "")
run4 <- rank_hypotheses(ccg,expData,4)
colnames(run4)[3:9] <- paste("PL4_", colnames(run4[,c(3:9)]), sep = "")
run5 <- rank_hypotheses_5(ccg,expData,5,results_dir="path_to_resuts_dir")
colnames(run5)[3:9] <- paste("PL5_", colnames(run5[,c(3:9)]), sep = "")
# put all of the results together
big_run <- setNames(data.frame(matrix(ncol = 30, nrow = 0)), c("PL1_Score","PL1_Correct","PL1_Incorrect","PL1_Ambiguous","PL1_p.value","PL1_Enrichment.p.value","PL2_Score","PL2_Correct","PL2_Incorrect","PL2_Ambiguous","PL2_p.value","PL2_Enrichment.p.value","PL3_Score","PL3_Correct","PL3_Incorrect","PL3_Ambiguous","PL3_p.value","PL3_Enrichment.p.value","PL4_Score","PL4_Correct","PL4_Incorrect","PL4_Ambiguous","PL4_p.value","PL4_Enrichment.p.value","PL5_Score","PL5_Correct","PL5_Incorrect","PL5_Ambiguous","PL5_p.value","PL5_Enrichment.p.value"))
all_sig_hyp_2 <- c(row.names(run1),row.names(run2),row.names(run3),row.names(run4),row.names(run5))
all_sig_hyp_2 <- unique(all_sig_hyp_2)
for (hyp in all_sig_hyp_2){
big_run[hyp,"PL1_Score"]<-run1[hyp,"PL1_Score"]
big_run[hyp,"PL1_Correct"]<-run1[hyp,"PL1_Correct"]
big_run[hyp,"PL1_Incorrect"]<-run1[hyp,"PL1_Incorrect"]
big_run[hyp,"PL1_Ambiguous"]<-run1[hyp,"PL1_Ambiguous"]
big_run[hyp,"PL1_p.value"]<-run1[hyp,"PL1_p.value"]
big_run[hyp,"PL1_Enrichment.p.value"]<-run1[hyp,"PL1_Enrichment.p.value"]
big_run[hyp,"PL2_Score"]<-run2[hyp,"PL2_Score"]
big_run[hyp,"PL2_Correct"]<-run2[hyp,"PL2_Correct"]
big_run[hyp,"PL2_Incorrect"]<-run2[hyp,"PL2_Incorrect"]
big_run[hyp,"PL2_Ambiguous"]<-run2[hyp,"PL2_Ambiguous"]
big_run[hyp,"PL2_p.value"]<-run2[hyp,"PL2_p.value"]
big_run[hyp,"PL2_Enrichment.p.value"]<-run2[hyp,"PL2_Enrichment.p.value"]
big_run[hyp,"PL3_Score"]<-run3[hyp,"PL3_Score"]
big_run[hyp,"PL3_Correct"]<-run3[hyp,"PL3_Correct"]
big_run[hyp,"PL3_Incorrect"]<-run3[hyp,"PL3_Incorrect"]
big_run[hyp,"PL3_Ambiguous"]<-run3[hyp,"PL3_Ambiguous"]
big_run[hyp,"PL3_p.value"]<-run3[hyp,"PL3_p.value"]
big_run[hyp,"PL3_Enrichment.p.value"]<-run3[hyp,"PL3_Enrichment.p.value"]
big_run[hyp,"PL4_Score"]<-run4[hyp,"PL4_Score"]
big_run[hyp,"PL4_Correct"]<-run4[hyp,"PL4_Correct"]
big_run[hyp,"PL4_Incorrect"]<-run4[hyp,"PL4_Incorrect"]
big_run[hyp,"PL4_Ambiguous"]<-run4[hyp,"PL4_Ambiguous"]
big_run[hyp,"PL4_p.value"]<-run4[hyp,"PL4_p.value"]
big_run[hyp,"PL4_Enrichment.p.value"]<-run4[hyp,"PL4_Enrichment.p.value"]
big_run[hyp,"PL5_Score"]<-run5[hyp,"PL5_Score"]
big_run[hyp,"PL5_Correct"]<-run5[hyp,"PL5_Correct"]
big_run[hyp,"PL5_Incorrect"]<-run5[hyp,"PL5_Incorrect"]
big_run[hyp,"PL5_Ambiguous"]<-run5[hyp,"PL5_Ambiguous"]
big_run[hyp,"PL5_p.value"]<-run5[hyp,"PL5_p.value"]
big_run[hyp,"PL5_Enrichment.p.value"]<-run5[hyp,"PL5_Enrichment.p.value"]
# get consensus nodes
k <- 0
if (!is.na(big_run[hyp,"PL1_Score"])){
k <- k + 1
}
if (!is.na(big_run[hyp,"PL2_Score"])){
k <- k + 1
}
if (!is.na(big_run[hyp,"PL3_Score"])){
k <- k + 1
}
if (!is.na(big_run[hyp,"PL4_Score"])){
k <- k + 1
}
if (!is.na(big_run[hyp,"PL5_Score"])){
k <- k + 1
}
big_run[hyp,"Total"] <- k
top_nodes <- big_run[which(big_run$Total == max(big_run$Total)), ]
biggest_pl <- max(big_run$Total)
if(biggest_pl == 0){
print("no consensus nodes were found")
}
# write the results
list_top_nodes <- rownames(top_nodes)
for(node in list_top_nodes){
if (grepl('+',node,fixed=TRUE) == TRUE){
protein <- unlist(strsplit(node,'+',fixed=TRUE))
sign <- '+1'
} else {
protein <- unlist(strsplit(node,'-',fixed=TRUE))
sign <- '-1'
}
WriteExplainedNodesToSifFile(outputDir="path_to_results_dir",protein,sign,ccg,expData,delta=biggest_pl,correctlyExplainedOnly = TRUE)
}