-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGCN.R
175 lines (162 loc) · 5.09 KB
/
GCN.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# snp wise correlation
#packages used
if(!("devtools" %in% rownames(installed.packages()))) install.packages("devtools"); .rootdir <- "/home/unfated/Tim"; source("/home/unfated/Tim/Tmisc/inst/Startup.R")
#17
## fixed.mem = 40gb
# a package
Tmisc()
Tim.load("Rplink",lib.dir="/home/unfated/Tim")
filename <- Rfilename("sim.10000.1000", seed = "prompt")
1
#0
# test.n <- 2000
# validation.n <- 2000
# m <- as.integer(nameoption(filename, 2))
# n.causal <- as.integer(nameoption(filename, 3))
library("RcppArmadillo")
library("data.table")
library("Matrix")
Tim.load("UKBBscripts", "/psychipc01/disk3/data/UKBB")
Tim.load("lassosum","/home/unfated/Tim")
Tim.load("crosspred", "/home/unfated/Tim")
#cl <- startParallel(10)
fixed.mem <- 40000
#bfile <- attachroot("~/DATA/UKBB2/data/genotype/whiteBritish1_train/whiteBritish1_train")
#bfile <- attachroot("/psychipc01/disk3/data/UKBB/data/genotype/whiteBritish1_train/whiteBritish1_train")
# bfile<-'/psychipc01/disk3/data/UKBB/data/genotype/imputed_0.01/maf_0.01_1'
#bfile<-"/psychipc01/disk3/data/UKBB/data/genotype/whiteBritish1_train/whiteBritish1_train1"
bfile<-'/home/unfated/chr1_block/block_1'
#bfile <- useWTCCC(.bfile = F)
N <- nrow.bfile(bfile)
m <- 10000
set.seed(1)
discovery.sample <- sample(N, m)
# others.sample <- setdiff(1:N, discovery.sample)
# validation.sample <- sample(others.sample, validation.n)
# others.sample <- setdiff(1:N, c(discovery.sample, validation.sample))
#
discovery <- logical.vector(discovery.sample, N)
# validation <- logical.vector(validation.sample, N)
p <- ncol.bfile(bfile)
prop = c(0.01,0.125,0.025,0.05,0.1,0.25,0.5,0.75,0.9,0.95)
# beta <- rep(0, p)
option=c('lasso')
holder=c('/home/unfated/Tim/')
holder2=c('/home/unfated/simulation/')
input=c('/home/unfated/chr1_block/block_')
impute=c('median')
i=1
library('KRIS')
library('glmnet')
library('monomvn')
library('DMwR')
IID=list()
data1<-NA
data3<-NA
bed=paste0(input,i,'.bed')
bim=paste0(input,i,'.bim')
fam=paste0(input,i,'.fam')
block=read.bed(bed,bim,fam)
z_1=block$snp[discovery,]
data1=data.frame(z_1)
id=c()
for(k in 1:ncol(data1)){
if(sum(is.na(data1[,k]))>nrow(data1)/2)
id=c(id,k)
}
IID[[i]]=id
data3=data1
if(length(id>0))
data3=data1[,-id]
if(impute=='1'){
for(j in 1:ncol(data3)){
data3[is.na(data3[,j]),j]=1
}}
if(impute=='2'){
for(j in 1:ncol(data3)){
data3[is.na(data3[,j]),j]=2
}}
if(impute=='mean'){
for(j in 1:ncol(data3)){
me=mean(data3[,j],na.rm=T)
data3[is.na(data3[,j]),j]=me
}}
if(impute=='median'){
for(j in 1:ncol(data3)){
md=median(data3[,j],na.rm=T)
data3[is.na(data3[,j]),j]=md
}}
if(impute=='0'){
for(j in 1:ncol(data3)){
data3[is.na(data3[,j]),j]=0
}}
if(impute=='KNN_avg'){
data2=knnImputation(data3, k = 5, scale = T, meth = "weighAvg", distData = NULL)
data3=data2}
if(impute=='KNN_med'){
data2=knnImputation(data3, k = 5, scale = T, meth = "median", distData = NULL)
data3=data2}
x1=as.matrix(data3)
for(i in 1:10){
n.causal = ceiling(p*prop[i])
library(STAT)
mu=rep(0,n.causal)
set.seed(1)
V=rWishart(1,n.causal,diag(n.causal))
V2=rWishart(1,n.causal,V[,,1])
corr=cov2cor(as.matrix(V2[,,1]))
library(MASS)
#cov=matrix(c(1,.5,.15,.15,0,.5,1,0.15,0.15,0,.15,.15,1,.25,0,.15,.15,.25,1,0,0,0,0,0,1),nrow=5,ncol=5)
set.seed(1)
effect=mvrnorm(n = 100, mu=mu, Sigma=corr, tol = 1e-6, empirical = FALSE)
# write.table(effect,'/home/unfated/simulation/effect.txt')
colnames(corr)<-paste0('SNP',1:ncol(corr))
rownames(corr)<-paste0('SNP',1:ncol(corr))
write.table(corr,'/home/unfated/simulation/correlation.txt')
p=ncol(data3)
traitnum=100
Betalasso=list()
for(j in 1:traitnum){
beta <- rep(0, p)
d=n.causal
beta[c(1:d)]<- effect[j,]
Xb<-x1%*%beta
herit <- 0.6
error <- rnorm(m, sd=sqrt(var(Xb)/herit*(1-herit)))
y <- Xb + error
cv1 = cv.glmnet(x1, y)
beta.ls_fit1=coef(cv1, s = "lambda.1se")
write.table(as.matrix(beta.ls_fit1),paste0(holder2,'beta_',impute,'.ls_fit',i,'_trait',j,'.txt'),col.names = F)
Betalasso[[j]]=data.frame(as.matrix(beta.ls_fit1))[,1][-1]
print(j)}
write.table(BETA,'/home/unfated/simulation/lassobeta.txt')
l=nrow(effect)*ncol(effect)
BETA=t(as.data.frame(Betalasso))
effect1=cbind(effect,data.frame(matrix(0,nrow=100,ncol=451-93)))
TN=0
TP=0
FP=0
FN=0
for(i in 1:nrow(effect1)){
for(j in 1:ncol(effect1)){
if(effect1[i,j]==0&& BETA[i,j]==0)
TN=TN+1
if(effect1[i,j]==0&& BETA[i,j]!=0)
FP=FP+1
if(effect1[i,j]!=0&& BETA[i,j]==0)
FN=FN+1
if(effect1[i,j]!=0&& BETA[i,j]!=0)
TP=TP+1
}}
print(paste0('FPR=',FP/(FP+TN)))
print(paste0('FNR=',FN/(FN+TP)))
print(paste0('FDR=',FP/(FP+TP)))
print(paste0('Power=',1-FN/(FN+TP)))
FPR=FP/(FP+TN)
FNR=FN/(FN+TP)
FDR=FP/(FP+TP)
c(FPR,FNR,FDR)
length(which(BETA[,1]==0))
length(which(BETA[,1]!=0))
}
}