-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRnormfinderFunction.txt
253 lines (250 loc) · 6.79 KB
/
RnormfinderFunction.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# Version history: see bottom of file
#
Normfinder=function(filename,Groups=TRUE,ctVal=TRUE,pStabLim=0.25){
#
# If Groups is TRUE the last row contains the group identifier,
# and the last row must be started by a name for that row.
# No spaces are allowed in the gene names, sample names and group identifier.
#
dat0=read.table(filename,header=TRUE,row.names=1,colClasses="character")
#
ntotal=dim(dat0)[2] # number of samples
k0=dim(dat0)[1] # number of rows
#
if (Groups){
ngenes=k0-1 # number of genes
genenames=rownames(dat0)[-k0]
grId=dat0[k0,]
dat0=dat0[-k0,]
} else {
ngenes=k0 # number of genes
genenames=rownames(dat0)
grId=rep(1,ntotal)
}
#
dat=matrix(as.numeric(unlist(dat0)),ngenes,ntotal) # matrix instead of list
#
if (!ctVal){dat=log2(dat)} # transform to log2 values
#
samplenames=colnames(dat0)
grId=factor(unlist(grId)) # group identifier
groupnames=levels(grId) # group names
ngr=length(levels(grId)) # number of groups
# Number of samples in each group:
nsamples=rep(0,ngr)
for (group in 1:ngr){nsamples[group]=sum(grId==groupnames[group])}
#
#
MakeStab=function(da){
ngenes=dim(da)[1]
# Sample averages
sampleavg=apply(da,2,mean)
# Gene averages within group
genegroupavg=matrix(0,ngenes,ngr)
for (group in 1:ngr){
genegroupavg[,group]=apply(da[,grId==groupnames[group]],1,mean)}
# Group averages
groupavg=rep(0,ngr)
for (group in 1:ngr){groupavg[group]=mean(da[,grId==groupnames[group]])}
# Variances
GGvar=matrix(0,ngenes,ngr)
for (group in 1:ngr){
grset=(grId==groupnames[group])
a=rep(0,ngenes)
for (gene in 1:ngenes){
a[gene]=sum((da[gene,grset]-genegroupavg[gene,group]-
sampleavg[grset]+groupavg[group])^2)/(nsamples[group]-1)
}
GGvar[,group]=(a-sum(a)/(ngenes*ngenes-ngenes))/(1-2/ngenes)
}
#
# Change possible negative values
genegroupMinvar=matrix(0,ngenes,ngr)
for (group in 1:ngr){
grset=(grId==groupnames[group])
z=da[,grset]
for (gene in 1:ngenes){
varpair=rep(0,ngenes)
for (gene1 in 1:ngenes){varpair[gene1]=var(z[gene,]-z[gene1,])}
genegroupMinvar[gene,group]=min(varpair[-gene])/4
}
}
#
# Final variances
GGvar=ifelse(GGvar<0,genegroupMinvar,GGvar)
#
# Old stability measure for each gene is calculated:
#
dif=genegroupavg
difgeneavg=apply(dif,1,mean)
difgroupavg=apply(dif,2,mean)
difavg=mean(dif)
for (gene in 1:ngenes){
for (group in 1:ngr){
dif[gene,group]=dif[gene,group]-difgeneavg[gene]-difgroupavg[group]+difavg
}
}
#
nsampMatrix=matrix(rep(nsamples,ngenes),ngenes,ngr,byrow=T)
vardif=GGvar/nsampMatrix
gamma=sum(dif*dif)/((ngr-1)*(ngenes-1))-sum(vardif)/(ngenes*ngr)
gamma=ifelse(gamma<0,0,gamma)
#
difnew=dif*gamma/(gamma+vardif)
varnew=vardif+gamma*vardif/(gamma+vardif)
Ostab0=abs(difnew)+sqrt(varnew)
Ostab=apply(Ostab0,1,mean)
#
# Measure of group differences:
mud=rep(0,ngenes)
for (gene in 1:ngenes){
mud[gene]=2*max(abs(dif[gene,]))
}
# Common variance:
genevar=rep(0,ngenes)
for (gene in 1:ngenes){
genevar[gene]=sum((nsamples-1)*GGvar[gene,])/(sum(nsamples)-ngr)
}
Gsd=sqrt(genevar)
#
# Return results:
#
return(cbind(mud,Gsd,Ostab,rep(gamma,ngenes),GGvar,dif))
} # End of function MakeStab
#
#
MakeComb2=function(g1,g2,res){
gam=res[1,4]
d1=res[g1,(4+ngr+1):(4+ngr+ngr)]; d2=res[g2,(4+ngr+1):(4+ngr+ngr)]
s1=res[g1,(4+1):(4+ngr)]; s2=res[g2,(4+1):(4+ngr)]
rho=abs(gam*d1/(gam+s1/nsamples)+gam*d2/(gam+s2/nsamples))*
sqrt(ngenes/(ngenes-2))/2
rho=rho+sqrt(s1/nsamples+gam*s1/(nsamples*gam+s1)+
s2/nsamples+gam*s2/(nsamples*gam+s2))/2
return(mean(rho))
}
#
#
MakeStabOne=function(da){
ngenes=dim(da)[1]
# Sample averages
sampleavg=apply(da,2,mean)
# Gene averages
geneavg=apply(da,1,mean)
totalavg=mean(da)
#
# Variances
genevar0=rep(0,ngenes)
for (gene in 1:ngenes){
genevar0[gene]=
sum((dat[gene,]-geneavg[gene]-sampleavg+totalavg)^2)/
((ntotal-1)*(1-2/ngenes))
}
genevar=genevar0-sum(genevar0)/(ngenes*ngenes-ngenes)
#
# Change possible negative values
geneMinvar=rep(0,ngenes)
z=da
for (gene in 1:ngenes){
varpair=rep(0,ngenes)
for (gene1 in 1:ngenes){varpair[gene1]=var(z[gene,]-z[gene1,])}
geneMinvar[gene]=min(varpair[-gene])/4
}
# Final variances
genevar=ifelse(genevar<0,geneMinvar,genevar)
#
return(genevar)
}
# End of function MakeStabOne
#
#################################################
#
# Main part
#
if (ngr>1){ # More than one group.
#
res=MakeStab(dat)
#
gcand=c(1:ngenes)[res[,3]<pStabLim]
ncand=length(gcand)
if (ncand<4){
if (ngenes>3){
li=sort(res[,3])[4]
gcand=c(1:ngenes)[res[,3]<=li]
ncand=length(gcand)
} else {
gcand=c(1:ngenes)
ncand=length(gcand)
}
}
#
vv2=c()
#
for (g1 in 1:(ncand-1)){
for (g2 in (g1+1):ncand){
qmeas=MakeComb2(gcand[g1],gcand[g2],res)
vv2=rbind(vv2,c(gcand[g1],gcand[g2],qmeas))
}}
#
ord=order(res[,3])
FinalRes=list(Ordered=
data.frame("GroupDif"=round(res[ord,1],2),"GroupSD"=round(res[ord,2],2),
"Stability"=round(res[ord,3],2),row.names=genenames[ord]),
UnOrdered=
data.frame("GroupDif"=round(res[,1],2),"GroupSD"=round(res[,2],2),
"Stability"=round(res[,3],2),
"IGroupSD"=round(sqrt(res[,(4+1):(4+ngr)]),2),
"IGroupDif"=round(res[,(4+ngr+1):(4+ngr+ngr)],2),
row.names=genenames),
PairOfGenes=
data.frame("Gene1"=genenames[vv2[,1]],"Gene2"=genenames[vv2[,2]],
"Stability"=round(vv2[,3],2)))
#
return(FinalRes)
#
} else { # End of more than one group: next is for one group only.
#
#
sigma=sqrt(MakeStabOne(dat))
#
siglim=(min(sigma)+0.1)
gcand=c(1:ngenes)[sigma<siglim]
ncand=length(gcand)
#
if ((ncand>=2)&(ngenes>3)){
#
vv2=c()
#
for (g1 in 1:(ncand-1)){
for (g2 in (g1+1):ncand){
dat1=rbind(dat[-c(gcand[g1],gcand[g2]),],
apply(dat[c(gcand[g1],gcand[g2]),],2,mean))
qmeas=sqrt(MakeStabOne(dat1))
vv2=rbind(vv2,c(gcand[g1],gcand[g2],qmeas[ngenes-1]))
}}
ord=order(sigma)
FinalRes=list(Ordered=
data.frame("GroupSD"=round(sigma[ord],2),row.names=genenames[ord]),
PairOfGenes=
data.frame("Gene1"=genenames[vv2[,1]],"Gene2"=genenames[vv2[,2]],
"GroupSD"=round(vv2[,3],2)))
} else { # No combined genes to consider
ord=order(sigma)
FinalRes=list(Ordered=
data.frame("GroupSD"=round(sigma[ord],2),row.names=genenames[ord]))
} # End ncand<2 or ngenes<=3
#
return(FinalRes)
#
} # End one group only
#
} # End of main function
# Version history:
# Version dated 17/12-2013.
# Version dated 18/06-2014: Correction in line 121: s2 is squared.
# Version dated 23/08-2014: Correction to above correction in line 121:
# s1 and s2 are not squared.
# We thank John R. Young for pointing to the error in line 121.
# Version dated 05/01-2015: sum()/2 changed to mean() in line 126.
# This misprint appears in the Supplementary Information as well.
# We thank Mark Birtwistle for pointing out this error.