This repository has been archived by the owner on May 4, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsubsampleRichness.R
225 lines (219 loc) · 8.13 KB
/
subsampleRichness.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# Custom functions are camelCase. Arrays, parameters, and arguments are PascalCase
# Dependency functions are not embedded in master functions
#############################################################################################################
########################################## Function for Calculating SQS #####################################
#############################################################################################################
# Load Required Libraries
if (require("doMC")==FALSE) {
install.packages("doMC")
library("doMC")
}
# Steve Holland's optimized SQS function
subsampleEvenness<-function(Abundance,Quota=0.9,Trials=100,IgnoreSingletons=FALSE,ExcludeDominant=FALSE) {
Abundance<-Abundance[Abundance>0]
if ((Quota <= 0 || Quota >= 1)) {
stop('The SQS Quota must be greater than 0.0 and less than 1.0')
}
# compute basic statistics
Specimens<-sum(Abundance)
NumTaxa<-length(Abundance)
Singletons<-sum(Abundance==1)
Doubletons<-sum(Abundance==2)
Highest<-max(Abundance)
MostFrequent<-which(Abundance==Highest)[1]
if (ExcludeDominant==FALSE) {
Highest<-0
MostFrequent<-0
}
# compute Good's u
U<-0
if (ExcludeDominant==TRUE) {
U<-1-Singletons/(Specimens-Highest)
}
else {
U<-1-Singletons/Specimens
}
if (U==0) {
stop('Coverage is zero because all taxa are singletons')
}
# re-compute taxon frequencies for SQS
FrequencyInitial<-Abundance-(Singletons+Doubletons/2)/NumTaxa
Frequency<-FrequencyInitial/(Specimens-Highest)
# return if the quorum target is higher than estimated coverage
if ((Quota>sum(Frequency)) || (Quota >= sum(Abundance))) {
stop('SQS Quota is too large, relative to the estimated coverage')
}
# create a vector, length equal to total number of specimens,
# each value is the index of that species in the Abundance array
IDS<-rep(1:NumTaxa,times=Abundance)
# subsampling trial loop
Richness<-rep(0,Trials) # subsampled taxon richness
for (Trial in 1:Trials) {
Pool<-IDS # pool from which specimens will be sampled
SpecimensRemaining<- length(Pool) # number of specimens remaining to be sampled
Seen<-rep(0,NumTaxa) # keeps track of whether taxa have been sampled
SubsampledFrequency<-rep(0,NumTaxa) # subsampled frequencies of the taxa
Coverage<-0
while (Coverage<Quota) {
# draw a specimen
DrawnSpecimen<-sample(1:SpecimensRemaining,size=1)
DrawnTaxon<-Pool[DrawnSpecimen]
# increment frequency for this taxon
SubsampledFrequency[DrawnTaxon]<-SubsampledFrequency[DrawnTaxon]+1
# if taxon has not yet been found, increment the coverage
if (Seen[DrawnTaxon]==0) {
if (DrawnTaxon!=MostFrequent&&(IgnoreSingletons==0||Abundance[DrawnTaxon]>1)) {
Coverage<-Coverage+Frequency[DrawnTaxon]
}
Seen[DrawnTaxon]<-1
# increment the richness if the Quota hasn't been exceeded,
# and randomly throw back some draws that put the coverage over Quota
if (Coverage<Quota || runif(1)<=Frequency[DrawnTaxon]) {
Richness[Trial]<-Richness[Trial]+1
}
else {
SubsampledFrequency[DrawnTaxon]<-SubsampledFrequency[DrawnTaxon]-1
}
}
# decrease pool of specimens not yet drawn
Pool[DrawnSpecimen]<-Pool[SpecimensRemaining]
SpecimensRemaining<-SpecimensRemaining-1
}
}
# compute subsampled richness
S2<-Richness[Richness>0]
SubsampledRichness<-exp(mean(log(S2)))*length(S2)/length(Richness)
return(round(SubsampledRichness,1))
}
# A multicore version of subsampleEvenness
multicoreEvenness<-function(Abundance,Quota=0.9,Trials=1000,IgnoreSingletons=FALSE,ExcludeDominant=FALSE,Cores=4) {
Abundance<-Abundance[Abundance>0]
if ((Quota <= 0 || Quota >= 1)) {
stop('The SQS Quota must be greater than 0.0 and less than 1.0')
}
# compute basic statistics
Specimens<-sum(Abundance)
Singletons<-sum(Abundance==1)
Doubletons<-sum(Abundance==2)
Highest<-max(Abundance)
MostFrequent<-which(Abundance==Highest)[1]
if (ExcludeDominant==FALSE) {
Highest<-0
MostFrequent<-0
}
# compute Good's u
U<-0
if (ExcludeDominant==TRUE) {
U<-1-Singletons/(Specimens-Highest)
}
else {
U<-1-Singletons/Specimens
}
if (U==0) {
stop('Coverage is zero because all taxa are singletons')
}
# re-compute taxon frequencies for SQS
FrequencyInitial<-Abundance-(Singletons+Doubletons/2)/length(Abundance)
Frequency<-FrequencyInitial/(Specimens-Highest)
# return if the quorum target is higher than estimated coverage
if ((Quota>sum(Frequency)) || (Quota >= sum(Abundance))) {
stop('SQS Quota is too large, relative to the estimated coverage')
}
# create a vector, length equal to total number of specimens,
# each value is the index of that species in the Abundance array
registerDoMC(cores=Cores)
Richness<-times(Trials) %dopar% {
multicoreResample(Abundance,Quota,MostFrequent,IgnoreSingletons,Frequency)
}
# compute subsampled richness
S2<-Richness[Richness>0]
SubsampledRichness<-exp(mean(log(S2)))*length(S2)/length(Richness)
return(round(SubsampledRichness,1))
}
# A dependency of multicoreEvenness
multicoreResample<-function(Abundance,Quota,MostFrequent,IgnoreSingletons,Frequency) {
Richness<-0
Pool<-rep(1:length(Abundance),times=Abundance) # pool from which specimens will be sampled
SpecimensRemaining<-length(Pool) # number of specimens remaining to be sampled
Seen<-vector("numeric",length=length(unique(Pool))) # keeps track of whether taxa have been sampled
SubsampledFrequency<-vector("numeric",length=length(unique(Pool))) # subsampled frequencies of the taxa
Coverage<-0
while (Coverage<Quota) {
# draw a specimen
DrawnSpecimen<-sample(1:SpecimensRemaining,size=1)
DrawnTaxon<-Pool[DrawnSpecimen]
# increment frequency for this taxon
SubsampledFrequency[DrawnTaxon]<-SubsampledFrequency[DrawnTaxon]+1
# if taxon has not yet been found, increment the coverage
if (Seen[DrawnTaxon]==0) {
if (DrawnTaxon!=MostFrequent&&(IgnoreSingletons==0||Abundance[DrawnTaxon]>1)) {
Coverage<-Coverage+Frequency[DrawnTaxon]
}
Seen[DrawnTaxon]<-1
# increment the richness if the Quota hasn't been exceeded,
# and randomly throw back some draws that put the coverage over Quota
if (Coverage<Quota || runif(1)<=Frequency[DrawnTaxon]) {
Richness<-Richness+1
}
else {
SubsampledFrequency[DrawnTaxon]<-SubsampledFrequency[DrawnTaxon]-1
}
}
# decrease pool of specimens not yet drawn
Pool[DrawnSpecimen]<-Pool[SpecimensRemaining]
SpecimensRemaining<-SpecimensRemaining-1
}
return(Richness)
}
# A function for resampling by a fixed number of individuals
subsampleIndividuals<-function(Abundance,Quota,Trials=100) {
Richness<-vector("numeric",length=Trials)
Abundance<-Abundance[Abundance>0]
Pool<-rep(1:length(Abundance),times=Abundance)
if (sum(Abundance) < Quota) {
print("Fewer Individuals than Quota")
return(length(unique(Pool)))
}
for (i in 1:Trials) {
Subsample<-sample(Pool,Quota,replace=FALSE)
Richness[i]<-length(unique(Subsample))
}
return(mean(Richness))
}
# A multicore version of subsampleIndividuals()
multicoreIndividuals<-function(Abundance,Quota,Trials=1000,Cores=4) {
Abundance<-Abundance[Abundance>0]
Pool<-rep(1:length(Abundance),times=Abundance)
if (sum(Abundance) < Quota) {
print("Fewer Individuals than Quota")
return(length(unique(Pool)))
}
registerDoMC(cores=Cores)
Richness<-times(Trials) %dopar% {
Subsample<-sample(Pool,Quota,replace=FALSE)
length(unique(Subsample))
}
return(mean(Richness))
}
# A variant of subsamplingIndividuals() that resamples with replacement
# for any samples that have diversities below the Quota.
# A function for resampling by a fixed number of individuals.
# This allows for diversity to be lower than the Quota.
# This is a non-standard procedure that is not recommended for general use.
resampleIndividuals<-function(Abundance,Quota,Trials=100) {
Richness<-vector("numeric",length=Trials)
Abundance<-Abundance[Abundance>0]
Pool<-rep(1:length(Abundance),times=Abundance)
if (sum(Abundance) < Quota) {
for (i in 1:Trials) {
Subsample<-sample(Pool,Quota,replace=TRUE)
Richness[i]<-length(unique(Subsample))
}
return(mean(Richness))
}
for (i in 1:Trials) {
Subsample<-sample(Pool,Quota,replace=FALSE)
Richness[i]<-length(unique(Subsample))
}
return(mean(Richness))
}