-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmultidiversity.R
120 lines (97 loc) · 4.68 KB
/
multidiversity.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#' Calculate the mean of top X values
#'
#' created by Eric Allan
#'
#' @param x a vector?
#' @param n Specifies the number of values which are chosen for mean calculation.
#'
#' @return The maximum based on several values (e.g. mean of top 5).
#' @examples
#' blabla TODO
#'
#' @export
maxx <- function(x, n, ...){
return(mean(rev(sort(x))[1:n], ...))}
#' Calculate multifunctionality
#'
#' created by Eric Allan
#'
#' @param x Input data frame or matrix. Rows are different diversities/ functions for multifun/div calculation. Columns are
#' replicates, e.g. plots.
#' @param threshold can be a proportion of the maximum e.g. 0.5 or can be the median or mean. It can also
#' be a vector of the same length as the number of diversities to allow different thresholds for different diversities.
#' threshold = FALSE calculates the mean of the scaled diversities or functions
#' @param sc scaling can be by any function specified in "sc", i.e. max, mean, sd etc., max is the default. If the maximum
#' should be a mean of the top n values specify sc = "maxx", mx = n
#' @param mx only if sc = "maxx", when the maximum should be a mean of the top n values, mx = n.
#' @param cent centering by the mean is possible with cent = TRUE, to take z-scores of the diversities/processes, use sc="sd" and cent = TRUE
#' @param by specifies a vector of the same length as nrow(x) to be used to split the data and use different thresholds for the groups in "by"
#' @param weights allows different weightings for the different diversities/functions and should be a vector of the same length
#' as the number of diversities/functions (shorter vectors will be recycled) to weight a diversity as 0 and drop it from the calculation,
#' code the weight as "NA"
#' the default is weights = 1: all diversities weighted the same
#'
#' @return matrix with 2 columns, the first column is the multidiversity/ -functionality of each plot, the second being the
#' number of measured (non NA) functions per plot.
#' @examples
#' # create fantasy dataset with 3 functions f1, f2 and f3 and 30 plots.
#' set.seed(2)
#' fundataset <- data.table::data.table(f1 = runif(30, min=0, max=100), f2 = runif(30, min=0, max=1), f3 = runif(30, min=0, max=10))
#' # calculate multifunctionality of this dataset, being the proportion of functions per plot which exceed the threshold of 0.5
#' # of the mean of maximum 5 functions.
#' multidiv(fundataset, threshold = 0.5, sc = "maxx", mx = 5, cent=FALSE, weights = 1)
#'
#' @export
multidiv <- function(x, threshold=FALSE, sc = "max", mx = 1, cent = FALSE, by =FALSE, weights = 1){
result <- matrix(nrow=nrow(x), ncol=2)
weights <- rep_len(weights, length.out=ncol(x)) ### expand weights
### split the data on "by"
if(any(by!=FALSE)){
xs <- split(x, by)
xst <- list()
for(i in 1:length(unique(by))){
if(mx > 1){
xst[[i]] <- scale(xs[[i]], scale = apply(xs[[i]], 2, maxx, mx, na.rm=T), center = cent)}
else{
xst[[i]] <- scale(xs[[i]], scale = apply(xs[[i]], 2, match.fun(sc), na.rm=T), center = cent)
}}
x.stand <- do.call("rbind", xst)
}
### otherwise standardise globally
else{
if(mx > 1){
x.stand <- scale(x, scale = apply(x, 2, maxx, mx, na.rm=T), center = cent)}
else{
x.stand <- scale(x, scale = apply(x, 2, match.fun(sc), na.rm=T), center = cent)
}}
#### sum diversities measured on each plot
#### NA weighted diversities are dropped from the calculation
x2 <- sweep(x, 2, weights, "*") ## remove diversities with NA threshold from calc. of how many measured
gm <- apply(x2, 1, function(x)(sum(complete.cases(x))))
### no threshold: average standardised values
if(FALSE %in% threshold){ ### prevent error message if threshold is vector
x.stand2 <- sweep(x.stand, 2, weights, "*")
m <- apply(x.stand2, 1, mean, na.rm=T)
}
else{
if (any(c("median","mean") %in% threshold)){ ### mean or median threshold
tf <- match.fun(threshold)
x.thresh <- apply(x.stand, 2, function(x)(1*(x > tf(x, na.rm=T))))
weights2 <- matrix(rep(weights, nrow(x.thresh)), nrow=nrow(x.thresh), byrow=TRUE)
x.thresh <- x.thresh*weights2 ### multiply by weights
gg <- apply(x.thresh, 1, sum, na.rm=T)
m <- gg/gm
}
else{
x.thresh <- 1*sweep(x.stand, 2, threshold, ">") ### does each variable pass threshold?
weights2 <- matrix(rep(weights, nrow(x.thresh)), nrow=nrow(x.thresh), byrow=TRUE)
x.thresh <- x.thresh*weights2 ### multiply by weights
gg <- apply(x.thresh, 1, sum,na.rm=T)
m <- gg/gm
}}
result[, 1] <- m
result[, 2] <- gm
colnames(result)<-c("m", "groups measured")
return(result)
}
NULL