-
Notifications
You must be signed in to change notification settings - Fork 0
/
multivariante.R
160 lines (126 loc) · 5 KB
/
multivariante.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#This scripts introduce multivariate analysis
library(vegan)
library(mvabund)
#PCA----
data(iris)
#prepare the data
#extract quantitative variables
ir <-
ir_species <-
#explore the data
#run pca
#princomp
pca <- prcomp(ir)
#methods print, summary, plot, biplot
#or nicer:
library(devtools)
install_github("ggbiplot", "vqv")
library(ggbiplot)
g <- ggbiplot(pca, obs.scale = 1, var.scale = 1,
groups = ir_species, ellipse = TRUE,
circle = TRUE)
g <- g + scale_color_discrete(name = '')
g <- g + theme(legend.direction = 'horizontal',
legend.position = 'top')
print(g)
#more tricks
predict(pca, newdata=(tail(log_ir, 2)))
#NMDS and PERMANOVA----
#The data
Herbivores <- read.csv(file = "data/Herbivore_specialisation.csv", header = TRUE)
#simplify objects to use
Habitat <-
DayNight <-
#select the community
Herb_community <-
#The basic is the distance measure you use:
#distance selection!----
?dist
?vegdist
?betadiver
#A few words:
#binary:
#Jackard
#Sorensen (This coefficient weights matches in species composition
#between the two samples more heavily than mismatches)
#quantitative
#Euclidian: simple distance, good for e.g. distance between sites
#bray: 0-1 The Bray-Curtis measure ignores cases in which the species
#is absent in both community samples, and it is dominated
#by the abundant species so that rare species add very little to the
#value of the coefficient
#morisita: 0-1 independent of sample size. Only for counts. Recomended.
#kulczynski: Weigth more rare species.
#gower (allows factors!)
#Best is Legendre book numerical ecology
#(only found Krebs online): http://www.zoology.ubc.ca/~krebs/downloads/krebs_chapter_12_2014.pdf
#NMDS: `metaMDS`
#plot
#Add legend
Habitat.uni <- unique(Habitat)
legend(x = -2.5, y = -1.5, Habitat.uni, pch=16, col = 1:5, cex = 0.8)
#Same with DayNight
plot(Herb_community.mds$points, col = DayNight, pch = 16)
legend(x = -2.5, y = -1.5, c("Day","Night"), pch=16, col = 1:5, cex = 0.8)
#assumptions:
Herb_community.mds$stress
#If the stress value is greater than 0.2, it is advisable to include an
#additional dimension, but remember that human brains are not very well
#equipped to visualise objects in more than 2-dimensions.
#Transformation and standardisation. Transforming data sets prior to
#creating a MDS plot is often desirable, not to meet assumptions of normality,
#but to reduce the influence of extreme values. For example,
Herb_community.sq <- sqrt(Herb_community)
Herb_community.sq.mds <- metaMDS(comm = Herb_community.sq,
distance = "bray", trace = FALSE)
plot(Herb_community.sq.mds$points, col = Habitat, pch = 16)
#alternative plot
ordiplot(Herb_community.mds,type="n")
ordihull(Herb_community.mds,groups=Habitat,draw="polygon",col="grey90",
label=FALSE)
orditorp(Herb_community.mds,display="species",col="red",air=0.01)
#Testing hypothesis PERMANOVA----
#First test: Are centroids different? `adonis`
#Second test: Is the spread different? `betadisper`
#There are a couple of problems with these kinds of analysis.
#First, their statistical power is very low, except for variables
#with high variance. This means that for variables which are less
#variable, the analyses are less likely to detect a treatment effect.
#Second, they do not account for a very important property of
#multivariate data, which is the mean-variance relationship.
#Typically, in multivariate datasets like species-abundance data
#sets, counts for rare species will have many zeros with little
#variance, and the higher counts for more abundant species will
#be more variable.
#The mvabund approach improves power across a range of species
#with different variances and includes an assumption of a mean-variance
#relationship. It does this by fitting a single generalised linear
#model (GLM) to each response variable with a common set of predictor
#variables. We can then use resampling to test for significant community
#level or species level responses to our predictors.
#mvabund way----
#create a mvabund object (similar to a data.frame)
Herb_spp <- mvabund(Herbivores[,5:11])
#explore data
boxplot(Herbivores[,5:11],horizontal = TRUE,las=2, main="Abundance")
meanvar.plot(Herb_spp, xlab = "mean", ylab = "variance")
plot(Herb_spp~Herbivores$Habitat, cex.axis=0.8, cex=0.8)
#model ALL species at once fitting many GLM's that share error structure.
mod1 <- manyglm(Herb_spp ~ Herbivores$Habitat, family="poisson")
#check assumptions
plot(mod1)
#refit
mod2 <- manyglm(Herb_spp ~ Herbivores$Habitat, family="negative_binomial")
plot(mod2)
#test
anova(mod2) #slow
#See which species differ where
anova(mod2, p.uni="adjusted")
#Allows for model complexity
mod3 <- manyglm(Herb_spp ~ Herbivores$Habitat*Herbivores$DayNight,
family="negative_binomial")
anova(mod3)
#Further reading:
#http://environmentalcomputing.net/introduction-to-mvabund/
#http://www.davidzeleny.net/anadat-r/doku.php/en:pcoa_nmds)
#https://jonlefcheck.net/2012/10/24/nmds-tutorial-in-r/