forked from bienflorencia/rBiodiversidata
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02_estimation_of_species-richness.R
212 lines (169 loc) · 8.79 KB
/
02_estimation_of_species-richness.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# -----------------------------------
# Author: Florencia Grattarola
# Date: 7 December 2020
# Contact: flograttarola@gmail.com
# -----------------------------------
# Estimation of species-richness
### Description
# This is a script that calculates estimated values of species-richness per grid-cell. The functions uses the package iNEXT.
# ------------------------------------------------------------------------------
#############
# LIBRARIES #
#############
library(iNEXT)
library(sf)
library(tidyverse)
# ------------------------------------------------------------------------------
########
# DATA #
########
# These objects were created with the previous script (Biodiversity metrics calculation)
Tetrapods_GIS
Grid_UY25_Tetrapods
Grid_UY25
# ------------------------------------------------------------------------------
#############
# FUNCTIONS #
#############
# Function to calculate the frequency of species incidence data per grid-cell (incidence matrix)
# Each grid-cell is further divided into sub-grids of 1x1 km to create a species incidence dataset at the grid-cell level by counting the number of sub-gridded cells that contained occurrence records for individual species
getSpeciesSampledGrids <- function(grid_plygons, data_points) {
dataSpeciesSampledGrids <- data.frame(Id=character(),
sampleGridId=character(),
speciesName = character(),
abundance= numeric(),
stringsAsFactors=FALSE)
for(Grid_i in unique(grid_plygons$Id)){
data_points_i <- st_crop(data_points, st_bbox(grid_plygons %>% filter(Id==Grid_i))) %>% select(species)
isEmpty <- st_intersection(st_geometry(grid_plygons[grid_plygons$Id==Grid_i,]),
st_geometry(data_points_i))
if(length(isEmpty)!=0) {
boxSubGridGrid <- st_bbox(grid_plygons %>% filter(Id==Grid_i))
subGrid <- st_make_grid(st_as_sfc(boxSubGridGrid),
cellsize = 1000, what='polygons', square = TRUE)
subGrid <- st_sf(index = 1:length(lengths(subGrid)), subGrid)
for(subGrid_i in unique(subGrid$index)){
isEmpty <- st_intersection(st_geometry(subGrid[subGrid_i,]), st_geometry(data_points_i))
if(length(isEmpty)!=0){
dataSubGrid <- st_join(x= subGrid[subGrid_i,],
y=data_points_i,
left=TRUE, join = st_contains)
speciesAbundance <- dataSubGrid %>% as.data.frame() %>% group_by(index, species) %>% count()
for(i in 1:nrow(speciesAbundance)){
dataSpeciesSampledGrids_i <- data.frame(Id=Grid_i,
sampleGridId = speciesAbundance$index[[i]],
speciesName = speciesAbundance$species[[i]],
abundance=speciesAbundance$n[[i]],
stringsAsFactors=FALSE)
dataSpeciesSampledGrids <- rbind(dataSpeciesSampledGrids, dataSpeciesSampledGrids_i)
}
}
}
}
cat(str_c(Grid_i, ': done ', '\n'))
}
return(dataSpeciesSampledGrids)
}
# Function to split groups and name them
named_group_split <- function(.tbl, ...) {
grouped <- group_by(.tbl, ...)
names <- rlang::eval_bare(rlang::expr(paste(!!!group_keys(grouped), sep = " / ")))
grouped %>%
group_split() %>%
rlang::set_names(names)
}
# This function will be used to return a named list will use it inside our `createDataForiNEXT`was created by [romainfrancois](https://github.com/romainfrancois) and can be found here: https://gist.github.com/romainfrancois/75df56ef682efaff5eee118450fb7a8c
# Function to split groups and name them
createDataForiNEXT <- function(data, gridsToInclude) {
as_tibble(data) %>%
filter(Id %in% gridsToInclude) %>%
mutate(abundance=ifelse(abundance==0, 0, 1)) %>%
mutate(sampleGridId= str_c('X', sampleGridId, sep='')) %>%
mutate(speciesName=str_replace(speciesName, ' ', '_')) %>%
group_by(Id, add = TRUE) %>%
pivot_wider(names_from = sampleGridId,
values_from = abundance,
values_fill = list(abundance = 0)) %>%
select_if(~ !is.numeric(.) || sum(.) != 0) %>%
named_group_split(Id) %>%
map(select_if, (~!is.numeric(.) || sum(.) != 0)) %>%
map(select, -Id) %>%
map(column_to_rownames, var='speciesName')
}
# Function to get the Cmax value.
# To determine Cmax, each sample within the grid unit was first extrapolated to double the reference sample size, then Cmax was calculated as the minimum among the coverage values obtained from those extrapolated samples
getCmax <- function(data_iNEXT) {
Cmax = 1
for (grid in data_iNEXT$iNextEst){
doubleSampleReference = max(grid$t)
sampleCoverage = unique(grid$SC[grid$t==doubleSampleReference])
Cmax = ifelse(sampleCoverage<Cmax, sampleCoverage, Cmax)
}
return(Cmax)
}
# Function to get the sampling coverage at C5% (5% percentile of sampling coverage at doubled sample sizes).
getPercentileSC <- function(data_iNEXT, i) {
n <- nrow(data_iNEXT$DataInfo)
x = (n*i)/ 100
index = floor(x+1)
SC_doubleSampleReference <- c()
for (grid in data_iNEXT$iNextEst){
doubleSampleReference = max(grid$t)
sampleCoverage = unique(grid$SC[grid$t==doubleSampleReference])
SC_doubleSampleReference <- c(SC_doubleSampleReference, sampleCoverage)
}
SC <- as_tibble(SC_doubleSampleReference) %>%
arrange(value) %>% slice(index) %>% pull(value)
return(SC)
}
# ------------------------------------------------------------------------------
#######
# RUN #
#######
##### Tetrapods
Tetrapods_dataSpeciesSampledGrids <- getSpeciesSampledGrids(Grid_UY25, Tetrapods_GIS)
# 1) First we include all grids for analysis
Tetrapods_allGrids <- Tetrapods_dataSpeciesSampledGrids %>% distinct(Id) %>% pull
Tetrapods_allGrids_listByGrid <- createDataForiNEXT(Tetrapods_dataSpeciesSampledGrids,
Tetrapods_allGrids)
# 2) We then check using the function DataInfo from iNEXT those grids for which S.obs>6 & 'T'>6 & 'T'!= Q1 and run the function createDataForiNEXT again excluding the grid-cells that not match with our criteria (U = sum of species incidences, T = number of sub-grids where at least one incidence was found, Q2 = number of duplicates, Q1 = number of uniques, Sobs = observed number of species
Tetrapods_gridsToInclude <- DataInfo(Tetrapods_allGrids_listByGrid, datatype = "incidence_raw") %>%
filter(S.obs>6 & `T`>6 & U!= Q1) %>% filter(SC>=0.1) %>%
pull((site))
# 3) Now we create a new list of grids, only including those that match our criteria
Tetrapods_listByGrid <- createDataForiNEXT(Tetrapods_dataSpeciesSampledGrids, Tetrapods_gridsToInclude)
# 4) run iNEXT
Tetrapods_iNEXT <-iNEXT(Tetrapods_listByGrid, q=c(0,1,2), datatype="incidence_raw")
# 5) calculate Cmax and C5% coverage
Tetrapods_Cmax <- getCmax(Tetrapods_iNEXT)
Tetrapods_Richness_Cmax <- estimateD(Tetrapods_listByGrid, "incidence_raw",
base="coverage", level=Tetrapods_Cmax)
Tetrapods_C5 <- getPercentileSC(Tetrapods_iNEXT, 5)
Tetrapods_Richness_C5 <- estimateD(Tetrapods_listByGrid, "incidence_raw",
base="coverage", level=Tetrapods_C5)
# 6) Join the data generated to the spatial data
Grid_UY25_Tetrapods <- left_join(x= left_join(x=Grid_UY25,
y=Tetrapods_iNEXT$DataInfo %>%
rename(Id=site), by='Id'),
y=bind_cols(Tetrapods_Richness_Cmax,
Tetrapods_Richness_C5 %>% rename_all(paste0, "1")) %>%
filter(order==0 & order1==0) %>%
rename(Id=site), by='Id')
# ------------------------------------------------------------------------------
#########
# PLOTS #
#########
plot_iNext_Tetrapods_Sobs <- ggplot() +
geom_sf(data = Grid_UY25_Tetrapods, aes(fill=S.obs)) +
scale_fill_distiller(palette = "Spectral", na.value="#ede8e8") +
geom_sf(data = Uruguay, color='black', fill=NA) +
labs(fill = 'Tetrapods\nObserved\nSpecies\nRichness') +
theme_bw()
plot_iNext_Tetrapods_SCmax <- ggplot() +
geom_sf(data = Grid_UY25_Tetrapods, aes(fill=qD)) +
scale_fill_distiller(palette = "Spectral", na.value="#ede8e8") +
geom_sf(data = Uruguay, color='black', fill=NA) +
labs(fill = 'Tetrapods\nSpecies\nRichness\nat Cmax') +
theme_bw()
plot_iNext_Tetrapods <- grid.arrange(plot_iNext_Tetrapods_Sobs, plot_iNext_Tetrapods_SCmax, ncol=2)
ggsave('figures/plot_iNext_Tetrapods_UY25.png', plot_iNext_Tetrapods, width = 12, height = 7, units = "in", dpi = 150)