forked from ameliaritger/mbon-shiny-app
-
Notifications
You must be signed in to change notification settings - Fork 0
/
shiny_prep.Rmd
320 lines (276 loc) · 15.3 KB
/
shiny_prep.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
---
title: "Shiny app - prep"
author: "Amelia Ritger"
date: "2/3/2020"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message=FALSE, warning=FALSE)
library(tidyverse)
library(janitor)
library(sf)
library(tmap)
library(vegan)
library(gt)
```
## Organize the data
```{r}
#Read in data
reef <- read_csv("MBONReef_Histogram.csv")
#Tidy up the data
reef_tidy <- reef %>%
clean_names() %>% #standardize names
#rename(latitude=lat_start, longitude=long_start) %>% #rename latitude and longitude
group_by(location) %>% #group by location to get average lat/long values for each location
mutate(latitude=head(lat_end,1), longitude=head(long_end,1)) %>% #get average lat/long values (because that's how that works...)
ungroup() %>% #really important, we don't want to confuse R!
pivot_longer("annelida_cirriformia_luxuriosa":"substrate_amphipod_tube_complex") %>% #make long form
separate(name, into="phylum", sep="_", remove=FALSE) %>% #Add column for phylum name
mutate(vectorized_name=str_split(name, pattern="_")) %>% #In case this is useful...
filter(!phylum=="no") %>% #remove values related to data collection issue
filter(!phylum=="substrate") #remove substrate values
#Because it's faster to do it outside tidyverse
reef_tidy$binary <- ifelse(reef_tidy$value>0, 1, 0) #Add presence/absence column
reef_tidy$species <- gsub("^[^_]*_","",reef_tidy$name, perl=TRUE) #Add column for species name
reef_tidy$longitude <- ifelse(reef_tidy$longitude<0, reef_tidy$longitude, -reef_tidy$longitude) #because all longitude values in this region should be negative - looking at you, Rodes...
#Do some more tidying
reef_tidy <- reef_tidy %>%
mutate(species = str_replace(species, pattern="tube worm", "tubeworm")) %>% #replace "tube worm" with "tubeworm" for later grouping
separate(species, into="genus", sep="_", remove=FALSE) %>% #Add column for genus name
mutate(species=str_replace_all(species, "_", " "),
species=str_to_sentence(species)) %>% #Make species names actually look like species names
filter(!str_detect(species, pattern="dead")) #Remove instances where organism is dead
#mutate(species_clean = str_remove(species, pattern="unknown"))
#replace(species_clean, "hey there", str_detect(species_clean, "sponge"))
#Now time for some massive if, else statements to group organisms not identified to species
reef_tidy <- reef_tidy %>%
mutate(grouped_species = ifelse(str_detect(species, pattern = " worm"), "Other worms", ifelse(str_detect(species, pattern = "phoronid"), "Other worms", ifelse(str_detect(species, pattern = "tubeworm"), "Tubeworms", ifelse(str_detect(species, pattern = "algae"), "Other Algaes", ifelse(str_detect(species, pattern = "Filamentous"), "Other Algaes", ifelse(str_detect(species, pattern = "turf"), "Other Algaes", ifelse(str_detect(species, pattern = "blade"), "Other Algaes", ifelse(str_detect(species, pattern = "tunicate"), "Other Tunicates", ifelse(str_detect(species, pattern = "anemone"), "Other anemones", ifelse(str_detect(species, pattern = "bryozoan"), "Other bryozoans", ifelse(str_detect(species, pattern = "White fan"), "Other bryozoans", ifelse(str_detect(species, pattern = "sponge"), "Other Sponges", ifelse(str_detect(species, pattern = "Orange encrusting"), "Other Sponges", ifelse(str_detect(species, pattern = "Haliclona sp"), "Other Sponges", ifelse(str_detect(species, pattern = "zigzag"), "Other Hydroids", species)))))))))))))))) %>%
mutate(grouped_genus = ifelse(str_detect(grouped_species, pattern = "Other"), grouped_species, ifelse(str_detect(grouped_species, pattern = "orange"), grouped_species, ifelse(str_detect(grouped_species, pattern = "White"), grouped_species, ifelse(str_detect(grouped_species, pattern = "encrusting"), grouped_species, ifelse(str_detect(grouped_species, pattern = "zigzag"), grouped_species, ifelse(str_detect(grouped_species, pattern = "solitary"), grouped_species, ifelse(str_detect(grouped_species, pattern = "sectioned"), grouped_species, genus)))))))) %>% #do the same for genus
mutate(grouped_genus = str_to_title(grouped_genus)) %>% #capitalize genus name
mutate(phylum = str_to_title(phylum)) %>% #capitalize phylum name
mutate(kingdom = "Animalia")
genusnames <- unique(reef_tidy$genus)
```
## Create different datasets for different uses
#### Subset for a phylum
```{r, message=FALSE, warning=FALSE}
reef_phylum <- reef_tidy %>%
filter(binary > "0") %>% #filter out species not present
mutate(focal_genus="Corynactis") %>% #pick a focal phylum (BASED ON INPUT)
mutate(to_match = ifelse(grouped_genus==focal_genus, filename, "FALSE")) %>% #create a column that we can subset all rows in a plot based on the presence of focal phylum in the plot at least once
filter(filename %in% to_match) %>% #if focal phylum is present, keep all observations of that plot ("filename")
#filter(!phylum=="annelida") %>% #remove the focal phylum, because we don't need to see co-occurrence of focal phylum with focal phylum (BASED ON INPUT)
filter(location=="Mohawk") %>%
distinct(filename, genus, .keep_all=TRUE) #remove duplicate values within the same plot
```
#### Subset for a location
```{r, message=FALSE, warning=FALSE}
reef_location <- reef_tidy %>%
filter(binary > "0") %>% #filter out species not present
mutate(focal_location="Rodes") %>% #pick a focal location (BASED ON INPUT)
mutate(to_match = ifelse(location==focal_location, filename, "FALSE")) %>% #create a column that we can subset all rows in a plot based on the presence of focal phylum in the plot at least once
filter(filename %in% to_match) %>% #if focal phylum is present, keep all observations of that plot ("filename")
#filter(!phylum=="annelida") %>% #remove the focal phylum, because we don't need to see co-occurrence of focal phylum with focal phylum (BASED ON INPUT)
filter(phylum=="annelida") %>% #select only the coocurring phyla you want to look at (BASED ON INPUT)
distinct(filename, .keep_all=TRUE) #remove duplicate values within the same plot
#Find average abundance values for each location
reef_summary2 <- reef_tidy %>%
filter(binary > "0") %>% #filter out species not present
filter((grouped_genus=="Corynactis")|(phylum=="Corynactis")) %>%
group_by(location) %>% #group by location, then lat/long
summarize(Abundance = mean(value), #get the mean count
median_count = median(value),
sd_count = sd(value), #get the s.d. count
sample_size = n())
#Create separate dataframe of just latitude, longitude, and locations (use for later plotting species diversity/richness at each location)
reef_location_unique <- reef_tidy %>%
distinct(location, latitude, longitude) %>%
st_as_sf(coords=c("longitude", "latitude"), crs=4326) #Make sticky geometries for lat/long
```
#### Find species diversity/richness for each site
```{r, message=FALSE, warning=FALSE}
#Prep data
reef_vegan <- reef_tidy %>% #named so because of the vegan package!
group_by(location,grouped_species) %>% #group by location, then lat/long
summarize(mean_count = mean(value)) %>% #get the mean count
select(location, grouped_species, mean_count) %>%
ungroup()
#Calculate species diversity and richness for each site
reef_vegan_subset <- reef_vegan %>%
pivot_wider(names_from=grouped_species, values_from=mean_count) %>%
select(`Abietinaria spp`:`Zonaria farlowii`)
diversity <- diversity(reef_vegan_subset)
richness <- specnumber(reef_vegan_subset)
# #Create a function to generate a continuous color palette
# Pal <- colorRampPalette(c('red','blue'))
# #Add a column of color values based on the diversity values
# reef_vegan %>%
# mutate(color = Pal(10)[as.numeric(cut(reef_vegan$diversity,breaks = 10))])
# #or,
# reef_vegan$color_new <- colorRampPalette(c("blue", "red"))(length(unique(reef_vegan$diversity)))
#
# #Add this information to the vegan dataframe
# reef_vegan <- reef_location_unique %>%
# add_column(diversity, richness, color=Pal(10)[as.numeric(cut(diversity(reef_vegan_subset),breaks = 10))])
# # Or
# reef_vegan <- reef_location_unique %>%
# add_column(diversity, richness) %>%
# mutate(color_brandnew = colorRampPalette(c("yellow", "red"))(length(richness)))
reef_vegan <- reef_location_unique %>%
add_column(diversity, richness)
```
## Let's try plotting something
#### The mean counts of each phylum (or species) occurring at each location:
```{r}
ggplot(reef_summary, aes(x=phylum,y=mean_count)) +
geom_point(color="black",
size=2) +
geom_errorbar(aes(x=phylum,
ymin=mean_count - sd_count,
ymax=mean_count + sd_count),
width=0.1) +
scale_y_continuous(limits = c(-10,45)) +
facet_wrap(~location)
```
#### The median counts of each phylum (or species) occurring at each location:
```{r}
ggplot(reef_summary, aes(x=phylum,y=median_count)) +
geom_point(color="black",
size=2) +
geom_errorbar(aes(x=phylum,
ymin=median_count - iqr,
ymax=median_count + iqr),
width=0.1) +
scale_y_continuous(limits = c(-10,45)) +
facet_wrap(~location)
```
I will allow people to pick their phylum (or species) of interest and pick their location of interest
### Let's try plotting something else
#### The counts of co-occuring phyla (or species) with a focal phylum (or species)
```{r}
ggplot(reef_phylum, aes(x=fct_rev(phylum))) +
geom_bar() +
coord_flip()
```
I will allow people to pick their focal phylum and co-occuring phylum
### Create a table of percent co-occurence of focal genus and neighbor genera (PICK UP TO 3), given total number times focal genus made an appearance
```{r}
#Find number of times focal phylum makes an appearance
reef_focal <- reef_tidy %>%
filter(binary > "0") %>%
mutate(to_match = ifelse(phylum %in% c("cnidaria"), filename, "FALSE")) %>% #create a column that we can subset all rows in a plot based on the presence of focal phylum in the plot at least once
filter(filename %in% to_match) %>% #if focal phylum is present, keep all observations of that plot ("filename")
filter(!str_detect(to_match, pattern="FALSE")) %>% #drop rows containing FALSE
distinct(filename) #get unique plots containing focal phylum
present <- nrow(reef_focal)
#Find number of times neighbor phyla make an appearance
reef_neighbor <- reef_tidy %>%
filter(binary > "0") %>%
mutate(to_match = ifelse(phylum %in% c("arthropoda", "chordata", "echinodermata"), filename, "FALSE")) %>%
filter(filename %in% to_match) %>%
filter(!str_detect(to_match, pattern="FALSE")) %>%
distinct(filename)
absent <- nrow(reef_neighbor)
#Find number of times focal phylum co-occurs with neighbor genus
reef_together <- reef_tidy %>%
filter(binary > "0") %>%
mutate(to_match = ifelse(phylum %in% "cnidaria", filename, "FALSE")) %>%
filter(filename %in% to_match) %>%
mutate(to_match = ifelse(phylum %in% c("arthropoda", "chordata", "echinodermata"), filename, "FALSE")) %>% #do the same thing again for the neighbor phyla
filter(filename %in% to_match) %>%
distinct(filename)
neighborly <- nrow(reef_together)
#Create basic table
reef_table <- as.data.frame(cbind(present, absent, neighborly))%>%
mutate(percent_occur = neighborly/present,
percent_absent = neighborly/absent)
#Generate nice table using `gt`
reef_table %>%
gt() %>%
fmt_percent(columns=vars(percent_occur, percent_absent), decimal=2) %>% #convert to %
tab_options(table.width = pct(80)) %>% #make the table width 80% of the page width
cols_label(present="Focal genus present",
absent = "Neighbor genera present",
neighborly="Genera present together",
percent_occur="Percent co-occurrence",
percent_absent = "Percent times neighbors occur without focal")
```
Some alternative methods to creating a table
```{r}
df1 <- tribble(
~'neighbor', ~'focal', ~'together',
nrow(reef_neighbor), nrow(reef_focal), nrow(reef_together)
) %>%
mutate(percent_focal = together/focal,
percent_neighbor = together/neighbor) %>%
gt() %>%
fmt_percent(columns=vars(percent_focal, percent_neighbor), decimal=1) %>%
tab_options(table.width = pct(80)) %>% #make the table width 80% of the page width
cols_label(focal="Focal phylum present",
neighbor = "Neighbor phyla present",
together="Phyla present together",
percent_focal="Percent focal co-occurrs with neighbors",
percent_neighbor="Percent neighbors co-occurs with focal")
df2 <- as.data.frame(cbind(nrow(reef_focal), nrow(reef_neighbor), nrow(reef_together))) %>%
mutate(percent_focal = V3/V1,
percent_neighbor = V3/V2) %>%
gt() %>%
fmt_percent(columns=vars(percent_focal, percent_neighbor), decimal=1) %>%
tab_options(table.width = pct(80)) %>% #make the table width 80% of the page width
cols_label(V1="Focal phylum present",
V2 = "Neighbor phyla present",
V3="Phyla present together",
percent_focal="Percent focal co-occurrs with neighbors",
percent_neighbor="Percent neighbors co-occurs with focal")
```
```{r}
reef_summary <- reef_tidy %>%
filter(binary > "0") %>% #filter out species not present
st_as_sf(coords=c("longitude", "latitude"), crs=4326) %>% #Make sticky geometries for lat/long
group_by(location,phylum,orientation) %>% #group by location, then lat/long (and then orientation for vert/horizontal comparisons)
summarize(`mean abundance` = mean(value), #get the mean count
median_count = median(value),
sd_count = sd(value), #get the s.d. count
iqr = IQR(value), #get the interquartile range for the count
sample_size = n()) %>%
filter(location=="Carp Reef",
str_detect(orientation,pattern="l")) #add filter to separate vertical from horizontal
library(paletteer)
ggplot(data=reef_summary, aes(x=phylum, y=sample_size)) +
geom_col() +
coord_flip() +
ylab("Abundance") +
xlab("Phylum") +
theme_minimal()
```
### Let's make an interactive map! (mean counts of each phylum at each location)
```{r}
reef_sf <- st_as_sf(reef_summary, coords=geometry, crs=4326) #make data sf geometry friendly
reef_map <- tm_basemap("Esri.WorldImagery") +
tm_shape(reef_sf) + #tells tmap where we're looking (i.e. california)
#tm_dots(labels="value", col="purple", size=0.5) #tells tmap to plot points on map labelled with id associated
tm_symbols(id="location", col = "purple", size = "mean_count", scale = 3) +
tm_facets(by = "phylum")
tmap_mode("view") #view = interactive
reef_map
```
### Let's make an interactive map! (species diversity/richness at each site)
```{r}
reef_index_sf <- st_as_sf(reef_vegan, coords=geometry, crs=4326) #make data sf geometry friendly
reef_map <- tm_basemap("Esri.WorldImagery") +
tm_shape(reef_index_sf) + #tells tmap where we're looking (i.e. california)
#tm_dots(labels="value", col="purple", size=0.5) #tells tmap to plot points on map labelled with id associated
tm_symbols(id="location", col = "diversity", size = "diversity", scale = 3, palette = "inferno", n = 9, contrast = c(1, 0.5)) #view species diversity or richness (BASED ON INPUT)
tmap_mode("view") #view = interactive
reef_map
ggplot(data=reef_index_sf, aes(x=location, y=diversity)) +
geom_col(aes(fill=diversity)) +
scale_fill_viridis_c(option = "B", begin = 1, end = 0.5) +
#scale_fill_brewer(palette="Oranges") +
#scale_fill_gradient(low="#FFF8C4",high="#B74202") +
#8E3004 #FFFBD4
xlab("Location") +
ylab("diversity") +
coord_flip() +
theme_minimal()
```