From 59c526d42b0fc5f1e8d856fe14f0e9e82552d041 Mon Sep 17 00:00:00 2001 From: EmmaCartuyvels1 Date: Wed, 2 Oct 2024 14:57:14 +0200 Subject: [PATCH] Clean-up --- source/expl_analysis.Rmd | 198 ++++++++++++++++++++++++--------------- 1 file changed, 124 insertions(+), 74 deletions(-) diff --git a/source/expl_analysis.Rmd b/source/expl_analysis.Rmd index 93acdbd..47dc121 100644 --- a/source/expl_analysis.Rmd +++ b/source/expl_analysis.Rmd @@ -2,30 +2,30 @@ title: "Exploratory analysis" author: "Emma Cartuyvels, Ward Langeraert, Toon Van Daele" date: "2024-07-24" -output: html_document +output: + html_document: + code_folding: hide --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) library(sf) +library(lubridate) library(mapview) library(vcd) # For Cohen’s Kappa to measure Inter-Rater Reliability library(vegan) # For Bray-Curtis dissimilarity and other ecological metrics -library(ecodist) # For Mantel tests -library(ade4) # For beta diversity -library(spatstat) # For spatial pattern analysis -library(pscl) # For occupancy modeling -library(geosphere) # For spatial overlap library(dplyr) library(ggplot2) library(tidyr) +library(stringr) +library(ggpubr) library(INBOtheme) conflicted::conflicts_prefer(dplyr::filter) ``` -```{r data} +```{r data, cache=TRUE} birdcubeflanders_year_sf <- read_sf(here::here("data", "interim", "birdcubeflanders_year.gpkg")) @@ -243,7 +243,7 @@ range_comp <- function(sel_species, period = 2007:2022, ``` -```{r} +```{r, cache=TRUE} comp_range_data <- as.data.frame(studied_spec) comp_range_data$abv_squares <- NA comp_range_data$perc_abv_total_abv <- NA @@ -287,8 +287,13 @@ comp_range_data |> inner_join(abv_data_total |> distinct(species, category), by = join_by(studied_spec == species)) |> ggplot(aes(x = perc_abv_total_abv, y = perc_birdcube_total_abv, color = category)) + - geom_point() + geom_point() + + stat_cor(mapping = aes(color = NULL), + label.x.npc = "centre", + label.y.npc = "bottom", + method = "pearson") ``` + If we look at the graph it appears that the number of squares in which a species is observed in the ABV is correlated to the number of squares in which a species is observed in the cube. ```{r} @@ -296,7 +301,11 @@ comp_range_data |> inner_join(abv_data_total |> distinct(species, category), by = join_by(studied_spec == species)) |> ggplot(aes(x = perc_abv_total_abv, y = percentage_birdcube_spec_abv, color = category)) + - geom_point() + geom_point() + + stat_cor(mapping = aes(color = NULL), + label.x.npc = "centre", + label.y.npc = "bottom", + method = "pearson") ``` ```{r} @@ -304,7 +313,11 @@ comp_range_data |> inner_join(abv_data_total |> distinct(species, category), by = join_by(studied_spec == species)) |> ggplot(aes(x = perc_abv_total_abv, y = perc_cube_total_cube, color = category)) + - geom_point() + geom_point() + + stat_cor(mapping = aes(color = NULL), + label.x.npc = "centre", + label.y.npc = "bottom", + method = "pearson") ``` ```{r} @@ -346,6 +359,10 @@ comp_range_data2 |> by = join_by(studied_spec == species)) |> ggplot(aes(x = perc_abv_total_abv, y = perc_cube_total_cube, color = category)) + geom_point() + + stat_cor(mapping = aes(color = NULL), + label.x.npc = "centre", + label.y.npc = "bottom", + method = "pearson") + facet_grid("cyclus", scales = "free_y") @@ -454,25 +471,111 @@ time_series_cor |> ## Trend similarity ```{r} -abv_data_total |> +abv_dif <- abv_data_total |> group_by(cyclus, species) |> summarise(total = sum(individualCount)) |> pivot_wider(names_from = cyclus, names_prefix = "abv_", values_from = total, - values_fill = 0) + values_fill = 0) |> + mutate(dif1 = abv_2 - abv_1, + dif2 = abv_3 - abv_2, + dif3 = abv_4 - abv_3) -birdcubeflanders_year |> +cube_dif <- birdcubeflanders_year |> filter(species %in% studied_spec) |> group_by(cyclus, species) |> summarise(total = sum(n)) |> pivot_wider(names_from = cyclus, names_prefix = "cube_", values_from = total, - values_fill = 0) -``` - + values_fill = 0) |> + mutate(dif1_cube = cube_2 - cube_1, + dif2_cube = cube_3 - cube_2, + dif3_cube = cube_4 - cube_3) |> + select(species, dif1_cube, dif2_cube, dif3_cube) + +comp_dir <- abv_dif |> + select(species, dif1, dif2, dif3) |> + inner_join(cube_dif) |> + mutate(dif1 = dif1 > 0, + dif2 = dif2 > 0, + dif3 = dif3 > 0, + dif1_cube = dif1_cube > 0, + dif2_cube = dif2_cube > 0, + dif3_cube = dif3_cube > 0) |> + pivot_longer( + cols = !species + ) |> + mutate(set = ifelse(str_detect(name, "cube"), + "cube", + "abv")) |> + mutate(dif = str_sub(name, 1, 4)) |> + select(-name) |> + pivot_wider(names_from = set, + values_from = value) + + +Kappa(table(comp_dir[,c(3,4)])) +``` +Value of k | Strength of agreement +------- | -------- +< 0 | Poor +0.01 - 0.20 | Slight +0.21 - 0.40 | Fair +0.41 - 0.60 | Moderate +0.61 - 0.80 | Substantial +0.81 - 1.00 | Almost perfect + +```{r Kappa for common species} +abv_dif <- abv_data_total |> + filter(category %in% c("Rare")) |> + group_by(cyclus, species) |> + summarise(total = sum(individualCount)) |> + pivot_wider(names_from = cyclus, + names_prefix = "abv_", + values_from = total, + values_fill = 0) |> + mutate(dif1 = abv_2 - abv_1, + dif2 = abv_3 - abv_2, + dif3 = abv_4 - abv_3) +cube_dif <- birdcubeflanders_year |> + filter(species %in% abv_dif$species) |> + group_by(cyclus, species) |> + summarise(total = sum(n)) |> + pivot_wider(names_from = cyclus, + names_prefix = "cube_", + values_from = total, + values_fill = 0) |> + mutate(dif1_cube = cube_2 - cube_1, + dif2_cube = cube_3 - cube_2, + dif3_cube = cube_4 - cube_3) |> + select(species, dif1_cube, dif2_cube, dif3_cube) + +comp_dir <- abv_dif |> + select(species, dif1, dif2, dif3) |> + inner_join(cube_dif) |> + mutate(dif1 = dif1 > 0, + dif2 = dif2 > 0, + dif3 = dif3 > 0, + dif1_cube = dif1_cube > 0, + dif2_cube = dif2_cube > 0, + dif3_cube = dif3_cube > 0) |> + pivot_longer( + cols = !species + ) |> + mutate(set = ifelse(str_detect(name, "cube"), + "cube", + "abv")) |> + mutate(dif = str_sub(name, 1, 4)) |> + select(-name) |> + pivot_wider(names_from = set, + values_from = value) + + +Kappa(table(comp_dir[,c(3,4)])) +``` # 2. Occupancy Rate Comparison @@ -492,9 +595,9 @@ occupancy_2 <- birdcubeflanders_year %>% group_by(species) %>% summarize(occupancy_rate_2 = mean(n())) -occupancy_comparison <- occupancy_1 %>% - inner_join(occupancy_2, by = "species") %>% - summarize(kappa = Kappa(occupancy_rate_1, occupancy_rate_2)$value) +#occupancy_comparison <- occupancy_1 %>% +# inner_join(occupancy_2, by = "species") %>% +# summarize(kappa = Kappa(occupancy_rate_1, occupancy_rate_2)$value) ``` # 3. Species Richness and Composition @@ -526,58 +629,5 @@ species_composition_2 <- birdcubeflanders_year |> bray_curtis <- vegdist(rbind(species_composition_1[-1], species_composition_2[-1]), method = "bray") -``` - - -```{r} -# 4. Spatial Patterns -# Spatial autocorrelation with Moran's I -moran_1 <- moran.test(dataset1$presence, nb2listw(poly2nb(dataset1))) -moran_2 <- moran.test(dataset2$presence, nb2listw(poly2nb(dataset2))) - -# Spatial overlap -overlap <- geosphere::areaPolygon(intersect(st_union(dataset1), - st_union(dataset2))) - -# 5. Model-Based Comparisons (Occupancy Models) -# Fit occupancy models to both datasets -occupancy_model_1 <- zeroinfl(presence ~ species + offset(log(year)) | 1, - data = dataset1) -occupancy_model_2 <- zeroinfl(presence ~ species + offset(log(year)) | 1, - data = dataset2) - -# Compare model coefficients -summary(occupancy_model_1)$coefficients -summary(occupancy_model_2)$coefficients - -# 6. Detection/Non-Detection Agreement -detection_matrix_1 <- dataset1 %>% - group_by(species, geometry) %>% - summarize(detected = any(presence > 0)) - -detection_matrix_2 <- dataset2 %>% - group_by(species, geometry) %>% - summarize(detected = any(presence > 0)) - -concordance <- detection_matrix_1 %>% - inner_join(detection_matrix_2, - by = c("species", "geometry"), - suffix = c("_1", "_2")) %>% - summarize(agreement = mean(detected_1 == detected_2)) - -# 7. Temporal Synchrony (if applicable) -# Compare timing of species occurrences (phenology) -# This depends on more granular temporal data - -# 8. Data Quality Indicators -# Compare sampling effort -sampling_effort_1 <- dataset1 %>% - group_by(geometry, year) %>% - summarize(surveys = n()) - -sampling_effort_2 <- dataset2 %>% - group_by(geometry, year) %>% - summarize(surveys = n()) - -# Compare detection probabilities if available (requires further modeling) +bray_curtis ```