diff --git a/source/expl_analysis.Rmd b/source/expl_analysis.Rmd index b84aed1..79e7886 100644 --- a/source/expl_analysis.Rmd +++ b/source/expl_analysis.Rmd @@ -28,7 +28,7 @@ We summarise the ABV data per year and per km² so that it is comparable with th ```{r transform abv} abv_data_total_tf <- abv_data_total |> - group_by(species, year, verbatimLocality) |> + group_by(species, year, TAG) |> summarise(n = sum(individualCount)) |> ungroup() ``` @@ -70,9 +70,10 @@ abv_data_total_tf |> summarise(n()) |> st_drop_geometry() ``` + ## The cube data -There are 666 species present in the data. 355 of these were observed less than a 100 times, 197 were observed more than 1000 times. +There are 666 species present in the data. 355 of these were observed fewer than 100 times, 197 were observed more than 1000 times. More information about the cube's dimensions can be found [here](https://docs.b-cubed.eu/occurrence-cube/specification/#dimensions). ```{r} birdcubeflanders_year |> @@ -102,6 +103,17 @@ birdcubeflanders_year |> Do we actually need to do this, because the strength of the cubes comes from having more, albeit unstructured, data. 
+```{r}
+utm_year <- abv_data_total |>
+  st_drop_geometry() |>
+  distinct(TAG, year)
+```
+
+```{r}
+
+```
+
+
 ```{r}
 filt_birdcube <- birdcubeflanders_year |>
   slice(0)
@@ -133,24 +145,27 @@ Let's check if these species are observed in the same utm squares for the full p
 
 ```{r}
 range_comp <- function(sel_species, period = 2007:2022) {
-  set_abv <- abv_data_total_tf |>
+  set_abv <- abv_data_total |>
+    st_drop_geometry() |>
     filter(species %in% sel_species,
-           year %in% period) |>
-    group_by(verbatimLocality) |>
-    summarise(n = sum(n))
+           year %in% period,
+           individualCount > 0) |>
+    group_by(TAG) |>
+    summarise(n = sum(individualCount))
 
   set_cube <- birdcubeflanders_year |>
+    st_drop_geometry() |>
     filter(species %in% sel_species,
            year %in% period) |>
     group_by(TAG) |>
     summarise(n = sum(n))
 
-  perc_overlap <- as.list(st_covered_by(set_abv, set_cube))
-
-  n_overlap <- sum(sapply(perc_overlap, function(x) length(x) > 0))
-  n_total <- length(perc_overlap)
-
-  (n_overlap / n_total) * 100
+  # ABV squares with the species, and how many of them the cube also has.
+  total_abv <- length(set_abv$TAG)
+  total_overlap <- sum(set_cube$TAG %in% set_abv$TAG)
+  # Share of ABV squares also present in the cube (NaN if there are none).
+  perc <- (total_overlap / total_abv) * 100
+  list(perc, total_abv, total_overlap)
 }
 ```
 
@@ -158,9 +173,13 @@ range_comp <- function(sel_species, period = 2007:2022) {
 ```{r}
 comp_range_data <- as.data.frame(studied_spec)
 comp_range_data$percentage_overlap <- NA
+comp_range_data$abv_squares <- NA
+comp_range_data$overlap_abv_birdcube <- NA
 
 for (i in studied_spec){
-  comp_range_data[comp_range_data$studied_spec == i, 2] <- range_comp(i)
+  # Evaluate range_comp() once per species and fill columns 2-4 together.
+  rows <- comp_range_data$studied_spec == i
+  comp_range_data[rows, 2:4] <- range_comp(i)
 }
 ```
 
@@ -171,6 +190,21 @@ DT::datatable(comp_range_data)
 This table shows the percentage of squares were the species was observed in both the cubes and the ABV monitoring compared to all squares where the species was observed during the 
ABV monitoring.
 
+Fix some problems with species names:
+Poecile montanus vs Parus montanus
+Dendrocopus major vs Dendrocopos major
+Saxicola rubicola vs Saxicola torquatus
+```{r, eval = FALSE}
+rgbif::name_usage(2477968) |> View() # interactive lookups only; chunk is not evaluated when knitting
+rgbif::name_usage(2477968)$data |> View()
+rgbif::name_usage(7840991)$data |> View()
+rgbif::name_lookup("Dendrocopus major")$data |> View()
+rgbif::name_lookup("Dendrocopos major")$data |> View()
+rgbif::name_lookup("Dendrocopos major", datasetKey = "d7dddbf4-2cf0-4f39-9b2a-bb099caae36c")$data |> View()
+rgbif::name_lookup("Dendrocopus major", datasetKey = "d7dddbf4-2cf0-4f39-9b2a-bb099caae36c")$data |> View()
+```
+
+
 ```{r}
 # Load necessary libraries
 library(sf)