Merge pull request #86 from b-cubed-eu/map-multi-spec

Map multi spec
b-cubed-eu · Jul 17, 2024 · d649333 · d649333
2 parents 2a610ce + 45415e3
commit d649333
Show file tree

Hide file tree

Showing 49 changed files with 3,780 additions and 69 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -17,3 +17,4 @@
 ^man-roxygen$
 ^organisation.yml$
 ^pkgdown$
+^vignettes/articles$
diff --git a/.zenodo.json b/.zenodo.json
@@ -1,6 +1,6 @@
 {
   "title": "gcube: Simulating Biodiversity Data Cubes",
-  "version": "0.0.1",
+  "version": "0.1.0",
   "license": "MIT",
   "upload_type": "software",
   "description": "<p>Simulation framework for biodiversity data cubes.<\/p>",

diff --git a/CITATION.cff b/CITATION.cff
@@ -25,4 +25,4 @@ abstract: "Simulation framework for biodiversity data cubes."
 identifiers:
 - type: url
   value: https://b-cubed-eu.github.io/gcube/
-version: 0.0.1
+version: 0.1.0
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: gcube
 Title: Simulating Biodiversity Data Cubes
-Version: 0.0.1
+Version: 0.1.0
 Authors@R: c(
     person("Ward", "Langeraert", , "ward.langeraert@inbo.be", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0002-5900-8109", affiliation = "Research Institute for Nature and Forest (INBO)")),
@@ -43,11 +43,14 @@ Imports:
     cli,
     dplyr,
     gstat,
+    methods,
     mnormt,
+    purrr,
     rlang,
     sf,
     stats,
     terra,
+    tidyr,
     vegan,
     withr
 Suggests: 
@@ -57,13 +60,11 @@ Suggests:
     rmarkdown,
     testthat (>= 3.0.0),
     tidyterra
-VignetteBuilder: 
-    knitr
 Config/checklist/communities: b3; inbo
 Config/checklist/keywords: simulation; data cubes; B-Cubed; biodiversity;
     Monte-Carlo
 Config/testthat/edition: 3
 Encoding: UTF-8
 Language: en-GB
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
diff --git a/NAMESPACE b/NAMESPACE
@@ -4,7 +4,15 @@ export(add_coordinate_uncertainty)
 export(apply_manual_sampling_bias)
 export(apply_polygon_sampling_bias)
 export(create_spatial_pattern)
+export(filter_observations)
+export(generate_taxonomy)
 export(grid_designation)
+export(map_add_coordinate_uncertainty)
+export(map_filter_observations)
+export(map_grid_designation)
+export(map_sample_observations)
+export(map_simulate_occurrences)
+export(map_simulation_functions)
 export(sample_from_binormal_circle)
 export(sample_from_uniform_circle)
 export(sample_observations)
@@ -19,17 +27,22 @@ importFrom(cli,cli_abort)
 importFrom(cli,cli_warn)
 importFrom(gstat,gstat)
 importFrom(gstat,vgm)
+importFrom(methods,formalArgs)
 importFrom(mnormt,rmnorm)
+importFrom(purrr,pmap)
+importFrom(purrr,quietly)
 importFrom(rlang,.data)
 importFrom(stats,predict)
 importFrom(stats,rbinom)
 importFrom(stats,rnorm)
 importFrom(stats,rpois)
 importFrom(stats,runif)
+importFrom(stats,setNames)
 importFrom(terra,global)
 importFrom(terra,rast)
 importFrom(terra,rasterize)
 importFrom(terra,spatSample)
 importFrom(terra,vect)
+importFrom(tidyr,unnest)
 importFrom(vegan,decostand)
 importFrom(withr,local_seed)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,12 @@
+# gcube 0.1.0
+
+* Create data cube for multiple species using `purrr::pmap()`.
+* Generate species taxonomy.
+* Change vignettes into articles.
+* Create article for multi-species approach.
+* Add name rationale to README.
+* Clarify pkgdown website page names.
+
 # gcube 0.0.1
 
 * Add [`checklist`](https://inbo.github.io/checklist/) infrastructure.

diff --git a/R/create_spatial_pattern.R b/R/create_spatial_pattern.R
@@ -46,7 +46,7 @@
 #' library(tidyterra)
 #'
 #' # Create polygon
-#' plgn <- st_polygon(list(cbind(c(5,10,8,2,3,5), c(2,1,7,9,5,2))))
+#' plgn <- st_polygon(list(cbind(c(5, 10, 8, 2, 3, 5), c(2, 1, 7, 9, 5, 2))))
 #' ggplot() +
 #'   geom_sf(data = plgn) +
 #'   theme_minimal()

diff --git a/R/filter_observations.R b/R/filter_observations.R
@@ -0,0 +1,71 @@
+#' Filter detected observations
+#'
+#' The function filters observations from all observations based on a
+#' `sampling_status` column, e.g. created by `sample_observations()`.
+#'
+#' @param observations_total An sf object with POINT geometry or a simple
+#' dataframe with `sampling_status` column containing values `"detected"`.
+#' This format is created by `sample_observations()`.
+#' @param invert Logical. If `FALSE` (default), filter `"detected"`
+#' occurrences. Otherwise, filter all other occurrences.
+#'
+#' @returns A dataframe or an sf object with POINT geometry containing detected
+#' occurrences (if `invert = FALSE`), or other occurrences (if `invert = TRUE`).
+#'
+#' @export
+#'
+#' @family main
+#'
+#' @examples
+#' # Load packages
+#' library(sf)
+#' library(dplyr)
+#'
+#' # Set seed for reproducibility
+#' set.seed(123)
+#'
+#' # Simulate some occurrence data with coordinates and time points
+#' num_points <- 10
+#' occurrences <- data.frame(
+#'   lon = runif(num_points, min = -180, max = 180),
+#'   lat = runif(num_points, min = -90, max = 90),
+#'   time_point = 0
+#'   )
+#'
+#' # Convert the occurrence data to an sf object
+#' occurrences_sf <- st_as_sf(occurrences, coords = c("lon", "lat"))
+#'
+#' # Sample observations without sampling bias
+#' observations_total_sf <- sample_observations(
+#'   occurrences_sf,
+#'   detection_probability = 0.8,
+#'   sampling_bias = "no_bias",
+#'   seed = 123
+#'   )
+#'
+#' # Filter detected observations
+#' filter_observations(observations_total_sf)
+#'
+#' # Filter undetected observations
+#' filter_observations(observations_total_sf, invert = TRUE)
+
+filter_observations <- function(observations_total, invert = FALSE) {
+  ### Start checks
+  # Check if observations_total is a dataframe and/or an sf object
+  stopifnot("`observations_total` must be an sf object or a dataframe." =
+              inherits(observations_total, "sf") ||
+              inherits(observations_total, "data.frame"))
+
+  # Check if invert is a logical vector of length 1
+  stopifnot("`invert` must be a logical vector of length 1." =
+              assertthat::is.flag(invert) && assertthat::noNA(invert))
+  ### End checks
+
+
+  # Filter dataframe
+  if (invert) {
+    observations_total[observations_total$sampling_status != "detected", ]
+  } else {
+    observations_total[observations_total$sampling_status == "detected", ]
+  }
+}
diff --git a/R/generate_taxonomy.R b/R/generate_taxonomy.R
@@ -0,0 +1,175 @@
+#' Generate a taxonomic hierarchy
+#'
+#' This function generates a random taxonomic hierarchy for a specified numbers
+#' of species, genera, families, orders, classes, phyla, and kingdoms. The
+#' output is a data frame with the hierarchical classification for each species.
+#'
+#' The function works by randomly assigning species to genera, genera to
+#' families, families to orders, orders to classes, classes to phyla, and phyla
+#' to kingdoms. Sampling is done with replacement, meaning that multiple
+#' lower-level taxa (e.g., species) can be assigned to the same higher-level
+#' taxon (e.g., genus).
+#'
+#' @param num_species Number of species to generate, or a dataframe. With a
+#' dataframe, the function will create a species with taxonomic hierarchy for
+#' each row. The original columns of the dataframe will be retained in the
+#' output.
+#' @param num_genera Number of genera to generate.
+#' @param num_families Number of families to generate.
+#' @param num_orders Number of orders to generate. Defaults to 1.
+#' @param num_classes Number of classes to generate. Defaults to 1.
+#' @param num_phyla Number of phyla to generate. Defaults to 1.
+#' @param num_kingdoms Number of kingdoms to generate. Defaults to 1.
+#' @param seed The seed for random number generation to make results
+#' reproducible. If `NA` (the default), no seed is used.
+#'
+#' @return A data frame with the taxonomic classification of each species. If
+#' `num_species` is a dataframe, the taxonomic classification is added to this
+#' input dataframe. The original columns of the dataframe will be retained in
+#' the output.
+#'
+#' @export
+#'
+#' @import dplyr
+#' @importFrom withr local_seed
+#'
+#' @family multispecies
+#'
+#' @examples
+#' # Create simple taxonomic hierarchy
+#' generate_taxonomy(
+#'   num_species = 5,
+#'   num_genera = 3,
+#'   num_families = 2,
+#'   seed = 123)
+#'
+#' # Add taxonomic hierarchy to a dataframe
+#' existing_df <- data.frame(
+#'   count = c(1, 2, 5, 4, 8, 9, 3),
+#'   det_prob = c(0.9, 0.9, 0.9, 0.8, 0.5, 0.2, 0.2)
+#'   )
+#'
+#' generate_taxonomy(
+#'   num_species = existing_df,
+#'   num_genera = 4,
+#'   num_families = 2,
+#'   seed = 125)
+
+generate_taxonomy <- function(
+    num_species,
+    num_genera,
+    num_families,
+    num_orders = 1,
+    num_classes = 1,
+    num_phyla = 1,
+    num_kingdoms = 1,
+    seed = NA) {
+  ### Start checks
+  # 1. Check input type and length
+  # Check if numbers are single counts (or dataframe)
+  stopifnot("`num_species` should be a single integer or a dataframe." =
+              assertthat::is.count(num_species) ||
+              inherits(num_species, "data.frame"))
+  stopifnot("`num_genera` should be a single integer." =
+              assertthat::is.count(num_genera))
+  stopifnot("`num_families` should be a single integer." =
+              assertthat::is.count(num_families))
+  stopifnot("`num_orders` should be a single integer." =
+              assertthat::is.count(num_orders))
+  stopifnot("`num_phyla` should be a single integer." =
+              assertthat::is.count(num_phyla))
+  stopifnot("`num_kingdoms` should be a single integer." =
+              assertthat::is.count(num_kingdoms))
+
+  # Check if seed is NA or a number
+  stopifnot("`seed` must be a numeric vector of length 1 or NA." =
+              assertthat::is.number(seed) || is.na(seed))
+
+  # 2. Other checks
+  # Validate dataframe input
+  if (inherits(num_species, "data.frame")) {
+    # Define dataframe and number of species correctly
+    species_df <- num_species
+    num_species <- nrow(species_df)
+
+    # Generate species names
+    species_df$species <- paste0("species", seq_len(num_species))
+  } else {
+    # Generate species names
+    species_df <- data.frame(species = paste0("species", seq_len(num_species)))
+  }
+
+  # Check if number of species is smaller than number of genera is smaller than
+  # number of families ...
+  stopifnot(
+    "Number of genera should be smaller or equal to number of species." =
+      num_species >= num_genera)
+  stopifnot(
+    "Number of families should be smaller or equal to number of genera." =
+      num_genera >= num_families)
+  stopifnot(
+    "Number of orders should be smaller or equal to number of families." =
+      num_families >= num_orders)
+  stopifnot(
+    "Number of classes should be smaller or equal to number of orders." =
+      num_orders >= num_classes)
+  stopifnot(
+    "Number of phyla should be smaller or equal to number of classes." =
+      num_classes >= num_phyla)
+  stopifnot(
+    "Number of kingdoms should be smaller or equal to number of phyla." =
+      num_phyla >= num_kingdoms)
+  ### End checks
+
+  # Set seed if provided
+  if (!is.na(seed)) {
+    withr::local_seed(seed)
+  }
+
+  # Assign species to genera
+  genera <- paste0("genus", seq_len(num_genera))
+  species_to_genera <- sample(genera, num_species, replace = TRUE)
+
+  # Assign genera to families
+  families <- paste0("family", seq_len(num_families))
+  genera_to_families <- data.frame(
+    genus = genera,
+    family = sample(families, num_genera, replace = TRUE))
+
+  # Assign families to orders
+  orders <- paste0("order", seq_len(num_orders))
+  families_to_orders <- data.frame(
+    family = families,
+    order = sample(orders, num_families, replace = TRUE))
+
+  # Assign orders to classes
+  classes <- paste0("class", seq_len(num_classes))
+  orders_to_classes <- data.frame(
+    order = orders,
+    class = sample(classes, num_orders, replace = TRUE))
+
+  # Assign classes to phyla
+  phyla <- paste0("phylum", seq_len(num_phyla))
+  classes_to_phyla <- data.frame(
+    class = classes,
+    phylum = sample(phyla, num_classes, replace = TRUE))
+
+  # Assign phyla to kingdoms
+  kingdoms <- paste0("kingdom", seq_len(num_kingdoms))
+  phyla_to_kingdoms <- data.frame(
+    phylum = phyla,
+    kingdom = sample(kingdoms, num_phyla, replace = TRUE))
+
+  # Create a data frame to store the taxonomy
+  taxonomy <- species_df %>%
+    dplyr::mutate(genus = species_to_genera) %>%
+    dplyr::left_join(genera_to_families, by = "genus") %>%
+    dplyr::left_join(families_to_orders, by = "family") %>%
+    dplyr::left_join(orders_to_classes, by = "order") %>%
+    dplyr::left_join(classes_to_phyla, by = "class") %>%
+    dplyr::left_join(phyla_to_kingdoms, by = "phylum") %>%
+    dplyr::select("species", "genus", "family", "order", "class", "phylum",
+                  "kingdom", everything())
+
+  return(taxonomy)
+}