Skip to content

Commit

Permalink
Merge pull request #86 from b-cubed-eu/map-multi-spec
Browse files Browse the repository at this point in the history
Map multi spec
  • Loading branch information
wlangera authored Jul 17, 2024
2 parents 2a610ce + 45415e3 commit d649333
Show file tree
Hide file tree
Showing 49 changed files with 3,780 additions and 69 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@
^man-roxygen$
^organisation.yml$
^pkgdown$
^vignettes/articles$
2 changes: 1 addition & 1 deletion .zenodo.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "gcube: Simulating Biodiversity Data Cubes",
"version": "0.0.1",
"version": "0.1.0",
"license": "MIT",
"upload_type": "software",
"description": "<p>Simulation framework for biodiversity data cubes.<\/p>",
Expand Down
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ abstract: "Simulation framework for biodiversity data cubes."
identifiers:
- type: url
value: https://b-cubed-eu.github.io/gcube/
version: 0.0.1
version: 0.1.0
9 changes: 5 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: gcube
Title: Simulating Biodiversity Data Cubes
Version: 0.0.1
Version: 0.1.0
Authors@R: c(
person("Ward", "Langeraert", , "ward.langeraert@inbo.be", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-5900-8109", affiliation = "Research Institute for Nature and Forest (INBO)")),
Expand Down Expand Up @@ -43,11 +43,14 @@ Imports:
cli,
dplyr,
gstat,
methods,
mnormt,
purrr,
rlang,
sf,
stats,
terra,
tidyr,
vegan,
withr
Suggests:
Expand All @@ -57,13 +60,11 @@ Suggests:
rmarkdown,
testthat (>= 3.0.0),
tidyterra
VignetteBuilder:
knitr
Config/checklist/communities: b3; inbo
Config/checklist/keywords: simulation; data cubes; B-Cubed; biodiversity;
Monte-Carlo
Config/testthat/edition: 3
Encoding: UTF-8
Language: en-GB
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
13 changes: 13 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,15 @@ export(add_coordinate_uncertainty)
export(apply_manual_sampling_bias)
export(apply_polygon_sampling_bias)
export(create_spatial_pattern)
export(filter_observations)
export(generate_taxonomy)
export(grid_designation)
export(map_add_coordinate_uncertainty)
export(map_filter_observations)
export(map_grid_designation)
export(map_sample_observations)
export(map_simulate_occurrences)
export(map_simulation_functions)
export(sample_from_binormal_circle)
export(sample_from_uniform_circle)
export(sample_observations)
Expand All @@ -19,17 +27,22 @@ importFrom(cli,cli_abort)
importFrom(cli,cli_warn)
importFrom(gstat,gstat)
importFrom(gstat,vgm)
importFrom(methods,formalArgs)
importFrom(mnormt,rmnorm)
importFrom(purrr,pmap)
importFrom(purrr,quietly)
importFrom(rlang,.data)
importFrom(stats,predict)
importFrom(stats,rbinom)
importFrom(stats,rnorm)
importFrom(stats,rpois)
importFrom(stats,runif)
importFrom(stats,setNames)
importFrom(terra,global)
importFrom(terra,rast)
importFrom(terra,rasterize)
importFrom(terra,spatSample)
importFrom(terra,vect)
importFrom(tidyr,unnest)
importFrom(vegan,decostand)
importFrom(withr,local_seed)
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# gcube 0.1.0

* Create data cube for multiple species using `purrr::pmap()`.
* Generate species taxonomy.
* Change vignettes into articles.
* Create article for multi-species approach.
* Add name rationale to README.
* Clarify pkgdown website page names.

# gcube 0.0.1

* Add [`checklist`](https://inbo.github.io/checklist/) infrastructure.
Expand Down
2 changes: 1 addition & 1 deletion R/create_spatial_pattern.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
#' library(tidyterra)
#'
#' # Create polygon
#' plgn <- st_polygon(list(cbind(c(5,10,8,2,3,5), c(2,1,7,9,5,2))))
#' plgn <- st_polygon(list(cbind(c(5, 10, 8, 2, 3, 5), c(2, 1, 7, 9, 5, 2))))
#' ggplot() +
#' geom_sf(data = plgn) +
#' theme_minimal()
Expand Down
71 changes: 71 additions & 0 deletions R/filter_observations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#' Filter detected observations
#'
#' The function filters observations from all observations based on a
#' `sampling_status` column, e.g. created by `sample_observations()`.
#'
#' @param observations_total An sf object with POINT geometry or a simple
#' dataframe with `sampling_status` column containing values `"detected"`.
#' This format is created by `sample_observations()`.
#' @param invert Logical. If `FALSE` (default), filter `"detected"`
#' occurrences. Otherwise, filter all other occurrences.
#'
#' @returns A dataframe or an sf object with POINT geometry containing detected
#' occurrences (if `invert = FALSE`), or other occurrences (if `invert = TRUE`).
#'
#' @export
#'
#' @family main
#'
#' @examples
#' # Load packages
#' library(sf)
#' library(dplyr)
#'
#' # Set seed for reproducibility
#' set.seed(123)
#'
#' # Simulate some occurrence data with coordinates and time points
#' num_points <- 10
#' occurrences <- data.frame(
#' lon = runif(num_points, min = -180, max = 180),
#' lat = runif(num_points, min = -90, max = 90),
#' time_point = 0
#' )
#'
#' # Convert the occurrence data to an sf object
#' occurrences_sf <- st_as_sf(occurrences, coords = c("lon", "lat"))
#'
#' # Sample observations without sampling bias
#' observations_total_sf <- sample_observations(
#' occurrences_sf,
#' detection_probability = 0.8,
#' sampling_bias = "no_bias",
#' seed = 123
#' )
#'
#' # Filter detected observations
#' filter_observations(observations_total_sf)
#'
#' # Filter undetected observations
#' filter_observations(observations_total_sf, invert = TRUE)

filter_observations <- function(observations_total, invert = FALSE) {
### Start checks
# Check if observations_total is a dataframe and/or an sf object
stopifnot("`observations_total` must be an sf object or a dataframe." =
inherits(observations_total, "sf") ||
inherits(observations_total, "data.frame"))

# Check if invert is a logical vector of length 1
stopifnot("`invert` must be a logical vector of length 1." =
assertthat::is.flag(invert) && assertthat::noNA(invert))
### End checks


# Filter dataframe
if (invert) {
observations_total[observations_total$sampling_status != "detected", ]
} else {
observations_total[observations_total$sampling_status == "detected", ]
}
}
175 changes: 175 additions & 0 deletions R/generate_taxonomy.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#' Generate a taxonomic hierarchy
#'
#' This function generates a random taxonomic hierarchy for a specified numbers
#' of species, genera, families, orders, classes, phyla, and kingdoms. The
#' output is a data frame with the hierarchical classification for each species.
#'
#' The function works by randomly assigning species to genera, genera to
#' families, families to orders, orders to classes, classes to phyla, and phyla
#' to kingdoms. Sampling is done with replacement, meaning that multiple
#' lower-level taxa (e.g., species) can be assigned to the same higher-level
#' taxon (e.g., genus).
#'
#' @param num_species Number of species to generate, or a dataframe. With a
#' dataframe, the function will create a species with taxonomic hierarchy for
#' each row. The original columns of the dataframe will be retained in the
#' output.
#' @param num_genera Number of genera to generate.
#' @param num_families Number of families to generate.
#' @param num_orders Number of orders to generate. Defaults to 1.
#' @param num_classes Number of classes to generate. Defaults to 1.
#' @param num_phyla Number of phyla to generate. Defaults to 1.
#' @param num_kingdoms Number of kingdoms to generate. Defaults to 1.
#' @param seed The seed for random number generation to make results
#' reproducible. If `NA` (the default), no seed is used.
#'
#' @return A data frame with the taxonomic classification of each species. If
#' `num_species` is a dataframe, the taxonomic classification is added to this
#' input dataframe. The original columns of the dataframe will be retained in
#' the output.
#'
#' @export
#'
#' @import dplyr
#' @importFrom withr local_seed
#'
#' @family multispecies
#'
#' @examples
#' # Create simple taxonomic hierarchy
#' generate_taxonomy(
#' num_species = 5,
#' num_genera = 3,
#' num_families = 2,
#' seed = 123)
#'
#' # Add taxonomic hierarchy to a dataframe
#' existing_df <- data.frame(
#' count = c(1, 2, 5, 4, 8, 9, 3),
#' det_prob = c(0.9, 0.9, 0.9, 0.8, 0.5, 0.2, 0.2)
#' )
#'
#' generate_taxonomy(
#' num_species = existing_df,
#' num_genera = 4,
#' num_families = 2,
#' seed = 125)

generate_taxonomy <- function(
num_species,
num_genera,
num_families,
num_orders = 1,
num_classes = 1,
num_phyla = 1,
num_kingdoms = 1,
seed = NA) {
### Start checks
# 1. Check input type and length
# Check if numbers are single counts (or dataframe)
stopifnot("`num_species` should be a single integer or a dataframe." =
assertthat::is.count(num_species) ||
inherits(num_species, "data.frame"))
stopifnot("`num_genera` should be a single integer." =
assertthat::is.count(num_genera))
stopifnot("`num_families` should be a single integer." =
assertthat::is.count(num_families))
stopifnot("`num_orders` should be a single integer." =
assertthat::is.count(num_orders))
stopifnot("`num_phyla` should be a single integer." =
assertthat::is.count(num_phyla))
stopifnot("`num_kingdoms` should be a single integer." =
assertthat::is.count(num_kingdoms))

# Check if seed is NA or a number
stopifnot("`seed` must be a numeric vector of length 1 or NA." =
assertthat::is.number(seed) || is.na(seed))

# 2. Other checks
# Validate dataframe input
if (inherits(num_species, "data.frame")) {
# Define dataframe and number of species correctly
species_df <- num_species
num_species <- nrow(species_df)

# Generate species names
species_df$species <- paste0("species", seq_len(num_species))
} else {
# Generate species names
species_df <- data.frame(species = paste0("species", seq_len(num_species)))
}

# Check if number of species is smaller than number of genera is smaller than
# number of families ...
stopifnot(
"Number of genera should be smaller or equal to number of species." =
num_species >= num_genera)
stopifnot(
"Number of families should be smaller or equal to number of genera." =
num_genera >= num_families)
stopifnot(
"Number of orders should be smaller or equal to number of families." =
num_families >= num_orders)
stopifnot(
"Number of classes should be smaller or equal to number of orders." =
num_orders >= num_classes)
stopifnot(
"Number of phyla should be smaller or equal to number of classes." =
num_classes >= num_phyla)
stopifnot(
"Number of kingdoms should be smaller or equal to number of phyla." =
num_phyla >= num_kingdoms)
### End checks

# Set seed if provided
if (!is.na(seed)) {
withr::local_seed(seed)
}

# Assign species to genera
genera <- paste0("genus", seq_len(num_genera))
species_to_genera <- sample(genera, num_species, replace = TRUE)

# Assign genera to families
families <- paste0("family", seq_len(num_families))
genera_to_families <- data.frame(
genus = genera,
family = sample(families, num_genera, replace = TRUE))

# Assign families to orders
orders <- paste0("order", seq_len(num_orders))
families_to_orders <- data.frame(
family = families,
order = sample(orders, num_families, replace = TRUE))

# Assign orders to classes
classes <- paste0("class", seq_len(num_classes))
orders_to_classes <- data.frame(
order = orders,
class = sample(classes, num_orders, replace = TRUE))

# Assign classes to phyla
phyla <- paste0("phylum", seq_len(num_phyla))
classes_to_phyla <- data.frame(
class = classes,
phylum = sample(phyla, num_classes, replace = TRUE))

# Assign phyla to kingdoms
kingdoms <- paste0("kingdom", seq_len(num_kingdoms))
phyla_to_kingdoms <- data.frame(
phylum = phyla,
kingdom = sample(kingdoms, num_phyla, replace = TRUE))

# Create a data frame to store the taxonomy
taxonomy <- species_df %>%
dplyr::mutate(genus = species_to_genera) %>%
dplyr::left_join(genera_to_families, by = "genus") %>%
dplyr::left_join(families_to_orders, by = "family") %>%
dplyr::left_join(orders_to_classes, by = "order") %>%
dplyr::left_join(classes_to_phyla, by = "class") %>%
dplyr::left_join(phyla_to_kingdoms, by = "phylum") %>%
dplyr::select("species", "genus", "family", "order", "class", "phylum",
"kingdom", everything())

return(taxonomy)
}
Loading

0 comments on commit d649333

Please sign in to comment.