Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
AugustT committed Feb 17, 2021
2 parents ba7f703 + c10ea80 commit 9f3932a
Show file tree
Hide file tree
Showing 18 changed files with 174 additions and 102 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: sparta
Type: Package
Title: Trend Analysis for Unstructured Data
Version: 0.2.10
Date: 2020-01-07
Version: 0.2.14
Date: 2020-02-15
Authors@R: c(person("Tom", "August", role = c("aut", "cre"), email = "tomaug@ceh.ac.uk"),
person("Gary", "Powney", role = c("aut")),
person("Charlie", "Outhwaite", role = c("aut")),
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,14 @@ export(siteSelectionMinTP)
export(telfer)
export(visitsSummary)
import(LearnBayes)
import(dplyr)
import(gdata)
import(ggplot2)
import(lme4)
import(rmarkdown)
import(sp)
importFrom(boot,inv.logit)
importFrom(dplyr,arrange)
importFrom(dplyr,count)
importFrom(dplyr,distinct)
importFrom(dplyr,group_by)
importFrom(dplyr,summarise)
Expand Down
4 changes: 3 additions & 1 deletion R/dataMetrics.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
#' @param formattedData output from formattedOccData
#' @param verbose Logical. Default (`FALSE`) returns seven metrics.
#'
#' @references Pocock, Logie, Isaac, Outhwaite & August. Rapid assessment of the suitability of multi-species citizen science datasets for occupancy trend analysis. bioRxiv 813626 (2019) doi:10.1101/813626.
#' @references Pocock, Logie, Isaac, Outhwaite & August (2019).
#' Rapid assessment of the suitability of multi-species citizen
#' science datasets for occupancy trend analysis. \emph{bioRxiv} 813626 doi:10.1101/813626.
#' @export


Expand Down
8 changes: 4 additions & 4 deletions R/formatOccData.r
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@
#'
#' # Create data
#' n <- 15000 #size of dataset
#' nyr <- 20 # number of years in data
#' nyear <- 20 # number of years in data
#' nSurveys <- 100 # set number of dates
#' nSites <- 50 # set number of sites
#'
#' # Create somes dates
#' first <- as.Date(strptime("2010/01/01", "%Y/%m/%d"))
#' last <- as.Date(strptime(paste(2010+(nyr-1),"/12/31", sep=''), "%Y/%m/%d"))
#' first <- as.Date(strptime("2010/01/01", format="%Y/%m/%d"))
#' last <- as.Date(strptime(paste(2010+(nyear-1),"/12/31", sep=''), format="%Y/%m/%d"))
#' dt <- last-first
#' rDates <- first + (runif(nSurveys)*dt)
#'
Expand Down Expand Up @@ -157,7 +157,7 @@ formatOccData <- function(taxa, site, survey, replicate = NULL, closure_period =
temp <- taxa_data[,c('taxa','visit')]
names(temp)[1] <- "species_name"
temp$pres <- TRUE # add TRUE column which will populate the spp with visit matrix/dataframe
spp_vis <- dcast(temp, formula = visit ~ species_name, value.var = "pres", fill = FALSE) # This is the dataframe that contains a row per visit and a column for each species present or not. USed to create the focal column in the next step
spp_vis <- dcast(temp, formula = visit ~ species_name, value.var = "pres", fill = FALSE, fun=unique) # This is the dataframe that contains a row per visit and a column for each species present or not. USed to create the focal column in the next step


# Add Julian Day if needed
Expand Down
2 changes: 1 addition & 1 deletion R/formulaBuilder.r
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ formulaBuilder <- function(family, list_length, site_effect, overdispersion){
model_formula <- ifelse(family == 'Bernoulli',
'taxa ~ year',
'cbind(successes, failures) ~ year')
if(list_length) model_formula <- paste(model_formula, '+ listLength')
if(list_length) model_formula <- paste(model_formula, '+ log(listLength)')
if(site_effect) model_formula <- paste(model_formula, '+ (1|site)')
if(overdispersion) model_formula <- paste(model_formula, '+ (1|obs)')

Expand Down
7 changes: 7 additions & 0 deletions R/gridref_regions-data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#' @name gridref_regions
#' @title A dataset of UK grid references with regions
#' @description This is a dataset of 1km-square UK grid references with regions ENGLAND, WALES, SCOTLAND, NORTHERN_IRELAND
#' @docType data
#' @usage gridref_regions
#' @author Rob Cooke, 2021-02-15
NULL
131 changes: 90 additions & 41 deletions R/occDetFunc.r
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#' @param additional.init.values A named list giving user specified initial values to
#' be added to the defaults.
#' @param return_data Logical, if \code{TRUE} (default) the BUGS data object is returned with the data
#' @param saveMatrix Logical, if \code{FALSE} (default) the sims.matrix element of the jags object is omitted, in order to reduce the filesize.
#' @param criterion Determines whether the model should be run. If an integer then this defines the threshold number of records (50 in Outhwaite et al 2019).
#' Other options are `EqualWt` or `HighSpec`, which define the application of "rules of thumb" defined in Pocock et al 2019.
#' Defaults to 1, in which case the model is applied for so long there is a single record of the focal species.
Expand Down Expand Up @@ -178,7 +179,7 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
seed = NULL, model.function = NULL, regional_codes = NULL,
region_aggs = NULL, additional.parameters = NULL,
additional.BUGS.elements = NULL, additional.init.values = NULL,
return_data = TRUE, criterion = 1, provenance = NULL){
return_data = FALSE, criterion = 1, provenance = NULL, saveMatrix = FALSE){

################## BASIC CHECKS
# first run the error checks
Expand All @@ -199,9 +200,12 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr

# strip out the visits to sites that were visited in just one year
i <- occDetdata$site %in% sites_to_include
occDetdata <- occDetdata[i,]
spp_vis <- spp_vis[i,]

if(sum(i) > 0){
occDetdata <- occDetdata[i,]
spp_vis <- spp_vis[i,]
} else stop(paste0("There are no sites visited in at least ", nyr, " years."))

# calcluate a set of data metrics for this species
data_Metrics <- dataMetrics(sp = taxa_name,
formattedData = list(occDetdata=occDetdata, spp_vis=spp_vis))
Expand Down Expand Up @@ -276,8 +280,16 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
if(!is.null(regional_codes)){
if(!inherits(regional_codes, 'data.frame')) stop("regional_codes should be a data.frame")

# check whether there is a column called "site".
#If not, let's assume that the site column is the first in the dataframe
#NB previous behaviour was to assume *both* that it was column 1 and named 'site'
if(!"site" %in% names(regional_codes)) {
warning(paste0("renaming ", names(regional_codes)[1], " as 'site'"))
names(regional_codes)[1] <- "site"
}

# remove locations that are not in the data
abs_sites <- as.character(regional_codes[,1])[!as.character(regional_codes[,1]) %in% as.character(occDetdata$site)]
abs_sites <- as.character(regional_codes$site)[!as.character(regional_codes$site) %in% as.character(occDetdata$site)]
if(length(abs_sites) > 0){
warning(paste(length(abs_sites), 'sites are in regional_codes but not in occurrence data'))
}
Expand All @@ -304,9 +316,9 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
if(length(sites_no_region2) >= 1)
warning(paste(length(sites_no_region2), 'sites are in occurrence data but not in regional data and will be removed'))

# strip these same sites out of the occDetdata
# strip these same sites out of the occDetdata & the regional codes
bad_sites <- unique(c(abs_sites, sites_multi_row, sites_multi_region, sites_no_region, sites_no_region2))
regional_codes <- regional_codes[!regional_codes$site %in% bad_sites, ]
regional_codes <- subset(regional_codes, !site %in% bad_sites)
occDetdata <- subset(occDetdata, !site %in% bad_sites)
}

Expand Down Expand Up @@ -338,7 +350,8 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
}

# year and site need to be numeric starting from 1 to length of them. This is due to the way the bugs code is written
site_match <- unique(data.frame(name = occDetdata$site, id = as.numeric(as.factor(occDetdata$site))))
nsite <- length(unique(occDetdata$site))
site_match <- data.frame(name = unique(occDetdata$site), id = 1:nsite)
occDetdata <- merge(occDetdata, site_match, by.x='site', by.y="name")

# need to get a measure of whether the species was on that site in that year, unequivocally, in zst
Expand Down Expand Up @@ -424,8 +437,13 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
# HERE IS THE BUGS DATA
bugs_data <- with(occDetdata,
list(y = as.numeric(focal), Year = TP, Site = id,
nyear = nTP, nsite = nrow(zst), nvisit = nrow(occDetdata)))
nyear = nTP, nsite = nsite, nvisit = nrow(occDetdata)))

# temporary test
if(max(occDetdata$id) != bugs_data$nsite) stop(paste0("Site idenitifier exceeds nsite (",
max(occDetdata$id), nsite,")"))


for(btype in modeltype){ # one call per element of modeltype: each adds a section
bugs_data <- getBugsData(bugs_data, modeltype = btype,
occDetData = occDetdata)
Expand All @@ -450,10 +468,13 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
# removed unwanted bugs elements
bugs_data <- bugs_data[!names(bugs_data) %in% c('psi0.a', 'psi0.b')]

# expand the lookup table to include regions
regional_lookup <- merge(regional_codes, site_match, by.y="name", by.x="site")

zero_sites <- NULL
for(region in region_names){
if(sum(regional_codes[ , region]) != 0){
bugs_data[paste0('r_', region)] <- list(regional_codes[occDetdata$id,region])
bugs_data[paste0('r_', region)] <- list(regional_lookup[order(regional_lookup$id),region])
bugs_data[paste0('nsite_r_', region)] <- sum(regional_codes[, region])
} else {
zero_sites <- c(zero_sites, region)
Expand Down Expand Up @@ -485,29 +506,54 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
}
}

regions_years <- list()
regions_nobs <- list()
regions_sites <-list()

bugs_data_copy <- merge(bugs_data_copy, regional_codes, all.x = TRUE)

# add regional codes to this copy and get n_obs, max and min years and year gaps for each region
for(region_name in region_names){
regions_nobs[paste0('n_obs_','r_', region_name)] <- sum(bugs_data_copy$y * bugs_data_copy[,region_name])
regions_sites[paste0('n_sites_','r_', region_name)] <- sum(bugs_data_copy[,region_name])
current_r <- bugs_data_copy$y * bugs_data_copy[,region_name] * bugs_data_copy$year
current_r <- subset(current_r,current_r !=0)
current_rmin <- (min_year-1) + min(current_r)
current_rmax <- (min_year-1) + max(current_r)
regions_years[paste0('min_year_data_','r_', region_name)] <- current_rmin
regions_years[paste0('max_year_data_','r_', region_name)] <- current_rmax
current_datagaps <- dataGaps(current_r, min_year, max_year, current_rmin, current_rmax)
regions_years[paste0('gap_start_','r_', region_name)] <- current_datagaps$gap_start
regions_years[paste0('gap_end_','r_', region_name)] <- current_datagaps$gap_end
regions_years[paste0('gap_middle_','r_', region_name)] <- current_datagaps$gap_middle
}
regions_years <- list()
regions_nobs <- list()
regions_sites <-list()

bugs_data_copy <- merge(bugs_data_copy, regional_codes, all.x = TRUE)

# add regional codes to this copy and get n_obs, max and min years and year gaps for each region
for(region_name in region_names){

regions_nobs[paste0('n_obs_','r_', region_name)] <- sum(bugs_data_copy$y * bugs_data_copy[,region_name])
regions_sites[paste0('n_sites_','r_', region_name)] <- sum(bugs_data_copy[,region_name])
current_r <- bugs_data_copy$y * bugs_data_copy[,region_name] * bugs_data_copy$year
current_r <- subset(current_r,current_r !=0)

if(length(current_r) > 2){

current_rmin <- (min_year-1) + min(current_r)
current_rmax <- (min_year-1) + max(current_r)
regions_years[paste0('min_year_data_','r_', region_name)] <- current_rmin
regions_years[paste0('max_year_data_','r_', region_name)] <- current_rmax
current_datagaps <- dataGaps(current_r, min_year, max_year, current_rmin, current_rmax)
regions_years[paste0('gap_start_','r_', region_name)] <- current_datagaps$gap_start
regions_years[paste0('gap_end_','r_', region_name)] <- current_datagaps$gap_end
regions_years[paste0('gap_middle_','r_', region_name)] <- current_datagaps$gap_middle

} else if(length(current_r) == 1) {

current_rmin <- (min_year-1) + min(current_r)
current_rmax <- (min_year-1) + max(current_r)
regions_years[paste0('min_year_data_','r_', region_name)] <- current_rmin
regions_years[paste0('max_year_data_','r_', region_name)] <- current_rmax
current_datagaps <- dataGaps(current_r, min_year, max_year, current_rmin, current_rmax)
regions_years[paste0('gap_start_','r_', region_name)] <- current_datagaps$gap_start
regions_years[paste0('gap_end_','r_', region_name)] <- current_datagaps$gap_end
regions_years[paste0('gap_middle_','r_', region_name)] <- NA

} else if(length(current_r) < 1){

regions_years[paste0('min_year_data_','r_', region_name)] <- NA
regions_years[paste0('max_year_data_','r_', region_name)] <- NA
regions_years[paste0('gap_start_','r_', region_name)] <- NA
regions_years[paste0('gap_end_','r_', region_name)] <- NA
regions_years[paste0('gap_middle_','r_', region_name)] <- NA

}
}
}

# add max and min data years for the whole dataset
all_years_data <- bugs_data_copy$y * bugs_data_copy$year
all_years_data <- subset(all_years_data, all_years_data !=0)
Expand Down Expand Up @@ -598,14 +644,14 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
)

dir.create(path = output_dir, showWarnings = FALSE) # create the top results folder
if (class(error_status) == "try-error" ){

warning('JAGS returned an error when modelling', taxa_name, 'error:', error_status[1])
return(NULL)

} # end of "if(proceed)"
}
} # end of "if(proceed)"

if (class(error_status) == "try-error" ){
warning('JAGS returned an error when modelling', taxa_name, 'error:', error_status[1])
return(NULL)
} else {

########################################## Add metadata

# calcluate number of site:year combinations with repeat visits (across the whole dataset)
Expand Down Expand Up @@ -656,7 +702,7 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
)

# add regional elements if applicable
if(!is.null(regional_codes)){
if(!is.null(regional_codes) & proceed){
MD$summary$region_nobs <- regions_nobs
MD$summary$region_years <- regions_years
MD$summary$region_nsite <- regions_sites
Expand All @@ -678,13 +724,16 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
metadata[name] <- list(MD)
attr(out, 'metadata') <- metadata

if(!saveMatrix) out$BUGSoutput$sims.matrix <- NULL

out$SPP_NAME <- taxa_name
out$min_year <- min_year
out$max_year <- max_year
out$sites_included <- site_match
out$sites_included <- ifelse(test = proceed, yes = site_match, no = NA)
out$nsites <- bugs_data$nsite
out$nvisits <- bugs_data$nvisit
out$species_observations <- sum(bugs_data$y)
out$sparta_version <- packages["sparta"]
if(!is.null(regional_codes)) out$regions <- region_names
if(!is.null(region_aggs)) out$region_aggs <- region_aggs
if(return_data) out$bugs_data <- bugs_data
Expand All @@ -693,5 +742,5 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
class(out) <- 'occDet'
if(write_results) save(out, file = file.path(output_dir, paste(taxa_name, ".rdata", sep = "")))
return(out)
}
}
}

7 changes: 5 additions & 2 deletions R/reportingRateModel.r
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,11 @@
#' @importFrom plyr rbind.fill
#' @importFrom dplyr distinct
#' @references Roy, H.E., Adriaens, T., Isaac, N.J.B. et al. (2012) Invasive alien predator
#' causes rapid declines of native European ladybirds. Diversity & Distributions,
#' 18, 717-725.
#' causes rapid declines of native European ladybirds. \emph{Diversity & Distributions},
#' 18: 717-725.
#' @references Isaac, N.J.B. et al. (2014) Extracting robust trends in species' distributions
#' from unstructured opportunistic data: a comparison of methods.
#' \emph{bioRXiv} 006999, https://doi.org/10.1101/006999.

reportingRateModel <- function(taxa, site, time_period, list_length = FALSE, site_effect = FALSE,
species_to_include = unique(taxa), overdispersion = FALSE,
Expand Down
2 changes: 1 addition & 1 deletion R/visitsSummary.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#' }
#' @export
#' @importFrom dplyr count

#' @importFrom dplyr arrange

visitsSummary <- function(x) {

Expand Down
Binary file added data/gridref_regions.rda
Binary file not shown.
4 changes: 3 additions & 1 deletion man/dataMetrics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/formatOccData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions man/occDetFunc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 9f3932a

Please sign in to comment.