diff --git a/NAMESPACE b/NAMESPACE index a1e7708d..4c55d243 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,9 +28,13 @@ export(get_log) export(get_random) export(is.FIMSFit) export(is.FIMSFits) +export(lognormal) export(m_agecomp) export(m_index) export(m_landings) +export(multinomial) +export(new_data_distribution) +export(new_process_distribution) export(run_gtest) export(setup_and_run_gtest) export(setup_gtest) diff --git a/R/distribution_formulas.R b/R/distribution_formulas.R new file mode 100644 index 00000000..be3209d1 --- /dev/null +++ b/R/distribution_formulas.R @@ -0,0 +1,371 @@ +#' Validity checks for distributions +#' +#' Check the validity of arguments passed to [new_data_distribution()] and +#' [new_process_distribution()]. +#' +#' @param args A list of input arguments. +#' @noRd +#' @return +#' Nothing is returned if the check is successful, and error messages are +#' returned if the checks are unsuccessful. +check_distribution_validity <- function(args) { + family <- args[["family"]] + families <- args[["families"]] + sd <- args[["sd"]] + if (class(family) != "family") { + cli::cli_abort(c( + "x" = "{.code {family}} is the incorrect type.", + "i" = "{.var family} needs to be specified as a family class, e.g., + `family = gaussian()`." + )) + } + if (!(family[["family"]] %in% families)) { + fam_name <- family[["family"]] + cli::cli_abort(c( + "x" = "FIMS currently does not offer the family, {.code {fam_name}}, for + this distribution type.", + "i" = "The families available for this distribution type are: {.code + {families}}." + )) + } + if (!is.null(args[["data_type"]])) { + data_type <- args[["data_type"]] + data_type_names <- c("index", "agecomp", "lengthcomp") + + if (!(data_type %in% data_type_names)) { + cli::cli_abort(c( + "x" = "The data type is incorrect.", + "i" = "Correct values are: {.code {data_type_names}}." + )) + } + + if (grepl("comp", data_type) && + (family[["family"]] == "lognormal" || + family[["family"]] == "gaussian")) { + fam_name <- family[["family"]] + cli::cli_abort(c( + "x" = "The family, {.code {fam_name}} is not available for the data + type, {.code {data_type}}.", + "i" = "The available families for this data type are: `multinomial`." + )) + } + if (data_type == "index" && + family[["family"]] == "multinomial") { + fam_name <- family[["family"]] + cli::cli_abort(c( + "x" = "The family, {.code {fam_name}} is not available for the data + type, {.code {data_type}}.", + "i" = "The available families for this data type are: `lognormal` or + `gaussian`." + )) + } + } + + if (!all(sd[["value"]] > 0)) { + sd_value <- sd[["value"]] + cli::cli_abort(c( + "x" = "At least one value for sd, {.code {sd_value}} is out of bounds.", + "i" = "All standard deviation values need to be positive." + )) + } + + if (length(sd[["estimated"]]) > 1) { + if (length(sd[["value"]]) != length(sd[["estimated"]])) { + sd_length <- length(sd[["value"]]) + est_length <- length(sd[["estimated"]]) + cli::cli_abort(c( + "x" = "The length of `sd$value` is {.code {sd_length}} and the length + of `sd$estimated` is {.code {est_length}}.", + "i" = "The size of `value` and `estimated` must match." + )) + } + } +} + +#' Return name of expected value +#' +#' @inheritParams new_data_distribution +#' @noRd +#' @return +#' A string specifying the name of the expected value. +#' +get_expected_name <- function(family, data_type) { + expected_name <- dplyr::case_when( + data_type == "index" && grepl("lognormal|gaussian", family[["family"]]) && + family[["link"]] == "log" ~ "log_expected_index", + data_type == "index" && grepl("lognormal|gaussian", family[["family"]]) && + family[["link"]] == "identity" ~ "expected_index", + grepl("comp", data_type) ~ "proportion_catch_numbers_at_age" + ) + # Check combination of entries was okay and led to valid name + if (is.na(expected_name)) { + cli::cli_abort(c( + "x" = "Error, expected_name of distribution is {.var {expected_name}}.", + "i" = "Some combination of the family and data_type are incompatible." + )) + } + return(expected_name) +} + +#' Set up a new distribution for a data type +#' +#' @param module An identifier to a C++ fleet module that is linked to the data +#' of interest. +#' @param family A description of the error distribution and link function to +#' be used in the model. The argument takes a family class, e.g., +#' `stats::gaussian(link = "identity")`. +#' @param sd A list of length two. The first entry, `"value"`, (default = 1) +#' stores the initial values (scalar or vector) for the relevant standard +#' deviations. The second entry, `"estimated"` (default = FALSE) is a scalar +#' or vector of booleans indicating whether or not standard deviation is +#' estimated. If `"value"` is a vector and `"estimated"` is a scalar, the +#' `"estimated"` value will be applied to the entire vector, otherwise, the +#' dimensions of the two must match. +#' @param data_type A single string specifying the type of data that the +#' distribution will be fit to. Options are listed in the function call, +#' where the first option listed, i.e., `"index"` is the default. +#' @return +#' Reference Class. Use [show()] to view Rcpp class fields, methods, and +#' documentation. +#' @export +#' @examples +#' \dontrun{ +#' nyears <- 30 +#' # Create a new fleet module +#' fleet <- methods::new(Fleet) +#' # Create a distribution for the fleet module +#' fleet_distribution <- new_data_distribution( +#' module = fishing_fleet, +#' family = lognormal(link = "log"), +#' sd = list(value = rep(sqrt(log(0.01^2 + 1)), nyears), +#' estimated = rep(FALSE, nyears), +#' data_type = "index" +#' ) +#' } +new_data_distribution <- function( + module, + family, + sd = list(value = 1, estimated = FALSE), + data_type = c("index", "agecomp", "lengthcomp") +) { + data_type <- match.arg(data_type) + families <- c("lognormal", "gaussian", "multinomial") + + # validity check on user input + args <- list( + family = family, + sd = sd, + data_type = data_type, + families = families + ) + check_distribution_validity(args) + + # assign name of observed data based on data_type + obs_id_name <- glue::glue("observed_{data_type}_data_id") + + # Set up distribution based on `family` argument` + if (family[["family"]] == "lognormal") { + # create new Rcpp module + new_module <- new(TMBDlnormDistribution) + + # populate logged standard deviation parameter with log of input + new_module$log_logsd <- new( + ParameterVector, + log(sd$value), + length(sd$value) + ) + # setup whether or not sd parameter is estimated + if (length(sd$value) > 1 && length(sd$estimated) == 1) { + new_module$log_logsd$set_all_estimable(sd$estimated) + } else { + for (i in 1:seq_along(sd$estimated)) { + new_module$log_logsd[i]$estimated <- sd$estimated[i] + } + } + } + + if (family[["family"]] == "gaussian") { + # create new Rcpp module + new_module <- new(TMBDnormDistribution) + + # populate logged standard deviation parameter with log of input + new_module$log_sd <- new(ParameterVector, log(sd$value), length(sd$value)) + + # setup whether or not sd parameter is estimated + if (length(sd$value) > 1 && length(sd$estimated) == 1) { + new_module$log_sd$set_all_estimable(sd$estimated) + } else { + for (i in 1:seq_along(sd$estimated)) { + new_module$log_sd[i]$estimated <- sd$estimated[i] + } + } + } + + if (family[["family"]] == "multinomial") { + #create new Rcpp module + new_module <- new(TMBDmultinomDistribution) + } + + # setup link to observed data + if (data_type == "index") { + new_module$set_observed_data(module$GetObservedIndexDataID()) + } + if (data_type == "agecomp") { + new_module$set_observed_data(module$GetObservedAgeCompDataID()) + } + + # set name of expected values + expected <- get_expected_name(family, data_type) + # setup link to expected values + new_module$set_distribution_links("data", module$field(expected)$get_id()) + + return(new_module) +} + +#' Sets up a new distribution for a process +#' @inheritParams new_data_distribution +#' @param par A string specifying the parameter name the distribution applies +#' to. Parameters must be members of the specified module. Use +#' `methods::show(module)` to obtain names of parameters within the module. +#' @param is_random_effect A boolean indicating whether or not the process is +#' estimated as a random effect. +#' @seealso +#' * [new_data_distribution()] +#' @export +#' @return +#' Reference Class. Use `show()` to view Rcpp class fields, methods, and +#' documentation. +#' @examples +#' \dontrun{ +#' # Create a new fleet module +#' recruitment <- methods::new(BevertonHoltRecruitment) +#' # view parameter names of the recruitment module +#' methods::show(BevertonHoltRecruitment) +#' # Create a distribution for the recruitment module +#' recruitment_distribution <- new_process_distribution( +#' module = recruitment, +#' par = "log_devs", +#' family = gaussian(), +#' sd = list(value = 0.4, estimated = FALSE), +#' is_random_effect = FALSE +#' ) +#' } +new_process_distribution <- function(module, + par, + family, + sd = list(value = 1, estimated = FALSE), + is_random_effect = FALSE) { + families <- c("lognormal", "gaussian") + + # validity check on user input + args <- list(family = family, sd = sd, families = families) + check_distribution_validity(args) + + # Set up distribution based on `family` argument` + if (family[["family"]] == "lognormal") { + # create new Rcpp module + new_module <- new(TMBDlnormDistribution) + + # populate logged standard deviation parameter with log of input + new_module$log_logsd <- new( + ParameterVector, + log(sd$value), + length(sd$value) + ) + #setup whether or not sd parameter is estimated + if (length(sd$value) > 1 && length(sd$estimated) == 1) { + new_module$log_logsd$set_all_estimable(sd$estimated) + } else { + for (i in 1:seq_along(sd$estimated)) { + new_module$log_logsd[i]$estimated <- sd$estimated[i] + } + } + } + + if (family[["family"]] == "gaussian") { + # create new Rcpp module + new_module <- new(TMBDnormDistribution) + + # populate logged standard deviation parameter with log of input + new_module$log_sd <- new(ParameterVector, log(sd$value), length(sd$value)) + + #setup whether or not sd parameter is estimated + if (length(sd$value) > 1 && length(sd$estimated) == 1) { + new_module$log_sd$set_all_estimable(sd$estimated) + } else { + for (i in 1:seq_along(sd$estimated)) { + new_module$log_sd[i]$estimated <- sd$estimated[i] + } + } + } + + # indicate whether or not parameter is treated as a random effect in the model + module$field(par)$set_all_random(is_random_effect) + + n_dim <- length(module$field(par)) + + # create new Rcpp modules + new_module$x <- new(ParameterVector, n_dim) + new_module$expected_values <- new(ParameterVector, n_dim) + + # initialize values with 0 + # these are overwritten in the code later by user input + for (i in 1:n_dim) { + new_module$x[i]$value <- 0 + new_module$expected_values[i]$value <- 0 + } + + # setup links to parameter + new_module$set_distribution_links( + "random_effects", + module$field(par)$get_id() + ) + + return(new_module) +} + +#' Distributions not available in {stats} +#' +#' Family objects provide a convenient way to specify the details of the models +#' used by functions such as [stats::glm()]. These functions within this +#' package are not available within {stats} but are designed in a similar +#' manner. +#' +#' @inheritParams lognormal +#' @return +#' An object of class `family` (which has a concise print method). This +#' particular family has a truncated length compared to other distributions in +#' [stats::family()]. +#' \item{family}{character: the family name.} +#' \item{link}{character: the link name.} +#' +#' @seealso +#' * [stats::family()] +#' * [stats::gaussian()] +#' * [stats::glm()] +#' * [stats::power()] +#' * [stats::make.link()] +#' @export +#' @examples +#' a_family <- multinomial() +#' a_family[["family"]] +#' a_family[["link"]] +lognormal <- function(link = "log") { + family_class <- c( + list(family = "lognormal", link = link), + stats::make.link(link) + ) + class(family_class) <- "family" + return(family_class) +} + +#' @rdname lognormal +#' @export +multinomial <- function(link = "logit") { + family_class <- c( + list(family = "multinomial", link = link), + stats::make.link(link) + ) + class(family_class) <- "family" + return(family_class) +} diff --git a/inst/include/common/information.hpp b/inst/include/common/information.hpp index ed53c908..fafe0c9f 100644 --- a/inst/include/common/information.hpp +++ b/inst/include/common/information.hpp @@ -268,6 +268,65 @@ class Information { FIMS_INFO_LOG("segment"); f->Initialize(f->nyears, f->nages); + // set index data + if (f->fleet_observed_index_data_id_m != -999) { + uint32_t observed_index_id = + static_cast(f->fleet_observed_index_data_id_m); + data_iterator it = this->data_objects.find(observed_index_id); + INFO_LOG << "Input fleet index id = " << observed_index_id << "." + << std::endl; + if (it != this->data_objects.end()) { + f->observed_index_data = (*it).second; + INFO_LOG << "Index data successfully set." << std::endl; + DATA_LOG << "" << std::endl; + DATA_LOG << "Observed input for fleet " << f->id << ", index " + << observed_index_id << ": \n " + << f->observed_index_data->at(1) << std::endl; + } else { + valid_model = false; + ERROR_LOG << "Error: Expected data observations not defined for fleet" + << f->id << ", index " << observed_index_id << std::endl; + exit(1); + } + } else { + valid_model = false; + ERROR_LOG << "Error: No index data observed for fleet " << f->id + << ". FIMS requires index data for all fleets." << std::endl; + exit(1); + } + // end set index data + INFO_LOG << "Checking for available fleet age comp data objects." + << std::endl; + // set age composition data + if (f->fleet_observed_agecomp_data_id_m != -999) { + uint32_t observed_agecomp_id = + static_cast(f->fleet_observed_agecomp_data_id_m); + data_iterator it = this->data_objects.find(observed_agecomp_id); + INFO_LOG << "Input fleet age comp id = " << observed_agecomp_id << "." + << std::endl; + if (it != this->data_objects.end()) { + f->observed_agecomp_data = (*it).second; + INFO_LOG << "Age comp data successfully set." << std::endl; + DATA_LOG << "" << std::endl; + DATA_LOG << "Observed input age comp for fleet " << f->id << ", comp " + << observed_agecomp_id << ": \n " + << f->observed_agecomp_data->at(1) << std::endl; + } else { + valid_model = false; + ERROR_LOG << "Error: Expected age comp data observations not defined " + "for fleet " + << f->id << ", index " << observed_agecomp_id << std::endl; + exit(1); + } + } else { + valid_model = false; + ERROR_LOG << "Error: No age comp data observed for fleet " << f->id + << ". FIMS requires age comp data for all fleets." + << std::endl; + exit(1); + } + // end set composition data + INFO_LOG << "Checking for available fleet selectivity pattern." << std::endl; // set selectivity model diff --git a/inst/include/interface/rcpp/rcpp_interface.hpp b/inst/include/interface/rcpp/rcpp_interface.hpp index e2a164e5..d7e79b4e 100644 --- a/inst/include/interface/rcpp/rcpp_interface.hpp +++ b/inst/include/interface/rcpp/rcpp_interface.hpp @@ -657,6 +657,10 @@ RCPP_MODULE(fims) { .field("random_q", &FleetInterface::random_q) .field("log_expected_index", &FleetInterface::log_expected_index) .field("proportion_catch_numbers_at_age", &FleetInterface::proportion_catch_numbers_at_age) + .method("SetObservedAgeCompData", &FleetInterface::SetObservedAgeCompData) + .method("GetObservedAgeCompDataID", &FleetInterface::GetObservedAgeCompDataID) + .method("SetObservedIndexData", &FleetInterface::SetObservedIndexData) + .method("GetObservedIndexDataID", &FleetInterface::GetObservedIndexDataID) .method("SetSelectivity", &FleetInterface::SetSelectivity); Rcpp::class_("AgeComp") diff --git a/inst/include/interface/rcpp/rcpp_objects/rcpp_fleet.hpp b/inst/include/interface/rcpp/rcpp_objects/rcpp_fleet.hpp index 243304f6..4040f496 100644 --- a/inst/include/interface/rcpp/rcpp_objects/rcpp_fleet.hpp +++ b/inst/include/interface/rcpp/rcpp_objects/rcpp_fleet.hpp @@ -51,6 +51,10 @@ std::map FleetInterfaceBase::live_objects; * */ class FleetInterface : public FleetInterfaceBase { + int interface_observed_agecomp_data_id_m = + -999; /**< id of observed agecomp data object*/ + int interface_observed_index_data_id_m = + -999; /**< id of observed index data object*/ int interface_selectivity_id_m = -999; /**< id of selectivity component*/ public: @@ -76,17 +80,52 @@ class FleetInterface : public FleetInterfaceBase { virtual ~FleetInterface() {} - /** @brief returns the id for the fleet interface */ - virtual uint32_t get_id() { return this->id; } + /** @brief returns the id for the fleet interface */ + virtual uint32_t get_id() { return this->id; } - /** - * @brief Set the unique id for the Selectivity object - * - * @param selectivity_id Unique id for the Selectivity object - */ - void SetSelectivity(int selectivity_id) { - interface_selectivity_id_m = selectivity_id; - } + /** + * @brief Set the unique id for the Observed Age Comp Data object + * + * @param observed_agecomp_data_id Unique id for the Observed Age Comp Data + * object + */ + void SetObservedAgeCompData(int observed_agecomp_data_id) { + interface_observed_agecomp_data_id_m = observed_agecomp_data_id; + } + + /** + * @brief Return the unique id for the Observed Age Comp Data object + * + */ + int GetObservedAgeCompDataID() { + return interface_observed_agecomp_data_id_m; + } + + /** + * @brief Set the unique id for the Observed Index Data object + * + * @param observed_index_data_id Unique id for the Observed Index Data object + */ + void SetObservedIndexData(int observed_index_data_id) { + interface_observed_index_data_id_m = observed_index_data_id; + } + + /** + * @brief Return the unique id for the Observed Index Data object + * + */ + int GetObservedIndexDataID() { + return interface_observed_index_data_id_m; + } + + /** + * @brief Set the unique id for the Selectivity object + * + * @param selectivity_id Unique id for the Selectivity object + */ + void SetSelectivity(int selectivity_id) { + interface_selectivity_id_m = selectivity_id; + } /** * @brief finalize function. Extracts derived quantities back to * the Rcpp interface object from the Information object. @@ -257,6 +296,9 @@ class FleetInterface : public FleetInterfaceBase { fleet->is_survey = this->is_survey; fleet->nages = this->nages; fleet->nyears = this->nyears; + fleet->fleet_observed_agecomp_data_id_m = + interface_observed_agecomp_data_id_m; + fleet->fleet_observed_index_data_id_m = interface_observed_index_data_id_m; fleet->fleet_selectivity_id_m = interface_selectivity_id_m; fleet->log_q.resize(this->log_q.size()); diff --git a/inst/include/population_dynamics/fleet/fleet.hpp b/inst/include/population_dynamics/fleet/fleet.hpp index cba3a26a..b114dbb1 100644 --- a/inst/include/population_dynamics/fleet/fleet.hpp +++ b/inst/include/population_dynamics/fleet/fleet.hpp @@ -33,6 +33,16 @@ struct Fleet : public fims_model_object::FIMSObject { std::shared_ptr> selectivity; /*!< selectivity component*/ + // index data + int fleet_observed_index_data_id_m = -999; /*!< id of index data */ + std::shared_ptr> + observed_index_data; /*!< observed index data*/ + + // age comp data + int fleet_observed_agecomp_data_id_m = -999; /*!< id of age comp data */ + std::shared_ptr> + observed_agecomp_data; /*!< observed agecomp data*/ + // Mortality and catchability fims::Vector log_Fmort; /*!< estimated parameter: log Fishing mortality*/ diff --git a/man/lognormal.Rd b/man/lognormal.Rd new file mode 100644 index 00000000..5ff21008 --- /dev/null +++ b/man/lognormal.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/distribution_formulas.R +\name{lognormal} +\alias{lognormal} +\alias{multinomial} +\title{Distributions not available in {stats}} +\usage{ +lognormal(link = "log") + +multinomial(link = "logit") +} +\value{ +An object of class \code{family} (which has a concise print method). This +particular family has a truncated length compared to other distributions in +\code{\link[stats:family]{stats::family()}}. +\item{family}{character: the family name.} +\item{link}{character: the link name.} + +@seealso +\itemize{ +\item \code{\link[stats:family]{stats::family()}} +\item \code{\link[stats:family]{stats::gaussian()}} +\item \code{\link[stats:glm]{stats::glm()}} +\item \code{\link[stats:power]{stats::power()}} +\item \code{\link[stats:make.link]{stats::make.link()}} +} +} +\description{ +Family objects provide a convenient way to specify the details of the models +used by functions such as \code{\link[stats:glm]{stats::glm()}}. These functions within this +package are not available within {stats} but are designed in a similar +manner. +} +\examples{ +a_family <- multinomial() +a_family[["family"]] +a_family[["link"]] +} diff --git a/man/new_data_distribution.Rd b/man/new_data_distribution.Rd new file mode 100644 index 00000000..93459566 --- /dev/null +++ b/man/new_data_distribution.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/distribution_formulas.R +\name{new_data_distribution} +\alias{new_data_distribution} +\title{Set up a new distribution for a data type} +\usage{ +new_data_distribution( + module, + family, + sd = list(value = 1, estimated = FALSE), + data_type = c("index", "agecomp", "lengthcomp") +) +} +\arguments{ +\item{module}{An identifier to a C++ fleet module that is linked to the data +of interest.} + +\item{family}{A description of the error distribution and link function to +be used in the model. The argument takes a family class, e.g., +\code{stats::gaussian(link = "identity")}.} + +\item{sd}{A list of length two. The first entry, \code{"value"}, (default = 1) +stores the initial values (scalar or vector) for the relevant standard +deviations. The second entry, \code{"estimated"} (default = FALSE) is a scalar +or vector of booleans indicating whether or not standard deviation is +estimated. If \code{"value"} is a vector and \code{"estimated"} is a scalar, the +\code{"estimated"} value will be applied to the entire vector, otherwise, the +dimensions of the two must match.} + +\item{data_type}{A single string specifying the type of data that the +distribution will be fit to. Options are listed in the function call, +where the first option listed, i.e., \code{"index"} is the default.} +} +\value{ +Reference Class. Use \code{\link[=show]{show()}} to view Rcpp class fields, methods, and +documentation. +} +\description{ +Set up a new distribution for a data type +} +\examples{ +\dontrun{ +nyears <- 30 +# Create a new fleet module +fleet <- methods::new(Fleet) +# Create a distribution for the fleet module +fleet_distribution <- new_data_distribution( + module = fishing_fleet, + family = lognormal(link = "log"), + sd = list(value = rep(sqrt(log(0.01^2 + 1)), nyears), + estimated = rep(FALSE, nyears), + data_type = "index" +) +} +} diff --git a/man/new_process_distribution.Rd b/man/new_process_distribution.Rd new file mode 100644 index 00000000..78b93ab8 --- /dev/null +++ b/man/new_process_distribution.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/distribution_formulas.R +\name{new_process_distribution} +\alias{new_process_distribution} +\title{Sets up a new distribution for a process} +\usage{ +new_process_distribution( + module, + par, + family, + sd = list(value = 1, estimated = FALSE), + is_random_effect = FALSE +) +} +\arguments{ +\item{module}{An identifier to a C++ fleet module that is linked to the data +of interest.} + +\item{par}{A string specifying the parameter name the distribution applies +to. Parameters must be members of the specified module. Use +\code{methods::show(module)} to obtain names of parameters within the module.} + +\item{family}{A description of the error distribution and link function to +be used in the model. The argument takes a family class, e.g., +\code{stats::gaussian(link = "identity")}.} + +\item{sd}{A list of length two. The first entry, \code{"value"}, (default = 1) +stores the initial values (scalar or vector) for the relevant standard +deviations. The second entry, \code{"estimated"} (default = FALSE) is a scalar +or vector of booleans indicating whether or not standard deviation is +estimated. If \code{"value"} is a vector and \code{"estimated"} is a scalar, the +\code{"estimated"} value will be applied to the entire vector, otherwise, the +dimensions of the two must match.} + +\item{is_random_effect}{A boolean indicating whether or not the process is +estimated as a random effect.} +} +\value{ +Reference Class. Use \code{show()} to view Rcpp class fields, methods, and +documentation. +} +\description{ +Sets up a new distribution for a process +} +\examples{ +\dontrun{ +# Create a new fleet module +recruitment <- methods::new(BevertonHoltRecruitment) +# view parameter names of the recruitment module +methods::show(BevertonHoltRecruitment) +# Create a distribution for the recruitment module +recruitment_distribution <- new_process_distribution( + module = recruitment, + par = "log_devs", + family = gaussian(), + sd = list(value = 0.4, estimated = FALSE), + is_random_effect = FALSE +) +} +} +\seealso{ +\itemize{ +\item \code{\link[=new_data_distribution]{new_data_distribution()}} +} +} diff --git a/tests/testthat/helper-integration-tests-setup.R b/tests/testthat/helper-integration-tests-setup.R index e9420acd..dc4a11db 100644 --- a/tests/testthat/helper-integration-tests-setup.R +++ b/tests/testthat/helper-integration-tests-setup.R @@ -87,6 +87,8 @@ setup_and_run_FIMS_without_wrappers <- function(iter_id, fishing_fleet$estimate_q <- FALSE fishing_fleet$random_q <- FALSE fishing_fleet$SetSelectivity(fishing_fleet_selectivity$get_id()) + fishing_fleet$SetObservedIndexData(fishing_fleet_index$get_id()) + fishing_fleet$SetObservedAgeCompData(fishing_fleet_age_comp$get_id()) # Set up fishery index data using the lognormal fishing_fleet_index_distribution <- methods::new(TMBDlnormDistribution) @@ -97,12 +99,12 @@ setup_and_run_FIMS_without_wrappers <- function(iter_id, } fishing_fleet_index_distribution$log_logsd$set_all_estimable(FALSE) # Set Data using the IDs from the modules defined above - fishing_fleet_index_distribution$set_observed_data(fishing_fleet_index$get_id()) + fishing_fleet_index_distribution$set_observed_data(fishing_fleet$GetObservedIndexDataID()) fishing_fleet_index_distribution$set_distribution_links("data", fishing_fleet$log_expected_index$get_id()) # Set up fishery age composition data using the multinomial fishing_fleet_agecomp_distribution <- methods::new(TMBDmultinomDistribution) - fishing_fleet_agecomp_distribution$set_observed_data(fishing_fleet_age_comp$get_id()) + fishing_fleet_agecomp_distribution$set_observed_data(fishing_fleet$GetObservedAgeCompDataID()) fishing_fleet_agecomp_distribution$set_distribution_links("data", fishing_fleet$proportion_catch_numbers_at_age$get_id()) # repeat data for surveys @@ -132,6 +134,8 @@ setup_and_run_FIMS_without_wrappers <- function(iter_id, survey_fleet$estimate_q <- TRUE survey_fleet$random_q <- FALSE survey_fleet$SetSelectivity(survey_fleet_selectivity$get_id()) + survey_fleet$SetObservedIndexData(survey_fleet_index$get_id()) + survey_fleet$SetObservedAgeCompData(survey_fleet_age_comp$get_id()) # Set up survey index data using the lognormal survey_fleet_index_distribution <- methods::new(TMBDlnormDistribution) @@ -143,13 +147,13 @@ setup_and_run_FIMS_without_wrappers <- function(iter_id, } survey_fleet_index_distribution$log_logsd$set_all_estimable(FALSE) # Set Data using the IDs from the modules defined above - survey_fleet_index_distribution$set_observed_data(survey_fleet_index$get_id()) + survey_fleet_index_distribution$set_observed_data(survey_fleet$GetObservedIndexDataID()) survey_fleet_index_distribution$set_distribution_links("data", survey_fleet$log_expected_index$get_id()) # Age composition data survey_fleet_agecomp_distribution <- methods::new(TMBDmultinomDistribution) - survey_fleet_agecomp_distribution$set_observed_data(survey_fleet_age_comp$get_id()) + survey_fleet_agecomp_distribution$set_observed_data(survey_fleet$GetObservedAgeCompDataID()) survey_fleet_agecomp_distribution$set_distribution_links("data", survey_fleet$proportion_catch_numbers_at_age$get_id()) # Recruitment @@ -184,9 +188,9 @@ setup_and_run_FIMS_without_wrappers <- function(iter_id, recruitment_distribution$log_sd <- new(ParameterVector, 1) recruitment_distribution$log_sd[1]$value <- log(om_input$logR_sd) recruitment_distribution$log_sd[1]$estimated <- FALSE - recruitment_distribution$x <- new(ParameterVector, om_input$nyr) - recruitment_distribution$expected_values <- new(ParameterVector, om_input$nyr) - for (i in 1:om_input$nyr) { + recruitment_distribution$x <- new(ParameterVector, om_input$nyr - 1) + recruitment_distribution$expected_values <- new(ParameterVector, om_input$nyr - 1) + for (i in 1:(om_input$nyr - 1)) { recruitment_distribution$x[i]$value <- 0 recruitment_distribution$expected_values[i]$value <- 0 } @@ -349,23 +353,26 @@ setup_and_run_FIMS_with_wrappers <- function(iter_id, fishing_fleet$estimate_q <- FALSE fishing_fleet$random_q <- FALSE fishing_fleet$SetSelectivity(fishing_fleet_selectivity$get_id()) + fishing_fleet$SetObservedIndexData(fishing_fleet_index$get_id()) + fishing_fleet$SetObservedAgeCompData(fishing_fleet_age_comp$get_id()) # Set up fishery index data using the lognormal - fishing_fleet_index_distribution <- methods::new(TMBDlnormDistribution) - # lognormal observation error transformed on the log scale - fishing_fleet_index_distribution$log_logsd <- new(ParameterVector, om_input$nyr) - for (y in 1:om_input$nyr) { - fishing_fleet_index_distribution$log_logsd[y]$value <- log(sqrt(log(em_input$cv.L$fleet1^2 + 1))) - } - fishing_fleet_index_distribution$log_logsd$set_all_estimable(FALSE) - # Set Data using the IDs from the modules defined above - fishing_fleet_index_distribution$set_observed_data(fishing_fleet_index$get_id()) - fishing_fleet_index_distribution$set_distribution_links("data", fishing_fleet$log_expected_index$get_id()) + fishing_fleet_index_distribution <- new_data_distribution( + module = fishing_fleet, + family = lognormal(link = "log"), + sd = list( + value = rep(sqrt(log(em_input$cv.L$fleet1^2 + 1)), om_input$nyr), + estimated = FALSE + ), + data_type = "index" + ) # Set up fishery age composition data using the multinomial - fishing_fleet_agecomp_distribution <- methods::new(TMBDmultinomDistribution) - fishing_fleet_agecomp_distribution$set_observed_data(fishing_fleet_age_comp$get_id()) - fishing_fleet_agecomp_distribution$set_distribution_links("data", fishing_fleet$proportion_catch_numbers_at_age$get_id()) + fishing_fleet_agecomp_distribution <- new_data_distribution( + module = fishing_fleet, + family = multinomial(link = "logit"), + data_type = "agecomp" + ) # repeat data for surveys survey_index <- em_input$surveyB.obs$survey1 @@ -393,25 +400,27 @@ setup_and_run_FIMS_with_wrappers <- function(iter_id, survey_fleet$estimate_q <- TRUE survey_fleet$random_q <- FALSE survey_fleet$SetSelectivity(survey_fleet_selectivity$get_id()) + survey_fleet$SetObservedIndexData(survey_fleet_index$get_id()) + survey_fleet$SetObservedAgeCompData(survey_fleet_age_comp$get_id()) # Set up survey index data using the lognormal - survey_fleet_index_distribution <- methods::new(TMBDlnormDistribution) - # lognormal observation error transformed on the log scale - # sd = sqrt(log(cv^2 + 1)), sd is log transformed - survey_fleet_index_distribution$log_logsd <- new(ParameterVector, om_input$nyr) - for (y in 1:om_input$nyr) { - survey_fleet_index_distribution$log_logsd[y]$value <- log(sqrt(log(em_input$cv.survey$survey1^2 + 1))) - } - survey_fleet_index_distribution$log_logsd$set_all_estimable(FALSE) - # Set Data using the IDs from the modules defined above - survey_fleet_index_distribution$set_observed_data(survey_fleet_index$get_id()) - survey_fleet_index_distribution$set_distribution_links("data", survey_fleet$log_expected_index$get_id()) + survey_fleet_index_distribution <- new_data_distribution( + module = survey_fleet, + family = lognormal(link = "log"), + sd = list( + value = rep(sqrt(log(em_input$cv.survey$survey1^2 + 1)), om_input$nyr), + estimated = FALSE + ), + data_type = "index" + ) # Age composition data - survey_fleet_agecomp_distribution <- methods::new(TMBDmultinomDistribution) - survey_fleet_agecomp_distribution$set_observed_data(survey_fleet_age_comp$get_id()) - survey_fleet_agecomp_distribution$set_distribution_links("data", survey_fleet$proportion_catch_numbers_at_age$get_id()) + survey_fleet_agecomp_distribution <- new_data_distribution( + module = survey_fleet, + family = multinomial(link = "logit"), + data_type = "agecomp" + ) # Recruitment # create new module in the recruitment class (specifically Beverton-Holt, @@ -438,21 +447,12 @@ setup_and_run_FIMS_with_wrappers <- function(iter_id, # alternative setting: recruitment$log_devs <- rep(0, length(om_input$logR.resid)) recruitment$log_devs <- methods::new(ParameterVector, om_input$logR.resid[-1], om_input$nyr-1) - recruitment_distribution <- new(TMBDnormDistribution) - # set up logR_sd using the normal log_sd parameter - # logR_sd is NOT logged. It needs to enter the model logged b/c the exp() is - # taken before the likelihood calculation - recruitment_distribution$log_sd <- new(ParameterVector, 1) - recruitment_distribution$log_sd[1]$value <- log(om_input$logR_sd) - recruitment_distribution$log_sd[1]$estimated <- FALSE - recruitment_distribution$x <- new(ParameterVector, om_input$nyr) - recruitment_distribution$expected_values <- new(ParameterVector, om_input$nyr) - for (i in 1:om_input$nyr) { - recruitment_distribution$x[i]$value <- 0 - recruitment_distribution$expected_values[i]$value <- 0 - } - recruitment_distribution$set_distribution_links("random_effects", recruitment$log_devs$get_id()) - recruitment$estimate_log_devs <- TRUE + recruitment_distribution <- new_process_distribution( + module = recruitment, + par = "log_devs", family = gaussian(), + sd = list(value = om_input$logR_sd, estimated = FALSE), + is_random_effect = FALSE + ) # Growth ewaa_growth <- new(EWAAgrowth) diff --git a/tests/testthat/test-distribution-formulas.R b/tests/testthat/test-distribution-formulas.R new file mode 100644 index 00000000..59e34d50 --- /dev/null +++ b/tests/testthat/test-distribution-formulas.R @@ -0,0 +1,152 @@ +load(test_path("fixtures", "integration_test_data.RData")) +iter_id <- 1 +# Load operating model data +om_input <- om_input_list[[iter_id]] +om_output <- om_output_list[[iter_id]] +em_input <- em_input_list[[iter_id]] + +# Clear any previous FIMS settings +clear() + +test_that("test new_process_distribution", { + # Recruitment + # create new module in the recruitment class (specifically Beverton-Holt, + # when there are other options, this would be where the option would be chosen) + recruitment <- new(BevertonHoltRecruitment) + + # set up log_rzero (equilibrium recruitment) + recruitment$log_rzero[1]$value <- log(om_input$R0) + recruitment$log_rzero[1]$is_random_effect <- FALSE + recruitment$log_rzero[1]$estimated <- TRUE + # set up logit_steep + recruitment$logit_steep[1]$value <- -log(1.0 - om_input$h) + + log(om_input$h - 0.2) + recruitment$logit_steep[1]$is_random_effect <- FALSE + recruitment$logit_steep[1]$estimated <- FALSE + # turn on estimation of deviations + # recruit deviations should enter the model in normal space. + # The log is taken in the likelihood calculations + # alternative setting: recruitment$log_devs <- rep(0, length(om_input$logR.resid)) + recruitment$log_devs <- methods::new( + ParameterVector, + om_input$logR.resid[-1], + om_input$nyr - 1 + ) + + # set up logR_sd using the normal log_sd parameter + recruitment_distribution <- new_process_distribution( + module = recruitment, + par = "log_devs", + family = gaussian(), + sd = list(value = om_input$logR_sd, estimated = FALSE), + is_random_effect = FALSE + ) + recruitment$estimate_log_devs <- TRUE + + expect_equal(log(om_input$logR_sd), recruitment_distribution$log_sd[1]$value) + expect_equal(length(recruitment$log_devs), length(recruitment_distribution$x)) + expect_equal(length(recruitment_distribution$x), + length(recruitment_distribution$expected_values)) + expect_error( + new_process_distribution( + module = recruitment, + par = "log_devs", + family = multinomial(), + sd = list(value = om_input$logR_sd, estimated = FALSE), + is_random_effect = FALSE + ) + ) + expect_error( + new_process_distribution( + module = recruitment, + par = "log_devs", + family = binomial(), + sd = list(value = om_input$logR_sd, estimated = FALSE), + is_random_effect = FALSE + ) + ) + expect_error( + new_process_distribution( + module = recruitment, + par = "log_devs", + family = gaussian(), + sd = list(value = -1, estimated = FALSE), + is_random_effect = FALSE + ) + ) + expect_error( + new_process_distribution( + module = recruitment, + par = "log_devs", + family = gaussian(), + sd = list( + value = rep(om_input$logR_sd, 3), + estimated = rep(FALSE, 2) + ), + is_random_effect = FALSE) + ) + clear() +}) + + +test_that("test new_data_distribution", { + # Data + catch <- em_input$L.obs$fleet1 + # set fishing fleet catch data, need to set dimensions of data index + # currently FIMS only has a fleet module that takes index for both survey index and fishery catch + fishing_fleet_index <- new(Index, om_input$nyr) + fishing_fleet_index$index_data <- catch + fishing_fleet <- new(Fleet) + fishing_fleet$nages <- om_input$nages + fishing_fleet$nyears <- om_input$nyr + fishing_fleet$log_Fmort <- methods::new(ParameterVector, log(om_output$f), om_input$nyr) + fishing_fleet$log_Fmort$set_all_estimable(TRUE) + fishing_fleet$log_q[1]$value <- log(1.0) + fishing_fleet$estimate_q <- FALSE + fishing_fleet$random_q <- FALSE + fishing_fleet$SetObservedIndexData(fishing_fleet_index$get_id()) + + # Set up fishery index data using the lognormal + fleet_sd <- rep(sqrt(log(em_input$cv.L$fleet1^2 + 1)), om_input$nyr) + fishing_fleet_index_distribution <- new_data_distribution( + module = fishing_fleet, + family = lognormal(link = "log"), + sd = list(value = fleet_sd, estimated = FALSE), + data_type = "index" + ) + expect_equal(log(fleet_sd[1]), + fishing_fleet_index_distribution$log_logsd[1]$value) + expect_error( + new_data_distribution( + module = fishing_fleet, + family = multinomial(), + sd = list(value = fleet_sd, estimated = FALSE), + data_type = "index" + ) + ) + expect_error( + new_data_distribution( + module = fishing_fleet, + family = multinomial(), + sd = list(value = fleet_sd, estimated = FALSE), + data_type = "index" + ) + ) + expect_error( + new_data_distribution( + module = fishing_fleet, + family = gaussian(), + sd = list(value = fleet_sd, estimated = FALSE), + data_type = "agecomp" + ) + ) + expect_error( + new_data_distribution( + module = fishing_fleet, + family = lognormal(), + sd = list(value = fleet_sd, estimated = FALSE), + data_type = "lengthcomp" + ) + ) + clear() +}) diff --git a/tests/testthat/test-rcpp-fleet-interface.R b/tests/testthat/test-rcpp-fleet-interface.R index 3e8bff47..904df47d 100644 --- a/tests/testthat/test-rcpp-fleet-interface.R +++ b/tests/testthat/test-rcpp-fleet-interface.R @@ -22,4 +22,16 @@ fleet module", { clear() }) +test_that("Fleet: SetObservedAgeCompData works", { + fleet <- new(Fleet) + expect_silent(fleet$SetObservedAgeCompData(1)) + expect_equal(fleet$GetObservedAgeCompDataID(), 1) + clear() +}) +test_that("Fleet: SetObservedIndexData works", { + fleet <- new(Fleet) + expect_silent(fleet$SetObservedIndexData(1)) + expect_equal(fleet$GetObservedIndexDataID(), 1) + clear() +}) diff --git a/vignettes/fims-demo.Rmd b/vignettes/fims-demo.Rmd index 14eea6c7..7f1e2afc 100644 --- a/vignettes/fims-demo.Rmd +++ b/vignettes/fims-demo.Rmd @@ -163,35 +163,32 @@ fishing_fleet$log_Fmort$set_all_estimable(TRUE) fishing_fleet$log_q[1]$value <- log(1.0) fishing_fleet$estimate_q <- FALSE fishing_fleet$random_q <- FALSE +# Set Index, AgeComp, and Selectivity using the IDs from the modules defined above +fishing_fleet$SetObservedIndexData(fishing_fleet_index$get_id()) +fishing_fleet$SetObservedAgeCompData(fishing_fleet_age_comp$get_id()) fishing_fleet$SetSelectivity(fishing_fleet_selectivity$get_id()) ``` -We will use the Lognormal distribution for fitting the fishing fleet cpue data +We will use the Lognormal distribution for fitting the fishing fleet CPUE data. We indicate a log link function so that the expected value is the log of the expected CPUE. ```{r fleet-index-distribution} -fishing_fleet_index_distribution <- methods::new(TMBDlnormDistribution) -#lognormal observation error transformed on the log scale -fishing_fleet_index_distribution$log_logsd <- new(ParameterVector, nyears) -for(y in 1:nyears){ - fishing_fleet_index_distribution$log_logsd[y]$value <- log(sqrt(log(0.01^2 + 1))) -} -fishing_fleet_index_distribution$log_logsd$set_all_estimable(FALSE) -# Set Data using the IDs from the modules defined above -fishing_fleet_index_distribution$set_observed_data(fishing_fleet_index$get_id()) -``` - -The next step is to link this distribution to the expected value of the fleet, log expected_index, using the -set_distribution_links function. The first argument assigns the distribution type ("data", "random_effects", or "prior"). -The second argument takes the id of the parameter or derived value being linked to the distribution. -```{r fleet-expected} -fishing_fleet_index_distribution$set_distribution_links("data", fishing_fleet$log_expected_index$get_id()) - +fishing_fleet_index_distribution <- new_data_distribution( + module = fishing_fleet, + family = lognormal(link = "log"), + sd = list( + value = rep(sqrt(log(0.01^2 + 1)), nyears), + estimated = FALSE + ), + data_type = "index" +) ``` Now we will set up the distribution for the fishery age composition data using the Multinomial distribution ```{r fleet-age-comp-distribution} -fishing_fleet_agecomp_distribution <- methods::new(TMBDmultinomDistribution) -fishing_fleet_agecomp_distribution$set_observed_data(fishing_fleet_age_comp$get_id()) -fishing_fleet_agecomp_distribution$set_distribution_links("data", fishing_fleet$proportion_catch_numbers_at_age$get_id()) +fishing_fleet_agecomp_distribution <- new_data_distribution( + module = fishing_fleet, + family = multinomial(link = "logit"), + data_type = "agecomp" +) ``` ### The Survey Module We will now repeat the steps from Fleet to set up the Survey. A survey object is essentially the same as a fleet object with a catchability (q) variable. @@ -233,27 +230,28 @@ survey_fleet$nyears <- nyears survey_fleet$log_q[1]$value <- log(3.315143e-07) survey_fleet$estimate_q <- TRUE survey_fleet$random_q <- FALSE +survey_fleet$SetObservedIndexData(survey_fleet_index$get_id()) +survey_fleet$SetObservedAgeCompData(survey_fleet_age_comp$get_id()) survey_fleet$SetSelectivity(survey_fleet_selectivity$get_id()) ``` ```{r survey-distribution} -survey_fleet_index_distribution <- methods::new(TMBDlnormDistribution) -#lognormal observation error transformed on the log scale -# sd = sqrt(log(cv^2 + 1)), sd is log transformed -survey_fleet_index_distribution$log_logsd <- new(ParameterVector, nyears) -for(y in 1:nyears){ - survey_fleet_index_distribution$log_logsd[y]$value <- log(sqrt(log(0.2^2 + 1))) -} -survey_fleet_index_distribution$log_logsd$set_all_estimable(FALSE) -# Set Data using the IDs from the modules defined above -survey_fleet_index_distribution$set_observed_data(survey_fleet_index$get_id()) -survey_fleet_index_distribution$set_distribution_links("data", survey_fleet$log_expected_index$get_id()) +survey_fleet_index_distribution <- new_data_distribution( + module = survey_fleet, + family = lognormal(link = "log"), + sd = list( + value = rep(sqrt(log(0.2^2 + 1)), nyears), + estimated = FALSE + ), + data_type = "index" +) # Age composition data - -survey_fleet_agecomp_distribution <- methods::new(TMBDmultinomDistribution) -survey_fleet_agecomp_distribution$set_observed_data(survey_fleet_age_comp$get_id()) -survey_fleet_agecomp_distribution$set_distribution_links("data", survey_fleet$proportion_catch_numbers_at_age$get_id()) +survey_fleet_agecomp_distribution <- new_data_distribution( + module = survey_fleet, + family = multinomial(link = "logit"), + data_type = "agecomp" +) ``` ### Creating a Population @@ -291,19 +289,13 @@ recruitment$log_devs <- new(ParameterVector, c( In order to estimate *log_devs*, a new distribution needs to be created. This will be left commented out as we will proceed with not estimating log_devs. ```{r recruitment-distribution} -recruitment_distribution <- new(TMBDnormDistribution) -recruitment_distribution$log_sd <- new(ParameterVector, 1) -recruitment_distribution$log_sd[1]$value <- log(0.4) -recruitment_distribution$log_sd[1]$estimated = FALSE -# set dimension of observations -recruitment_distribution$x <- new(ParameterVector, nyears) -recruitment_distribution$expected_values <- new(ParameterVector, nyears) -for(i in 1:nyears){ - recruitment_distribution$x[i]$value <- 0 - recruitment_distribution$expected_values[i]$value <- 0 -} -recruitment_distribution$set_distribution_links("random_effects", recruitment$log_devs$get_id()) -recruitment$estimate_log_devs = TRUE +recruitment_distribution <- new_process_distribution( + module = recruitment, + par = "log_devs", + family = gaussian(), + sd = list(value = 0.4, estimated = FALSE), + is_random_effect = FALSE +) ``` #### Growth