Skip to content

Commit

Permalink
refine get_estimates()
Browse files Browse the repository at this point in the history
- create helper functions for parameters and derived quants
- modify how gradients are added
  • Loading branch information
iantaylor-NOAA committed Aug 16, 2024
1 parent b72c066 commit ecce969
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 118 deletions.
244 changes: 127 additions & 117 deletions R/FIMSOutput.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,105 @@ create_fims_output <- function(json_list, call = NULL, data) {
return(out)
}

#' Process parameter info in json output and return rows for the estimates tibble
#'
#' @param json A list extracted from an element of the `json_list`` provided to [get_estimates()].
#' @param fleet The fleet number associated with this parameter (if applicable)
#' @param module_name The name of the module which contains this parameter (e.g. "selectivity" or "Fleet")
#' @param module_type The type of the module which contains this parameter (e.g. "Logistic" or "survey")
#' @return
#' Rows that can be added to the `estimates` tibble.
get_parameter <- function(json, fleet = NA, module_name, module_type) {
# placeholder for output
estimates_newrows <- NULL

# rename element to remove inconsistency between "value" and "values".
# TODO: delete this step when json becomes standardized
# if ("value" in )
names(json)[names(json) == "value"] <- "values"
names(json)[names(json) == "estimated_value"] <- "estimated_values"

# check for zero length output
# (currently only present for parameter with name = "proportion_female")
if ("values" %in% names(json) && length(json$values) > 0) {
# create parameter label
# TODO: revise this once parameter labeling gets refined in the future
label <- paste(
module_name, # (e.g. "selectivity" or "Fleet")
module_type, # (e.g. "Logistic" or "survey")
json$name, # parameter name (e.g. "inflection_point")
sep = "_"
)

# create tibble with new rows for these parameters
estimates_newrows <- dplyr::tibble(
label = label,
parameter_id = json$id,
fleet = fleet,
age = NA, # TODO: not yet available in the json output
time = NA, # TODO: not yet available in the json output
initial = json$values,
estimate = json$estimated_values,
uncertainty = NA, # TODO: not yet available in the json output
likelihood = NA, # TODO: not yet available in the json output
gradient = NA, # to be filled in based on parameter_id
estimated = json$is_estimated

# TODO: do something with "is_random_effect" in the future?
)
} # end check for non-zero length of values vector

# return result (could be NULL if "values" is empty)
return(estimates_newrows)
}

# TODO: consider merging get_parameter() and get_derived_quantity()

#' Process derived quantitiy info in json output and return rows for the estimates tibble
#'
#' @param json A list extracted from an element of the `json_list`` provided to [get_estimates()].
#' @param fleet The fleet number associated with this derived quantity (if applicable)
#' @param module_name The name of the module which contains this derived quantity
#' @param module_type The type of the module which contains this derived quantity
#' @return
#' Rows that can be added to the `estimates` tibble.
get_derived_quantity <- function(json, fleet = NA, module_name, module_type) {
# placeholder for output
estimates_newrows <- NULL

# create parameter label
# TODO: revise this once parameter labeling gets refined in the future
label <- paste(
module_name, # (e.g. "selectivity" or "Fleet")
module_type, # (e.g. "Logistic" or "survey")
json$name, # parameter name (e.g. "inflection_point")
sep = "_"
)
# create tibble with new rows for these parameters
estimates_newrows <- dplyr::tibble(
label = label,
parameter_id = NA,
fleet = fleet,
age = NA, # TODO: not yet available in the json output
time = NA, # TODO: not yet available in the json output
initial = NA,
estimate = json$values,
uncertainty = NA, # TODO: not yet available in the json output
likelihood = NA, # TODO: not yet available in the json output
gradient = NA, # to be filled in based on parameter_id
estimated = NA
)

# return result (could be NULL if "values" is empty)
return(estimates_newrows)
}

get_estimates <- function(json_list, nyears = 30, ages = 1:12) {
# Need to define tibble with zero rows so NA will be used to fill in
# missing sections when combining estimates and derived quantities later
estimates_outline <- dplyr::tibble(
label = character(),
parameter_id = integer(), # not included in design doc but will be useful for processing
fleet = integer(), # was initially character()
age = numeric(),
time = numeric(),
Expand All @@ -107,137 +201,53 @@ get_estimates <- function(json_list, nyears = 30, ages = 1:12) {
estimated = logical()
)

# Format the JSON to get information out easier
# Delete anything you want here to the return function

# # Doing this renaming is dumb
# # but helped me understand some stuff in the beginning
# names(json_list) <- ifelse(
# names(json_list) == "module",
# purrr::map(json_list, "name"),
# names(json_list)
# )

# # Does not get what we need but helped me understand the structure
# # it could be better to use tidyjson in the beginning then reverse
# # engineer their functions using purrr and tidyr later.
# # Might want to remove the data objects and just work with modules
# # for this function
# z <- json_list |>
# unlist() |>
# tibble::enframe() |>
# tidyr::separate_wider_delim(
# name,
# delim = ".",
# too_few = "align_start",
# names = c("module", "type", "x")
# ) |>
# dplyr::filter(type == "name")

# loop over highest level elements in json file which have name "module"
for (i in which(names(json_list) == "module")) {
# module names in fims-demo currently include
# "data", "selectivity", "Fleet", "recruitment", "growth", "maturity", "Population"
# code below doesn't do anything with "data" as it doesn't have elements called
# "parameter" or "derived_quantity"
message(json_list[[i]]$name)
module_name <- json_list[[i]]$name
module_type <- json_list[[i]]$type
fleet <- NA
if (module_name == "Fleet") {
fleet <- json_list[[i]]$id
}
message("processing element ", i, ": ", module_name)

# loop over parameters (if there are none then won't loop)
# still need to add loop over derived quantities
for (parameter_index in which(names(json_list[[i]]) == "parameter")) {
for (index in which(names(json_list[[i]]) == "parameter")) {
# get info from parameter element of module
# "is_random_effect" is the one element not yet utilized in the code below
parameter_info <- json_list[[i]][[parameter_index]]
# check for zero length output (currently present for parameter with name = "proportion_female")
if (
"value" %in% names(parameters) && length(parameter_info$value) != 0 |
"values" %in% names(parameters) && length(parameter_info$values) != 0
) {
# create parameter label
# parameter labeling will change in the future, currently just concatenating info
label <- paste(
json_list[[i]]$name, # module name (e.g. "selectivity", or "Fleet")
json_list[[i]]$type, # module type (e.g. "Logistic", "fleet")
parameter_info$name, # parameter name (e.g. "inflection_point")
sep = "_"
)
# Hack to make unique labels by appending sequence of numbers to
# parameter label for vectors (only other type seems to be "scalar")
if (parameter_info$type == "vector") {
label <- paste(label,
1:length(parameter_info$value), # hack to make unique names, need to figure out assigment of age and time
sep = "_"
)
}

# create tibble with new rows for these parameters
estimates_newrows <- dplyr::tibble(
label = label,
# fleet number is in "id"
fleet = ifelse(
test = json_list[[i]]$name == "Fleet",
yes = json_list[[i]]$id,
no = NA
),
age = NA,
time = NA,
# "value" vs "values" depending on whether parameter is a scalar or vector
initial = ifelse(
test = parameter_info$type == "scalar",
yes = parameter_info$value,
no = parameter_info$values
),
estimate = ifelse(
test = parameter_info$type == "scalar",
yes = parameter_info$estimated_value,
no = parameter_info$estimated_values
),
uncertainty = NA,
likelihood = NA,
gradient = ifelse(
test = parameter_info$id >= 0,
yes = json_list$final_gradient[parameter_info$id + 1], # id starts at 0
no = NA
),
estimated = parameter_info$is_estimated
)
# if new rows were created above, bind to end of tibble
if (nrow(estimates_newrows) > 0) {
estimates_outline <- dplyr::bind_rows(estimates_outline, estimates_newrows)
}
} # end check for non-zero length of values vector
estimates_newrows <- get_parameter(
json = json_list[[i]][[index]],
fleet = fleet,
module_name = module_name,
module_type = module_type
)
# if new rows were returned by get_parameter(), then bind to end of tibble
if (!is.null(estimates_newrows) > 0) {
estimates_outline <- dplyr::bind_rows(estimates_outline, estimates_newrows)
}
} # end loop over parameters

# loop over derived quantities
# in the future this could be merged with processing of parameters, but easier to keep separate for now
for (derived_quantity_index in which(names(json_list[[i]]) == "derived_quantity")) {
info <- json_list[[i]][[derived_quantity_index]]
label <- paste(
json_list[[i]]$name, # module name (e.g. "Population")
json_list[[i]]$type, # module type for things where there's no tag (e.g. "Logistic")
info$name, # parameter name (e.g. "inflection_point")
sep = "_"
)
# add gradient based on parameter_id
# for all parameters with id avialable (not -999),
# TODO: check why length of final_gradient doesn't match number of parameters
good_parameter_id <- !is.na(estimates_outline$parameter_id) & estimates_outline$parameter_id >= 0
estimates_outline$gradient[good_parameter_id] <-
json_list$final_gradient[estimates_outline$parameter_id[good_parameter_id] + 1]

# create tibble with new rows for these parameters
estimates_newrows <- dplyr::tibble(
label = label,
# fleet number is in "id"
fleet = ifelse(
test = json_list[[i]]$name == "Fleet",
yes = json_list[[i]]$id,
no = NA
),
age = NA,
time = NA,
initial = NA,
estimate = info$values,
uncertainty = NA,
likelihood = NA,
gradient = NA,
estimated = NA
# loop over derived quantities
# in the future this could be merged with processing of parameters,
# but easier to keep separate for now
for (index in which(names(json_list[[i]]) == "derived_quantity")) {
estimates_newrows <- get_derived_quantity(
json = json_list[[i]][[index]],
fleet = fleet,
module_name = module_name,
module_type = module_type
)

# if new rows were created above, bind to end of tibble
if (nrow(estimates_newrows) > 0) {
estimates_outline <- dplyr::bind_rows(estimates_outline, estimates_newrows)
Expand Down
3 changes: 2 additions & 1 deletion vignettes/fims-demo.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,8 @@ sdr <- TMB::sdreport(obj)
sdr_fixed <- summary(sdr, "fixed")
report <- obj$report(obj$env$last.par.best)
output <- jsonlite::fromJSON(ToJSON())
estimates <- get_estimates(output) # work in progress
# work in progress, currently functions to create output2@estimates
output2 <- create_fims_output(output)
# # how to write json output to a file
# z = ToJSON()
# writeLines(z, "test_2024-07-30.json")
Expand Down

0 comments on commit ecce969

Please sign in to comment.