diff --git a/R/model-tree.R b/R/model-tree.R
index b221dc7a1..2a5ae4d51 100644
--- a/R/model-tree.R
+++ b/R/model-tree.R
@@ -1,12 +1,15 @@
#' Create a tree diagram of a modeling directory
#'
-#' @param .log_df a `bbi_run_log_df` tibble (the output of `run_log()`) ***or***
+#' @param .log_df A `bbi_run_log_df` tibble (the output of `run_log()`) ***or***
#' a base directory to look in for models. See details for more options.
-#' @param include_info vector of columns present in `.log_df` to include in the
+#' @param include_info A vector of columns present in `.log_df` to include in the
#' tooltip.
-#' @param color_by a run log column to color the nodes by. Can be helpful for
+#' @param color_by A run log column to color the nodes by. Can be helpful for
#' identifying which models are starred, have heuristics, etc. See details
#' for more information.
+#' @param size_by A **numeric** (or integer) run log column to size the nodes by.
+#' If not specified, the default is to size the nodes based on how many
+#' models are based on it.
#' @param add_summary Logical (`TRUE`/`FALSE`). If `TRUE`, include key columns
#' from [model_summary()] output.
#' @param digits Number of digits to round decimal places to for display in
@@ -16,9 +19,10 @@
#' @param static Logical (`TRUE`/`FALSE`). If `TRUE`, render the plot as a
#' static image. This takes a little longer, as the interactive plot must
#' be saved as a PNG and loaded into the viewer.
-#' @param width width in pixels (optional, defaults to automatic sizing)
-#' @param height height in pixels (optional, defaults to automatic sizing)
-#' @param ... additional arguments passed to [run_log()]. Only used if `.log_df`
+#' @param width Width in pixels (optional, defaults to automatic sizing)
+#' @param height Height in pixels (optional, defaults to automatic sizing)
+#' @param font_size Font size of the label text in pixels
+#' @param ... Additional arguments passed to [run_log()]. Only used if `.log_df`
#' is a modeling directory.
#'
#' @section Required Columns:
@@ -34,7 +38,7 @@
#' additional columns as tooltips. This is illustrated in the examples via
#' `add_summary()` and `add_config()`.
#'
-#' @section Tooltip formatting and coloring:
+#' @section Tooltip formatting, coloring, and sizing:
#' **Tooltip formatting**
#'
#' Any column in `.log_df` can be chosen to include in the tooltip. However,
@@ -50,8 +54,8 @@
#' - Note that the above summary columns will only receive the special
#' formatting if added via `add_summary = TRUE`.
#' - i.e. if `.log_df = run_log() %>% add_summary()` and
-#' `include_info = 'ofv'`, The `'OFV'` parameter will be formatted as any
-#' other additional column.
+#' `include_info = 'ofv'`, the `'OFV'` parameter will display the same as if
+#' it was _not_ passed to `include_info`.
#'
#' **Coloring**
#'
@@ -61,6 +65,7 @@
#' where earlier runs are whiter, and later runs appear to be more red. You can
#' pass `color_by = NULL` to make all model nodes `'red'`.
#'
+#'
#' @examples
#' \dontrun{
#'
@@ -72,19 +77,27 @@
#' model_tree(MODEL_DIR, color_by = "star")
#'
#'
-#' # Run `add_config()`, `add_summary()`, and/or `mutate()` calls beforehand
-#' run_log(MODEL_DIR) %>%
-#' add_config() %>%
-#' dplyr::mutate(out_of_date = model_has_changed | data_has_changed) %>%
+#' ## Run `add_config()`, `add_summary()`, and/or `mutate()` calls beforehand
+#'
+#'
+#' # Size nodes by objective function value
+#' run_log(MODEL_DIR) %>% add_summary() %>%
+#' model_tree(size_by = "ofv", color_by = "ofv")
+#'
+#' # Determine if certain models need to be re-run
+#' run_log(MODEL_DIR) %>% add_config() %>%
+#' dplyr::mutate(
+#' out_of_date = model_has_changed | data_has_changed
+#' ) %>%
#' model_tree(
#' include_info = c("model_has_changed", "data_has_changed", "nm_version"),
#' color_by = "out_of_date"
#' )
#'
-#' run_log(MODEL_DIR) %>%
-#' add_summary() %>%
+#' # Highlight models with any heuristics
+#' run_log(MODEL_DIR) %>% add_summary() %>%
#' model_tree(
-#' include_info = c("tags", "param_count", "eta_pval_significant"),
+#' include_info = c("param_count", "eta_pval_significant"),
#' color_by = "any_heuristics"
#' )
#'
@@ -95,74 +108,96 @@ model_tree <- function(
.log_df,
include_info = c("description","star", "tags"),
color_by = "run",
+ size_by = NULL,
add_summary = TRUE,
digits = 3,
zoomable = FALSE,
static = FALSE,
width = NULL,
height = NULL,
+ font_size = 10,
...
){
UseMethod("model_tree")
}
-#' @rdname model_tree
+
#' @export
model_tree.character <- function(
.log_df,
include_info = c("description","star", "tags"),
color_by = "run",
+ size_by = NULL,
add_summary = TRUE,
digits = 3,
zoomable = FALSE,
static = FALSE,
width = NULL,
height = NULL,
+ font_size = 10,
...
){
checkmate::assert_directory_exists(.log_df)
.log_df <- run_log(.log_df, ...)
model_tree(
.log_df,
- include_info = include_info, color_by = color_by,
+ include_info = include_info,
+ color_by = color_by, size_by = size_by,
add_summary = add_summary, digits = digits,
zoomable = zoomable, static = static,
- width = width, height = height
+ width = width, height = height,
+ font_size = font_size
)
}
-#' @rdname model_tree
#' @export
model_tree.bbi_log_df <- function(
.log_df,
- include_info = c("description","star", "tags"),
+ include_info = c("description", "star", "tags"),
color_by = "run",
+ size_by = NULL,
add_summary = TRUE,
digits = 3,
zoomable = FALSE,
static = FALSE,
width = NULL,
height = NULL,
+ font_size = 10,
...
){
# Make sure required dependencies are installed
stop_if_tree_missing_deps(static = static)
# Make tree data
- tree_data <- make_tree_data(.log_df, include_info, color_by, add_summary)
+ tree_data <- make_tree_data(.log_df, include_info, color_by, size_by, add_summary)
# Format coloring
tree_data <- color_tree_by(tree_data, color_by = color_by)
+ tree_attr <- attributes(tree_data)$color_by
+
+ # Format sizing
+ tree_data <- size_tree_by(tree_data, size_by = size_by)
+ node_size <- attributes(tree_data)$size_by
# Compile attributes into tooltip
- tree_data <- make_tree_tooltip(tree_data, digits = digits)
+ tree_data <- make_tree_tooltip(tree_data, digits = digits, font_size = font_size)
# Create model tree
- tree_attr <- ifelse(is.null(color_by), "leafCount", color_by)
+ # - Notes about aggFun:
+ # - identity is not the same as base::identity. collapsibleTree has specific
+ # handling for `aggFun = identity`, and it's the only way for sizing to work
+ # based on a column without aggregating values.
+ # - Note: The node sizing logic has a known quirk where it appears to scale
+ # sizes relative to the first "parent" node (i.e., the first row where `from`
+ # is not NA). This can cause inconsistencies in relative node sizes depending
+ # on the value of the first parent node.
pl_tree <- collapsibleTree::collapsibleTreeNetwork(
- tree_data, zoomable = zoomable, attribute = tree_attr,
- fill="col", collapsed = FALSE, nodeSize = "leafCount",
- tooltipHtml = "tooltip", width = width, height = height
+ tree_data, zoomable = zoomable, collapsed = FALSE,
+ # Coloring and sizing
+ attribute = tree_attr, fill="col", nodeSize = node_size, aggFun = identity,
+ # Tooltip and display
+ tooltipHtml = "tooltip", fontSize = font_size,
+ width = width, height = height
)
if(isTRUE(static)){
@@ -195,22 +230,27 @@ make_tree_data <- function(
.log_df,
include_info = c("description","star", "tags"),
color_by = "run",
+ size_by = NULL,
add_summary = TRUE
){
- cols_keep <- unique(c(include_info, color_by))
- checkmate::assert_true(all(cols_keep %in% names(.log_df)))
-
# Check for required columns and starting format
req_cols <- c(ABS_MOD_PATH, "run", "based_on", "model_type")
if(!(all(req_cols %in% names(.log_df)))){
- cols_missing <- req_cols[!(req_cols %in% names(.log_df))]
- cols_missing <- paste(cols_missing, collapse = ", ")
- rlang::abort(
- glue::glue("The following required columns are missing: {cols_missing}")
+ cols_missing <- setdiff(req_cols, names(.log_df))
+ cli::cli_abort(
+ "The following {.emph required} columns are missing from `.log_df`: {.val {cols_missing}}"
)
}
checkmate::assert_true(inherits(.log_df$based_on, "list"))
+ cols_keep <- unique(c(include_info, color_by, size_by))
+ if(!all(cols_keep %in% names(.log_df))){
+ cols_missing <- setdiff(cols_keep, names(.log_df))
+ cli::cli_abort(
+ "The following {.emph specified} columns are missing from `.log_df`: {.val {cols_missing}}"
+ )
+ }
+
# These columns have special handling either here or in the tooltip
base_log_cols <- c(req_cols, "description", "star", "tags")
@@ -484,7 +524,7 @@ check_model_tree <- function(network_df){
#' defines the model network.
#' @inheritParams model_tree
#' @noRd
-make_tree_tooltip <- function(tree_data, digits = 3){
+make_tree_tooltip <- function(tree_data, digits = 3, font_size = 10){
round_numeric <- function(x, digits){
# Round instead of signif - this can matter for objective functions
@@ -499,12 +539,17 @@ make_tree_tooltip <- function(tree_data, digits = 3){
# executed (i.e. NA values will still be displayed for these columns)
can_include <- function(txt) !is.na(txt) && txt != ""
+ bold_css <- "font-weight:bold;"
+ italics_css <- "font-style:italic;"
+ run_font_size <- font_size + 4
+ run_css <- glue::glue("font-size:{run_font_size}px; {bold_css}")
+
# Tooltip from run log
base_tt_cols <- attr(tree_data, "base_tt_cols")
tooltip <- purrr::imap_chr(tree_data$to, function(.x, .y){
mod_name <- ifelse(.x == "Start", .x, paste("Run", .x))
mod_html <- style_html(
- mod_name, color = "#538b01", "font-size:14px; font-weight:bold", br_after = TRUE
+ mod_name, color = "#538b01", run_css, br_after = TRUE
)
# Model type
@@ -518,7 +563,7 @@ make_tree_tooltip <- function(tree_data, digits = 3){
can_include(tree_data$addl_based_on[.y]),
style_html(
paste("Additional Based on:", tree_data$addl_based_on[.y]),
- "font-weight:bold", br_after = TRUE
+ bold_css, br_after = TRUE
),
""
)
@@ -526,7 +571,7 @@ make_tree_tooltip <- function(tree_data, digits = 3){
# Other parameters
desc_html <- ifelse(
"description" %in% base_tt_cols && can_include(tree_data$description[.y]),
- style_html(tree_data$description[.y], "font-style:italic", br_after = TRUE),
+ style_html(tree_data$description[.y], italics_css, br_after = TRUE),
""
)
tags_html <- ifelse(
@@ -536,7 +581,7 @@ make_tree_tooltip <- function(tree_data, digits = 3){
)
star_html <- ifelse(
"star" %in% base_tt_cols && can_include(tree_data$star[.y]) && isTRUE(tree_data$star[.y]),
- style_html("Starred", color = "#ffa502", "font-weight:bold", br_after = TRUE),
+ style_html("Starred", color = "#ffa502", bold_css, br_after = TRUE),
""
)
@@ -553,13 +598,13 @@ make_tree_tooltip <- function(tree_data, digits = 3){
# Conditional heuristics text
any_heuristics <- tree_data$any_heuristics[.y]
heuristics_txt <- if(!is.na(any_heuristics) && isTRUE(any_heuristics)){
- paste0("
", style_html("--Heuristics Found--", color = "#A30000", "font-weight:bold"))
+ paste0("
", style_html("--Heuristics Found--", color = "#A30000", bold_css))
}else{
""
}
# Conditional simulation text
has_sim_txt <- if(has_simulation(read_model(tree_data[[ABS_MOD_PATH]][.y]))){
- paste0("
", style_html("--Simulation attached--", color = "#ad7fa8", "font-weight:bold"))
+ paste0("
", style_html("--Simulation attached--", color = "#ad7fa8", bold_css))
}else{
""
}
@@ -585,12 +630,12 @@ make_tree_tooltip <- function(tree_data, digits = 3){
)
# Combined tooltip
paste0(
- style_html(mod_status, color = status_col, "font-weight:bold", br_before = TRUE, br_after = TRUE),
+ style_html(mod_status, color = status_col, bold_css, br_before = TRUE, br_after = TRUE),
ofv, n_sub, n_obs, heuristics_txt, has_sim_txt
)
}else{
# If not run, just show the status
- style_html(mod_status, color = status_col, "font-weight:bold", br_before = TRUE)
+ style_html(mod_status, color = status_col, bold_css, br_before = TRUE)
}
})
@@ -608,6 +653,7 @@ make_tree_tooltip <- function(tree_data, digits = 3){
})
paste0(other_html, collapse = "")
})
+ other_tooltip[1] <- "" # Skip start node for other tooltips
tooltip <- paste0(tooltip, other_tooltip, sum_tooltip)
}else{
tooltip <- paste0(tooltip, sum_tooltip)
@@ -618,7 +664,7 @@ make_tree_tooltip <- function(tree_data, digits = 3){
}
-#' Create a color column based on the unique values of another column
+#' Create a color column based on the unique values of another column.
#' @inheritParams make_tree_tooltip
#' @inheritParams model_tree
#' @noRd
@@ -653,7 +699,19 @@ color_tree_by <- function(tree_data, color_by = "run"){
n_levels <- dplyr::n_distinct(vals)
# To preview color palette: scales::show_col(pal_bbr)
- if(inherits(vals, "logical")){
+ if(inherits(vals, c("numeric", "integer"))){
+ # Gradient coloring (sorted): get colors for unique values (excluding NA)
+ sorted_vals <- sort(unique(vals))
+ pal_bbr <- scales::pal_gradient_n(bbr_cols)(seq(0, 1, length.out = n_levels))
+
+ # Assign colors based on sorted values
+ color_mapping <- setNames(pal_bbr, sorted_vals)
+ tree_data$col <- ifelse(
+ tree_data$col %in% sorted_vals,
+ color_mapping[as.character(tree_data$col)],
+ tree_data$col
+ )
+ }else if(inherits(vals, "logical")){
# Ensure both TRUE and FALSE colors are extracted, even if only one occurs
pal_bbr <- scales::pal_gradient_n(bbr_cols)(c(0,1))
# Explicitly set FALSE to white and TRUE to red
@@ -666,16 +724,68 @@ color_tree_by <- function(tree_data, color_by = "run"){
)
)
}else{
- # Gradient coloring: get colors for unique values (excluding NA)
+ # Gradient coloring; doesn't need to be sorted
pal_bbr <- scales::pal_gradient_n(bbr_cols)(seq(0, 1, length.out = n_levels))
tree_data$col <- factor(tree_data$col)
levels(tree_data$col) <- c(node_colors, pal_bbr)
}
+ # Set color_by attribute to color_by argument
+ attr(tree_data, "color_by") <- color_by
}else{
# all bbr red color
pal_bbr <- scales::pal_gradient_n(bbr_cols)(1)
tree_data$col <- factor(tree_data$col)
levels(tree_data$col) <- c(node_colors, pal_bbr)
+ # Set color_by attribute to leafCount (default)
+ attr(tree_data, "color_by") <- "leafCount"
+ }
+
+ return(tree_data)
+}
+
+#' Create a size column based on the unique _numeric_ (or integer) values of
+#' another column.
+#' @inheritParams make_tree_tooltip
+#' @inheritParams model_tree
+#' @param rescale_to A numeric vector of length 2 specifying the range to rescale
+#' `size_by` values to. Defaults to `c(1, 3)`, where `1` is the smallest node
+#' size and `3` is the largest.
+#' @noRd
+size_tree_by <- function(tree_data, size_by = NULL, rescale_to = c(1, 3)){
+ if(!is.null(size_by)){
+ checkmate::assert_true(size_by %in% names(tree_data))
+ checkmate::assert_numeric(rescale_to, len = 2, lower = 1)
+
+ tree_data$node_size <- tree_data[[size_by]]
+
+ # Scale size with numeric value
+ if(inherits(tree_data$node_size, c("numeric", "integer"))){
+
+ # Rescale to specified range
+ # - node sizes must be greater than 0 (decimals are fine)
+ # - A large SD can lead to large nodes (max of rescale_to should be small)
+ tree_data <- tree_data %>% dplyr::mutate(
+ node_size = scales::rescale(.data$node_size, to = rescale_to)
+ )
+
+ # Set node sizes with NA values (including start node) to mean rescale_to
+ tree_data$node_size[is.na(tree_data$node_size)] <- mean(rescale_to)
+
+ # Set size_by attribute to node_size column
+ attr(tree_data, "size_by") <- "node_size"
+ }else{
+ col_class <- class(tree_data[[size_by]])
+ cli::cli_warn(
+ c(
+ "Only numeric columns are supported. Column {.val {size_by}} is {.val {col_class}}",
+ "i" = "Setting node size to Default"
+ )
+ )
+ attr(tree_data, "size_by") <- "leafCount"
+ }
+ }else{
+ # Set size_by attribute to leafCount (default)
+ attr(tree_data, "size_by") <- "leafCount"
}
return(tree_data)
@@ -713,15 +823,16 @@ format_model_type <- function(model_type, fmt_html = FALSE, ...){
)
if(isTRUE(fmt_html)){
+ bold_css <- "font-weight:bold;"
mod_type_fmt <- dplyr::case_when(
model_type == "nonmem" ~
- style_html(mod_type_fmt, color = "#119a9c", "font-weight:bold", ...),
+ style_html(mod_type_fmt, color = "#119a9c", bold_css, ...),
model_type == "nmboot" ~
- style_html(mod_type_fmt, color = "#c49f02", "font-weight:bold", ...),
+ style_html(mod_type_fmt, color = "#c49f02", bold_css, ...),
model_type == "nmsim" ~
- style_html(mod_type_fmt, color = "#ad7fa8", "font-weight:bold", ...),
+ style_html(mod_type_fmt, color = "#ad7fa8", bold_css, ...),
TRUE ~
- style_html(mod_type_fmt, color = "black", "font-weight:bold", ...)
+ style_html(mod_type_fmt, color = "black", bold_css, ...)
)
}
diff --git a/man/make_tree_data.Rd b/man/make_tree_data.Rd
index b4e64320c..b6209ac02 100644
--- a/man/make_tree_data.Rd
+++ b/man/make_tree_data.Rd
@@ -8,19 +8,24 @@ make_tree_data(
.log_df,
include_info = c("description", "star", "tags"),
color_by = "run",
+ size_by = NULL,
add_summary = TRUE
)
}
\arguments{
\item{.log_df}{a \code{bbr} run log}
-\item{include_info}{vector of columns present in \code{.log_df} to include in the
+\item{include_info}{A vector of columns present in \code{.log_df} to include in the
tooltip.}
-\item{color_by}{a run log column to color the nodes by. Can be helpful for
+\item{color_by}{A run log column to color the nodes by. Can be helpful for
identifying which models are starred, have heuristics, etc. See details
for more information.}
+\item{size_by}{A \strong{numeric} (or integer) run log column to size the nodes by.
+If not specified, the default is to size the nodes based on how many
+models are based on it.}
+
\item{add_summary}{Logical (\code{TRUE}/\code{FALSE}). If \code{TRUE}, include key columns
from \code{\link[=model_summary]{model_summary()}} output.}
}
diff --git a/man/model_tree.Rd b/man/model_tree.Rd
index 0c3fb7f2a..1fc520d57 100644
--- a/man/model_tree.Rd
+++ b/man/model_tree.Rd
@@ -2,60 +2,38 @@
% Please edit documentation in R/model-tree.R
\name{model_tree}
\alias{model_tree}
-\alias{model_tree.character}
-\alias{model_tree.bbi_log_df}
\title{Create a tree diagram of a modeling directory}
\usage{
model_tree(
.log_df,
include_info = c("description", "star", "tags"),
color_by = "run",
+ size_by = NULL,
add_summary = TRUE,
digits = 3,
zoomable = FALSE,
static = FALSE,
width = NULL,
height = NULL,
- ...
-)
-
-\method{model_tree}{character}(
- .log_df,
- include_info = c("description", "star", "tags"),
- color_by = "run",
- add_summary = TRUE,
- digits = 3,
- zoomable = FALSE,
- static = FALSE,
- width = NULL,
- height = NULL,
- ...
-)
-
-\method{model_tree}{bbi_log_df}(
- .log_df,
- include_info = c("description", "star", "tags"),
- color_by = "run",
- add_summary = TRUE,
- digits = 3,
- zoomable = FALSE,
- static = FALSE,
- width = NULL,
- height = NULL,
+ font_size = 10,
...
)
}
\arguments{
-\item{.log_df}{a \code{bbi_run_log_df} tibble (the output of \code{run_log()}) \emph{\strong{or}}
+\item{.log_df}{A \code{bbi_run_log_df} tibble (the output of \code{run_log()}) \emph{\strong{or}}
a base directory to look in for models. See details for more options.}
-\item{include_info}{vector of columns present in \code{.log_df} to include in the
+\item{include_info}{A vector of columns present in \code{.log_df} to include in the
tooltip.}
-\item{color_by}{a run log column to color the nodes by. Can be helpful for
+\item{color_by}{A run log column to color the nodes by. Can be helpful for
identifying which models are starred, have heuristics, etc. See details
for more information.}
+\item{size_by}{A \strong{numeric} (or integer) run log column to size the nodes by.
+If not specified, the default is to size the nodes based on how many
+models are based on it.}
+
\item{add_summary}{Logical (\code{TRUE}/\code{FALSE}). If \code{TRUE}, include key columns
from \code{\link[=model_summary]{model_summary()}} output.}
@@ -69,11 +47,13 @@ dragging and scrolling.}
static image. This takes a little longer, as the interactive plot must
be saved as a PNG and loaded into the viewer.}
-\item{width}{width in pixels (optional, defaults to automatic sizing)}
+\item{width}{Width in pixels (optional, defaults to automatic sizing)}
+
+\item{height}{Height in pixels (optional, defaults to automatic sizing)}
-\item{height}{height in pixels (optional, defaults to automatic sizing)}
+\item{font_size}{Font size of the label text in pixels}
-\item{...}{additional arguments passed to \code{\link[=run_log]{run_log()}}. Only used if \code{.log_df}
+\item{...}{Additional arguments passed to \code{\link[=run_log]{run_log()}}. Only used if \code{.log_df}
is a modeling directory.}
}
\description{
@@ -96,7 +76,7 @@ additional columns as tooltips. This is illustrated in the examples via
\code{add_summary()} and \code{add_config()}.
}
-\section{Tooltip formatting and coloring}{
+\section{Tooltip formatting, coloring, and sizing}{
\strong{Tooltip formatting}
@@ -115,8 +95,8 @@ formatted specially:
\item Note that the above summary columns will only receive the special
formatting if added via \code{add_summary = TRUE}.
\item i.e. if \code{.log_df = run_log() \%>\% add_summary()} and
-\code{include_info = 'ofv'}, The \code{'OFV'} parameter will be formatted as any
-other additional column.
+\code{include_info = 'ofv'}, the \code{'OFV'} parameter will display the same as if
+it was \emph{not} passed to \code{include_info}.
}
}
@@ -140,19 +120,27 @@ run_log(MODEL_DIR) \%>\% model_tree()
model_tree(MODEL_DIR, color_by = "star")
-# Run `add_config()`, `add_summary()`, and/or `mutate()` calls beforehand
-run_log(MODEL_DIR) \%>\%
- add_config() \%>\%
- dplyr::mutate(out_of_date = model_has_changed | data_has_changed) \%>\%
+## Run `add_config()`, `add_summary()`, and/or `mutate()` calls beforehand
+
+
+# Size nodes by objective function value
+run_log(MODEL_DIR) \%>\% add_summary() \%>\%
+ model_tree(size_by = "ofv", color_by = "ofv")
+
+# Determine if certain models need to be re-run
+run_log(MODEL_DIR) \%>\% add_config() \%>\%
+ dplyr::mutate(
+ out_of_date = model_has_changed | data_has_changed
+ ) \%>\%
model_tree(
include_info = c("model_has_changed", "data_has_changed", "nm_version"),
color_by = "out_of_date"
)
-run_log(MODEL_DIR) \%>\%
- add_summary() \%>\%
+# Highlight models with any heuristics
+run_log(MODEL_DIR) \%>\% add_summary() \%>\%
model_tree(
- include_info = c("tags", "param_count", "eta_pval_significant"),
+ include_info = c("param_count", "eta_pval_significant"),
color_by = "any_heuristics"
)
diff --git a/tests/testthat/helpers-create-example-model.R b/tests/testthat/helpers-create-example-model.R
index 80880408b..110211b1b 100644
--- a/tests/testthat/helpers-create-example-model.R
+++ b/tests/testthat/helpers-create-example-model.R
@@ -157,7 +157,8 @@ make_fake_boot <- function(mod, n = 100, strat_cols = c("SEX", "ETN")){
# Unlike make_fake_boot however, the simulation will have a status of "Not Run"
make_fake_sim <- function(mod, mod_id = "mod-sim", n = 100){
mod_sim <- copy_model_from(mod, mod_id) %>% update_model_id()
- new_dir_path <- file.path(MODEL_DIR, mod_id)
+ model_dir <- bbr:::get_model_working_directory(mod)
+ new_dir_path <- file.path(model_dir, mod_id)
fs::dir_copy(mod$absolute_model_path, new_dir_path)
mod_sim <- add_msf_opt(mod_sim)
diff --git a/tests/testthat/test-model-tree.R b/tests/testthat/test-model-tree.R
index 32ee3a949..1cba0942a 100644
--- a/tests/testthat/test-model-tree.R
+++ b/tests/testthat/test-model-tree.R
@@ -2,6 +2,10 @@ context("Model tree diagram")
skip_if_not_ci_or_metworx("test-model-tree")
skip_if_tree_missing_deps()
+# These two functions ignore the 'start' node, as we are only comparing
+# to the run_log
+
+# Count how many nodes appear in the model tree for each model
count_nodes <- function(tree_list) {
if(length(tree_list) == 0) return(0)
# Iterate through each element in the list
@@ -17,6 +21,26 @@ count_nodes <- function(tree_list) {
return(total_nodes)
}
+# Get node attribute for each model
+get_node_attribute <- function(tree_list, attr = 'SizeOfNode') {
+ if(length(tree_list) == 0) return(numeric(0))
+ # Iterate through each element in the list
+ attribute_values <- numeric(0)
+ for(i in seq_along(tree_list)) {
+ # Check if the specified attribute exists in the current node
+ if(!is.null(tree_list[[i]][[attr]])){
+ attr_value <- tree_list[[i]][[attr]]
+ if(is.factor(attr_value)) attr_value <- as.character(attr_value)
+ attribute_values <- c(attribute_values, attr_value)
+ }
+ # If the current node has children, recursively get the attribute from children
+ if(length(tree_list[[i]]$children) > 0){
+ attribute_values <- c(attribute_values, get_node_attribute(tree_list[[i]]$children, attr))
+ }
+ }
+ return(attribute_values)
+}
+
withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), {
@@ -337,7 +361,7 @@ withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), {
c("#007319", rep("#EB003D", 4), "#C0C0C0")
)
- # Test numeric/character color_by (gradient coloring)
+ # Test character color_by (gradient coloring)
tree_data <- make_tree_data(run_log(MODEL_DIR), add_summary = FALSE)
tree_data_run <- color_tree_by(tree_data, color_by = "run")
expect_equal(
@@ -346,6 +370,95 @@ withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), {
# Note: all gradient colors will shift if number of models change
c("#007319", "#FFFFFF", "#F4DBD3", "#ED9D84", "#E35B44", "#EB003D")
)
+
+ # Regression test: numeric color_by is sorted appropriately
+ log_df <- run_log(MODEL_DIR)
+ log_df$numeric_vals <- c(1534, 3892, 731, 2653, 3574)
+ pl_tree <- model_tree(
+ log_df, add_summary = FALSE, color_by = "numeric_vals",
+ include_info = "numeric_vals"
+ )
+ node_colors <- get_node_attribute(pl_tree$x$data$children, attr = "fill")
+ expected_colors <- c("#F4DBD3", "#EB003D", "#FFFFFF", "#ED9D84", "#E35B44")
+ # Can inspect with `scales::show_col(node_colors)`
+ expect_equal(node_colors, expected_colors)
+
+ # Check integer case
+ log_df$numeric_vals <- as.integer(log_df$numeric_vals)
+ pl_tree <- model_tree(
+ log_df, add_summary = FALSE, color_by = "numeric_vals",
+ include_info = "numeric_vals"
+ )
+ node_colors <- get_node_attribute(pl_tree$x$data$children, attr = "fill")
+ expected_colors <- c("#F4DBD3", "#EB003D", "#FFFFFF", "#ED9D84", "#E35B44")
+ # Can inspect with `scales::show_col(node_colors)`
+ expect_equal(node_colors, expected_colors)
+ })
+
+ it("size_tree_by()", {
+ clean_test_enviroment(create_tree_models)
+
+ log_df <- run_log(MODEL_DIR) %>% dplyr::mutate(
+ size_col = as.integer(run)
+ )
+
+ # Checks that the size increases with each node (like size_col, i.e. run number)
+ pl_tree <- model_tree(log_df, add_summary = FALSE, size_by = "size_col")
+ node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode")
+ expect_true(all(diff(node_sizes) > 0))
+
+ ### Data checks ###
+
+ # Test numeric size_by (gradient sizing) - mimics objective function
+ set.seed(1234)
+ log_df <- log_df %>% dplyr::mutate(
+ size_col = abs(rnorm(nrow(log_df), mean = 1500, sd = 800))
+ )
+ size_col_vals <- log_df$size_col
+ pl_tree <- model_tree(log_df, add_summary = FALSE, size_by = "size_col")
+ node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode")
+
+ tree_data <- make_tree_data(log_df, add_summary = FALSE, size_by = "size_col")
+ tree_data_size <- size_tree_by(tree_data, size_by = "size_col")
+ data_sizes <- tree_data_size$node_size[-1]
+
+ # Checks that the ordering is consistent
+ # - Checks the underlying data, and rendered node size
+ expect_equal(order(size_col_vals), order(node_sizes))
+ expect_equal(order(size_col_vals), order(data_sizes))
+
+ # Check if all the same value
+ log_df2 <- log_df
+ log_df2$size_col <- 1
+ size_col_vals <- log_df2$size_col
+ pl_tree <- model_tree(log_df2, add_summary = FALSE, size_by = "size_col")
+ node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode")
+
+ tree_data <- make_tree_data(log_df2, add_summary = FALSE, size_by = "size_col")
+ tree_data_size <- size_tree_by(tree_data, size_by = "size_col")
+ data_sizes <- tree_data_size$node_size[-1]
+
+ # Checks that all values are the same size
+ # - Checks the underlying data, and rendered node size
+ expect_true(dplyr::n_distinct(node_sizes) == 1)
+ expect_true(dplyr::n_distinct(data_sizes) == 1)
+
+ ## Warns if non-numeric (or non-integer) column ##
+ log_df2 <- log_df2 %>% dplyr::mutate(run = as.character(run))
+ # Check logical
+ expect_warning(
+ pl_tree <- model_tree(log_df2, add_summary = FALSE, size_by = "star"),
+ 'Only numeric columns are supported'
+ )
+ node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode")
+ expect_true(dplyr::n_distinct(node_sizes) == 2) # leafCount sizing
+ # Check character
+ expect_warning(
+ pl_tree <- model_tree(log_df2, add_summary = FALSE, size_by = "star"),
+ 'Only numeric columns are supported'
+ )
+ node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode")
+ expect_true(dplyr::n_distinct(node_sizes) == 2) # leafCount sizing
})
it("static plot", {
@@ -357,5 +470,20 @@ withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), {
expect_true(inherits(pl_tree, "model_tree_static"))
expect_true(inherits(pl_tree$png_array, "array"))
})
+
+ it("Check for missing columns", {
+ clean_test_enviroment(create_tree_models)
+ # Required columns are missing
+ log_df <- run_log(MODEL_DIR) %>% dplyr::select(-c("run", "based_on", "model_type"))
+ expect_error(
+ model_tree(log_df, add_summary = FALSE),
+ "columns are missing"
+ )
+ # Specified columns are missing
+ expect_error(
+ model_tree(MODEL_DIR, add_summary = FALSE, include_info = c("oops_I", "did_it_again")),
+ "columns are missing"
+ )
+ })
})
}) # closing withr::with_options
diff --git a/vignettes/model-tree.Rmd b/vignettes/model-tree.Rmd
index dd7fa1e15..830705dde 100644
--- a/vignettes/model-tree.Rmd
+++ b/vignettes/model-tree.Rmd
@@ -132,16 +132,19 @@ model_tree(MODEL_DIR)
```{r, echo=FALSE, eval=eval_model_tree}
# you dont need to specify the width when rendering in Rstudio viewer, but it makes a difference when rendered in Rmarkdown
# - relying on auto-sizing leads to a plot with a small width
-model_tree(MODEL_DIR, width = 800)
+model_tree(MODEL_DIR, width = 800, font_size = 12)
```
+
+## Adjust the coloring and tooltip
+
If coloring by a logical column, `FALSE` and `TRUE` values will correspond to white and red coloring respectively. Numeric or character columns will be colored as a gradient. `NA` values will appear grey regardless of the column type.
```{r, eval=FALSE}
model_tree(MODEL_DIR, color_by = "star")
```
```{r, echo=FALSE, eval=eval_model_tree}
-model_tree(MODEL_DIR, color_by = "star", width = 800)
+model_tree(MODEL_DIR, color_by = "star", width = 800, font_size = 12)
```
@@ -151,35 +154,49 @@ log_df <- run_log(MODEL_DIR)
log_df
```
+```{r, echo=FALSE, eval=eval_model_tree}
+log_df <- run_log(MODEL_DIR) %>% add_summary() %>% add_config() %>% suppressWarnings()
+ofv_nas <- which(is.na(log_df$ofv))
+# Overwrite ofv to reflect a typical modeling workflow
+set.seed(1234)
+log_df$ofv <- sort(abs(rnorm(nrow(log_df), mean = 2600, sd = 500)), decreasing = TRUE)
+# Make the "final model" the smallest ofv instead of the last one in the run log
+log_df$ofv[log_df$description == "final model"] <- min(log_df$ofv, na.rm = TRUE)
+log_df$ofv[nrow(log_df)] <- min(log_df$ofv, na.rm = TRUE)*1.1
+# retain existing NAs of objective function (mainly for bootstrap example)
+log_df$ofv[ofv_nas] <- NA
+```
+
In this example we define a new column, `out_of_date`, to denote whether the model or data has changed since the last run. We can color by this new column to determine if any of the models need to be re-run:
```{r, eval=FALSE}
-run_log(MODEL_DIR) %>%
- add_config() %>%
+log_df %>% add_config() %>%
dplyr::mutate(out_of_date = model_has_changed | data_has_changed) %>%
model_tree(
- include_info = c("model_has_changed", "data_has_changed", "nm_version"),
+ include_info = c("model_has_changed", "data_has_changed"),
color_by = "out_of_date"
)
```
```{r, echo=FALSE, eval=eval_model_tree}
# Since using fake model runs (appearing out of date), set most to FALSE
-log_df <- run_log(MODEL_DIR) %>% add_config() %>% suppressWarnings()
-log_df$model_has_changed[2:7] <- FALSE
+log_df2 <- log_df
+log_df2$model_has_changed[2:7] <- FALSE
+log_df2 <- log_df2 %>% dplyr::mutate(
+ out_of_date = model_has_changed | data_has_changed
+)
-log_df %>%
- dplyr::mutate(out_of_date = model_has_changed | data_has_changed) %>%
- model_tree(
- include_info = c("model_has_changed", "data_has_changed", "nm_version"),
- color_by = "out_of_date",
- width = 800
- )
+model_tree(
+ log_df2,
+ include_info = c("model_has_changed", "data_has_changed", "nm_version"),
+ color_by = "out_of_date",
+ width = 800,
+ font_size = 12
+)
```
The model tree can also be helpful for quickly determine if any heuristics were found during any model submissions, as well as displaying specific model summary output in the tooltip.
```{r, eval=FALSE}
-run_log(MODEL_DIR) %>%
- add_summary() %>%
+log_df %>% add_summary() %>%
model_tree(
include_info = c("tags", "param_count", "eta_pval_significant"),
color_by = "any_heuristics"
@@ -188,13 +205,35 @@ run_log(MODEL_DIR) %>%
```{r, echo=FALSE, eval=eval_model_tree}
# Since using fake models (all inheriting the same issues), set some to FALSE
-log_df <- run_log(MODEL_DIR) %>% add_summary()
-log_df$any_heuristics[c(5, 7, 9)] <- FALSE
+log_df2 <- log_df
+log_df2$any_heuristics[c(5, 7, 9)] <- FALSE
+
model_tree(
- log_df,
+ log_df2,
include_info = c("tags", "param_count", "eta_pval_significant"),
color_by = "any_heuristics",
- width = 800
+ width = 800,
+ font_size = 12
+)
+```
+
+## Size the nodes by a particular column
+
+Controlling the node size can be helpful for quickly determining the trend of a particular **numeric** column. Here, we use `color_by` and `size_by` to show the objective function value decreasing with each new model.
+
+ - **Note:** Like the `color_by` argument, only columns included in `log_df` can be passed. We have to call `add_summary()` to use `"ofv"` even though this is included in the tooltip when `add_summary = TRUE`.
+ - If `size_by` is _not_ specified, the nodes are sized based on how many other models/nodes stem from it (i.e. the "final model" will be smaller than the "base model").
+```{r, eval=FALSE}
+log_df %>% add_summary() %>%
+ model_tree(color_by = "ofv", size_by = "ofv")
+```
+
+```{r, echo=FALSE, eval=eval_model_tree}
+model_tree(
+ log_df,
+ color_by = "ofv", size_by = "ofv",
+ width = 800,
+ font_size = 12
)
```