From 55f6d440cf38d3c65e729b410af66239289e825f Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Mon, 30 Sep 2024 14:43:29 -0400 Subject: [PATCH 01/20] Add `size_by` argument to `model_tree()` - Allows users to control the node sizing by a particular column present in `.log_df` - Important to note that `ofv` cant be used without first calling `add_summary()`, which is consistent with the previous behavior of `color_by` and `include_info` - This argument does support logical columns, however I noticed that the normalized sizing can differ here depending when the first `TRUE` value originates. If it's the first value, all the nodes appear smaller. Anywhere else and the "base size" is larger. This observation is likely true for numeric columns too, though is less apparent when the values are all closer to each other. - `collapsibleTree::collapsibleTreeNetwork` is only intended to support numeric columns for sizing, so I opted to map these values to: TRUE=5, FALSE=1, NA=3 for the time being. We may want to revisit NA values for both numeric and logical columns - Added examples to docs --- R/model-tree.R | 111 +++++++++++++++++++++++++++++++++--------- man/make_tree_data.Rd | 9 +++- man/model_tree.Rd | 76 +++++++++++++---------------- 3 files changed, 130 insertions(+), 66 deletions(-) diff --git a/R/model-tree.R b/R/model-tree.R index b221dc7a1..3fc0cd270 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -1,12 +1,15 @@ #' Create a tree diagram of a modeling directory #' -#' @param .log_df a `bbi_run_log_df` tibble (the output of `run_log()`) ***or*** +#' @param .log_df A `bbi_run_log_df` tibble (the output of `run_log()`) ***or*** #' a base directory to look in for models. See details for more options. -#' @param include_info vector of columns present in `.log_df` to include in the +#' @param include_info A vector of columns present in `.log_df` to include in the #' tooltip. -#' @param color_by a run log column to color the nodes by. Can be helpful for +#' @param color_by A run log column to color the nodes by. Can be helpful for #' identifying which models are starred, have heuristics, etc. See details #' for more information. +#' @param size_by A run log column to size the nodes by. Can be helpful for +#' sizing nodes by objective function values or otherwise emphasizing +#' notable differences in numeric columns. See details for more information. #' @param add_summary Logical (`TRUE`/`FALSE`). If `TRUE`, include key columns #' from [model_summary()] output. #' @param digits Number of digits to round decimal places to for display in @@ -16,9 +19,9 @@ #' @param static Logical (`TRUE`/`FALSE`). If `TRUE`, render the plot as a #' static image. This takes a little longer, as the interactive plot must #' be saved as a PNG and loaded into the viewer. -#' @param width width in pixels (optional, defaults to automatic sizing) -#' @param height height in pixels (optional, defaults to automatic sizing) -#' @param ... additional arguments passed to [run_log()]. Only used if `.log_df` +#' @param width Width in pixels (optional, defaults to automatic sizing) +#' @param height Height in pixels (optional, defaults to automatic sizing) +#' @param ... Additional arguments passed to [run_log()]. Only used if `.log_df` #' is a modeling directory. #' #' @section Required Columns: @@ -34,7 +37,7 @@ #' additional columns as tooltips. This is illustrated in the examples via #' `add_summary()` and `add_config()`. #' -#' @section Tooltip formatting and coloring: +#' @section Tooltip formatting, coloring, and sizing: #' **Tooltip formatting** #' #' Any column in `.log_df` can be chosen to include in the tooltip. However, @@ -61,6 +64,14 @@ #' where earlier runs are whiter, and later runs appear to be more red. You can #' pass `color_by = NULL` to make all model nodes `'red'`. #' +#' +#' **Sizing** +#' +#' Sizing is intended to be used for numeric columns (such as `'ofv'` when +#' `.log_df = run_log() %>% add_summary()`). Logical columns are supported, though +#' you may experience different sizing behavior depending on the occurrence of +#' `TRUE`/`FALSE` values in the run log. +#' #' @examples #' \dontrun{ #' @@ -72,19 +83,27 @@ #' model_tree(MODEL_DIR, color_by = "star") #' #' -#' # Run `add_config()`, `add_summary()`, and/or `mutate()` calls beforehand -#' run_log(MODEL_DIR) %>% -#' add_config() %>% -#' dplyr::mutate(out_of_date = model_has_changed | data_has_changed) %>% +#' ## Run `add_config()`, `add_summary()`, and/or `mutate()` calls beforehand +#' +#' +#' # Size nodes by objective function value +#' run_log(MODEL_DIR) %>% add_summary() %>% +#' model_tree(size_by = "ofv") +#' +#' # Determine if certain models need to be re-run +#' run_log(MODEL_DIR) %>% add_config() %>% +#' dplyr::mutate( +#' out_of_date = model_has_changed | data_has_changed +#' ) %>% #' model_tree( #' include_info = c("model_has_changed", "data_has_changed", "nm_version"), #' color_by = "out_of_date" #' ) #' -#' run_log(MODEL_DIR) %>% -#' add_summary() %>% +#' # Highlight models with any heuristics +#' run_log(MODEL_DIR) %>% add_summary() %>% #' model_tree( -#' include_info = c("tags", "param_count", "eta_pval_significant"), +#' include_info = c("param_count", "eta_pval_significant"), #' color_by = "any_heuristics" #' ) #' @@ -95,6 +114,7 @@ model_tree <- function( .log_df, include_info = c("description","star", "tags"), color_by = "run", + size_by = NULL, add_summary = TRUE, digits = 3, zoomable = FALSE, @@ -106,12 +126,13 @@ model_tree <- function( UseMethod("model_tree") } -#' @rdname model_tree + #' @export model_tree.character <- function( .log_df, include_info = c("description","star", "tags"), color_by = "run", + size_by = NULL, add_summary = TRUE, digits = 3, zoomable = FALSE, @@ -124,19 +145,20 @@ model_tree.character <- function( .log_df <- run_log(.log_df, ...) model_tree( .log_df, - include_info = include_info, color_by = color_by, + include_info = include_info, + color_by = color_by, size_by = size_by, add_summary = add_summary, digits = digits, zoomable = zoomable, static = static, width = width, height = height ) } -#' @rdname model_tree #' @export model_tree.bbi_log_df <- function( .log_df, - include_info = c("description","star", "tags"), + include_info = c("description", "star", "tags"), color_by = "run", + size_by = NULL, add_summary = TRUE, digits = 3, zoomable = FALSE, @@ -149,19 +171,23 @@ model_tree.bbi_log_df <- function( stop_if_tree_missing_deps(static = static) # Make tree data - tree_data <- make_tree_data(.log_df, include_info, color_by, add_summary) + tree_data <- make_tree_data(.log_df, include_info, color_by, size_by, add_summary) # Format coloring tree_data <- color_tree_by(tree_data, color_by = color_by) + tree_attr <- ifelse(is.null(color_by), "leafCount", color_by) + + # Format sizing + tree_data <- size_tree_by(tree_data, size_by = size_by) # Compile attributes into tooltip tree_data <- make_tree_tooltip(tree_data, digits = digits) # Create model tree - tree_attr <- ifelse(is.null(color_by), "leafCount", color_by) pl_tree <- collapsibleTree::collapsibleTreeNetwork( tree_data, zoomable = zoomable, attribute = tree_attr, - fill="col", collapsed = FALSE, nodeSize = "leafCount", + fill="col", collapsed = FALSE, nodeSize = "node_size", + aggFun = identity, tooltipHtml = "tooltip", width = width, height = height ) @@ -195,9 +221,10 @@ make_tree_data <- function( .log_df, include_info = c("description","star", "tags"), color_by = "run", + size_by = NULL, add_summary = TRUE ){ - cols_keep <- unique(c(include_info, color_by)) + cols_keep <- unique(c(include_info, color_by, size_by)) checkmate::assert_true(all(cols_keep %in% names(.log_df))) # Check for required columns and starting format @@ -681,6 +708,46 @@ color_tree_by <- function(tree_data, color_by = "run"){ return(tree_data) } +#' Create a size column based on the unique values of another column +#' @inheritParams make_tree_tooltip +#' @inheritParams model_tree +#' @noRd +size_tree_by <- function(tree_data, size_by = NULL){ + # Initialize new size column + if(!is.null(size_by)){ + checkmate::assert_true(size_by %in% names(tree_data)) + tree_data$node_size <- tree_data[[size_by]] + }else{ + tree_data$node_size <- 1 + } + + # Sizing only works for numeric columns, so additional handling is needed for + # logical and NA values + # - Logical values: TRUE = 5, FALSE = 1, NA = 3 + # - Numeric values with NA: NA = mean value + if(inherits(tree_data$node_size, "logical")){ + tree_data <- tree_data %>% dplyr::mutate( + node_size = dplyr::case_when( + node_size == TRUE ~ 5, + node_size == FALSE ~ 1, + is.na(node_size) ~ 3, + TRUE ~ 3 + ) + ) + }else if(inherits(tree_data$node_size, "numeric")){ + mean_val <- mean(tree_data$node_size, na.rm = TRUE) + # Set node sizes with NA values (including start node) to mean value + tree_data$node_size[is.na(tree_data$node_size)] <- mean_val + }else{ + tree_data$node_size <- 1 + cli::cli_warn( + "Only numeric and logical columns are supported for {.val size_by}" + ) + } + + return(tree_data) +} + #' Helper for coloring text and applying other styles #' @param txt text to format #' @param color color added to the text diff --git a/man/make_tree_data.Rd b/man/make_tree_data.Rd index b4e64320c..a4d05da97 100644 --- a/man/make_tree_data.Rd +++ b/man/make_tree_data.Rd @@ -8,19 +8,24 @@ make_tree_data( .log_df, include_info = c("description", "star", "tags"), color_by = "run", + size_by = NULL, add_summary = TRUE ) } \arguments{ \item{.log_df}{a \code{bbr} run log} -\item{include_info}{vector of columns present in \code{.log_df} to include in the +\item{include_info}{A vector of columns present in \code{.log_df} to include in the tooltip.} -\item{color_by}{a run log column to color the nodes by. Can be helpful for +\item{color_by}{A run log column to color the nodes by. Can be helpful for identifying which models are starred, have heuristics, etc. See details for more information.} +\item{size_by}{A run log column to size the nodes by. Can be helpful for +sizing nodes by objective function values or otherwise emphasizing +notable differences in numeric columns. See details for more information.} + \item{add_summary}{Logical (\code{TRUE}/\code{FALSE}). If \code{TRUE}, include key columns from \code{\link[=model_summary]{model_summary()}} output.} } diff --git a/man/model_tree.Rd b/man/model_tree.Rd index 0c3fb7f2a..fe90b07a5 100644 --- a/man/model_tree.Rd +++ b/man/model_tree.Rd @@ -2,40 +2,13 @@ % Please edit documentation in R/model-tree.R \name{model_tree} \alias{model_tree} -\alias{model_tree.character} -\alias{model_tree.bbi_log_df} \title{Create a tree diagram of a modeling directory} \usage{ model_tree( .log_df, include_info = c("description", "star", "tags"), color_by = "run", - add_summary = TRUE, - digits = 3, - zoomable = FALSE, - static = FALSE, - width = NULL, - height = NULL, - ... -) - -\method{model_tree}{character}( - .log_df, - include_info = c("description", "star", "tags"), - color_by = "run", - add_summary = TRUE, - digits = 3, - zoomable = FALSE, - static = FALSE, - width = NULL, - height = NULL, - ... -) - -\method{model_tree}{bbi_log_df}( - .log_df, - include_info = c("description", "star", "tags"), - color_by = "run", + size_by = NULL, add_summary = TRUE, digits = 3, zoomable = FALSE, @@ -46,16 +19,20 @@ model_tree( ) } \arguments{ -\item{.log_df}{a \code{bbi_run_log_df} tibble (the output of \code{run_log()}) \emph{\strong{or}} +\item{.log_df}{A \code{bbi_run_log_df} tibble (the output of \code{run_log()}) \emph{\strong{or}} a base directory to look in for models. See details for more options.} -\item{include_info}{vector of columns present in \code{.log_df} to include in the +\item{include_info}{A vector of columns present in \code{.log_df} to include in the tooltip.} -\item{color_by}{a run log column to color the nodes by. Can be helpful for +\item{color_by}{A run log column to color the nodes by. Can be helpful for identifying which models are starred, have heuristics, etc. See details for more information.} +\item{size_by}{A run log column to size the nodes by. Can be helpful for +sizing nodes by objective function values or otherwise emphasizing +notable differences in numeric columns. See details for more information.} + \item{add_summary}{Logical (\code{TRUE}/\code{FALSE}). If \code{TRUE}, include key columns from \code{\link[=model_summary]{model_summary()}} output.} @@ -69,11 +46,11 @@ dragging and scrolling.} static image. This takes a little longer, as the interactive plot must be saved as a PNG and loaded into the viewer.} -\item{width}{width in pixels (optional, defaults to automatic sizing)} +\item{width}{Width in pixels (optional, defaults to automatic sizing)} -\item{height}{height in pixels (optional, defaults to automatic sizing)} +\item{height}{Height in pixels (optional, defaults to automatic sizing)} -\item{...}{additional arguments passed to \code{\link[=run_log]{run_log()}}. Only used if \code{.log_df} +\item{...}{Additional arguments passed to \code{\link[=run_log]{run_log()}}. Only used if \code{.log_df} is a modeling directory.} } \description{ @@ -96,7 +73,7 @@ additional columns as tooltips. This is illustrated in the examples via \code{add_summary()} and \code{add_config()}. } -\section{Tooltip formatting and coloring}{ +\section{Tooltip formatting, coloring, and sizing}{ \strong{Tooltip formatting} @@ -127,6 +104,13 @@ Nodes will be colored \code{'white'} for \code{FALSE} and \code{'red'} for \code column types will be colored via a gradient between \code{'white'} and \code{'red'}, where earlier runs are whiter, and later runs appear to be more red. You can pass \code{color_by = NULL} to make all model nodes \code{'red'}. + +\strong{Sizing} + +Sizing is intended to be used for numeric columns (such as \code{'ofv'} when +\code{.log_df = run_log() \%>\% add_summary()}). Logical columns are supported, though +you may experience different sizing behavior depending on the occurrence of +\code{TRUE}/\code{FALSE} values in the run log. } \examples{ @@ -140,19 +124,27 @@ run_log(MODEL_DIR) \%>\% model_tree() model_tree(MODEL_DIR, color_by = "star") -# Run `add_config()`, `add_summary()`, and/or `mutate()` calls beforehand -run_log(MODEL_DIR) \%>\% - add_config() \%>\% - dplyr::mutate(out_of_date = model_has_changed | data_has_changed) \%>\% +## Run `add_config()`, `add_summary()`, and/or `mutate()` calls beforehand + + +# Size nodes by objective function value +run_log(MODEL_DIR) \%>\% add_summary() \%>\% + model_tree(size_by = "ofv") + +# Determine if certain models need to be re-run +run_log(MODEL_DIR) \%>\% add_config() \%>\% + dplyr::mutate( + out_of_date = model_has_changed | data_has_changed + ) \%>\% model_tree( include_info = c("model_has_changed", "data_has_changed", "nm_version"), color_by = "out_of_date" ) -run_log(MODEL_DIR) \%>\% - add_summary() \%>\% +# Highlight models with any heuristics +run_log(MODEL_DIR) \%>\% add_summary() \%>\% model_tree( - include_info = c("tags", "param_count", "eta_pval_significant"), + include_info = c("param_count", "eta_pval_significant"), color_by = "any_heuristics" ) From ca905f73895d52520bc290fb05ccabb66dbf5b79 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Fri, 4 Oct 2024 12:33:58 -0400 Subject: [PATCH 02/20] Add tests for new size_by argument - While the implementation and tests are done, I think we still want to revist the NA handling, which also impacts the "start" node size. --- tests/testthat/test-model-tree.R | 87 ++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/tests/testthat/test-model-tree.R b/tests/testthat/test-model-tree.R index 32ee3a949..829464c47 100644 --- a/tests/testthat/test-model-tree.R +++ b/tests/testthat/test-model-tree.R @@ -2,6 +2,10 @@ context("Model tree diagram") skip_if_not_ci_or_metworx("test-model-tree") skip_if_tree_missing_deps() +# These two functions ignore the 'start' node, as we are only comparing +# to the run_log + +# Count how many nodes appear in the model tree for each model count_nodes <- function(tree_list) { if(length(tree_list) == 0) return(0) # Iterate through each element in the list @@ -17,6 +21,26 @@ count_nodes <- function(tree_list) { return(total_nodes) } +# Get node attribute for each model +get_node_attribute <- function(tree_list, attr = 'SizeOfNode') { + if (length(tree_list) == 0) return(numeric(0)) + # Iterate through each element in the list + attribute_values <- numeric(0) + for (i in seq_along(tree_list)) { + # Check if the specified attribute exists in the current node + if (!is.null(tree_list[[i]][[attr]])) { + attr_value <- tree_list[[i]][[attr]] + if(is.factor(attr_value)) attr_value <- as.character(attr_value) + attribute_values <- c(attribute_values, attr_value) + } + # If the current node has children, recursively get the attribute from children + if (length(tree_list[[i]]$children) > 0) { + attribute_values <- c(attribute_values, get_node_attribute(tree_list[[i]]$children, attr)) + } + } + return(attribute_values) +} + withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), { @@ -348,6 +372,69 @@ withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), { ) }) + it("size_tree_by()", { + clean_test_enviroment(create_tree_models) + + log_df <- run_log(MODEL_DIR) %>% dplyr::mutate( + size_col = as.numeric(run) + ) + + # Checks that the size increases with each node (like size_col, i.e. run number) + pl_tree <- model_tree(log_df, add_summary = FALSE, size_by = "size_col") + node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode") + expect_true(all(diff(node_sizes) > 0)) + + ### Data checks ### + # Test logical size_by + true_indices <- which(log_df$star) + false_indices <- which(!log_df$star) + pl_tree <- model_tree(log_df, add_summary = FALSE, size_by = "star") + node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode") + + tree_data <- make_tree_data(log_df, add_summary = FALSE) + tree_data_star <- size_tree_by(tree_data, size_by = "star") + data_sizes <- tree_data_star$node_size[-1] + + # Checks that the TRUE values are larger than FALSE values + # - Checks the underlying data, and rendered node size + expect_true(all(node_sizes[true_indices] > node_sizes[false_indices])) + expect_true(all(data_sizes[true_indices] > data_sizes[false_indices])) + + # Check if all the same value (works the same if TRUE or NA) + log_df2 <- log_df + log_df2$star <- FALSE + false_indices <- which(!log_df2$star) + pl_tree <- model_tree(log_df2, add_summary = FALSE, size_by = "star") + node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode") + + tree_data <- make_tree_data(log_df2, add_summary = FALSE) + tree_data_star <- size_tree_by(tree_data, size_by = "star") + data_sizes <- tree_data_star$node_size[-1] + + # Checks that all values are the same size + # - Checks the underlying data, and rendered node size + expect_true(dplyr::n_distinct(node_sizes[false_indices]) == 1) + expect_true(dplyr::n_distinct(data_sizes[false_indices]) == 1) + + + # Test numeric size_by (gradient sizing) - mimics objective function + log_df <- log_df %>% dplyr::mutate( + size_col = abs(rnorm(nrow(log_df), mean = 1500, sd = 800)) + ) + size_col_vals <- log_df$size_col + pl_tree <- model_tree(log_df, add_summary = FALSE, size_by = "size_col") + node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode") + + tree_data <- make_tree_data(log_df, add_summary = FALSE, size_by = "size_col") + tree_data_size <- size_tree_by(tree_data, size_by = "size_col") + data_sizes <- tree_data_size$node_size[-1] + + # Checks that the ordering is consistent + # - Checks the underlying data, and rendered node size + expect_equal(order(size_col_vals), order(node_sizes)) + expect_equal(order(size_col_vals), order(data_sizes)) + }) + it("static plot", { skip_if_tree_missing_deps(static = TRUE) clean_test_enviroment(create_tree_models) From 6d11e25bb779282ff79c040037bf413790384b21 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Fri, 4 Oct 2024 13:55:19 -0400 Subject: [PATCH 03/20] bug fix: coloring for numeric columns - While updating the vignette, I noticed that the size_by and color_by arguments occasionally didnt line up for numeric columns. Numeric columns need to be sorted first so the right colors are assigned. This was previously "tested" using the character 'run' column, though was more observable when testing against objective function values (which dont necessarily appear in ascending/descending order) --- R/model-tree.R | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/R/model-tree.R b/R/model-tree.R index 3fc0cd270..03f718d6f 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -680,7 +680,19 @@ color_tree_by <- function(tree_data, color_by = "run"){ n_levels <- dplyr::n_distinct(vals) # To preview color palette: scales::show_col(pal_bbr) - if(inherits(vals, "logical")){ + if(inherits(vals, "numeric")){ + # Gradient coloring (sorted): get colors for unique values (excluding NA) + sorted_vals <- sort(unique(vals)) + pal_bbr <- scales::pal_gradient_n(bbr_cols)(seq(0, 1, length.out = n_levels)) + + # Assign colors based on sorted values + color_mapping <- setNames(pal_bbr, sorted_vals) + tree_data$col <- ifelse( + tree_data$col %in% sorted_vals, + color_mapping[as.character(tree_data$col)], + tree_data$col + ) + }else if(inherits(vals, "logical")){ # Ensure both TRUE and FALSE colors are extracted, even if only one occurs pal_bbr <- scales::pal_gradient_n(bbr_cols)(c(0,1)) # Explicitly set FALSE to white and TRUE to red @@ -693,7 +705,7 @@ color_tree_by <- function(tree_data, color_by = "run"){ ) ) }else{ - # Gradient coloring: get colors for unique values (excluding NA) + # Gradient coloring; doesn't need to be sorted pal_bbr <- scales::pal_gradient_n(bbr_cols)(seq(0, 1, length.out = n_levels)) tree_data$col <- factor(tree_data$col) levels(tree_data$col) <- c(node_colors, pal_bbr) From af33e7b0fa3dad0d5723fadc3e44171840bb9dfe Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Fri, 4 Oct 2024 14:16:31 -0400 Subject: [PATCH 04/20] Add regression test for coloring order (numeric cols) --- tests/testthat/test-model-tree.R | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-model-tree.R b/tests/testthat/test-model-tree.R index 829464c47..c2a7e642f 100644 --- a/tests/testthat/test-model-tree.R +++ b/tests/testthat/test-model-tree.R @@ -361,7 +361,7 @@ withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), { c("#007319", rep("#EB003D", 4), "#C0C0C0") ) - # Test numeric/character color_by (gradient coloring) + # Test character color_by (gradient coloring) tree_data <- make_tree_data(run_log(MODEL_DIR), add_summary = FALSE) tree_data_run <- color_tree_by(tree_data, color_by = "run") expect_equal( @@ -370,6 +370,18 @@ withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), { # Note: all gradient colors will shift if number of models change c("#007319", "#FFFFFF", "#F4DBD3", "#ED9D84", "#E35B44", "#EB003D") ) + + # Regression test: numeric color_by is sorted appropriately + log_df <- run_log(MODEL_DIR) + log_df$numeric_vals <- c(1534, 3892, 731, 2653, 3574) + pl_tree <- model_tree( + log_df, add_summary = FALSE, color_by = "numeric_vals", + include_info = "numeric_vals" + ) + node_colors <- get_node_attribute(pl_tree$x$data$children, attr = "fill") + expected_colors <- c("#F4DBD3", "#EB003D", "#FFFFFF", "#ED9D84", "#E35B44") + # Can inspect with `scales::show_col(node_colors)` + expect_equal(node_colors, expected_colors) }) it("size_tree_by()", { From b2666936a747bcd1e78bc86429a7bf3177e82967 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Fri, 4 Oct 2024 14:43:48 -0400 Subject: [PATCH 05/20] Restore previous sizing if size_by is not provided - uses leafCount, which makes the nodes containing more child nodes, larger --- R/model-tree.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/model-tree.R b/R/model-tree.R index 03f718d6f..e216a40c9 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -179,15 +179,17 @@ model_tree.bbi_log_df <- function( # Format sizing tree_data <- size_tree_by(tree_data, size_by = size_by) + node_size <- ifelse(is.null(size_by), "leafCount", "node_size") # Compile attributes into tooltip tree_data <- make_tree_tooltip(tree_data, digits = digits) # Create model tree pl_tree <- collapsibleTree::collapsibleTreeNetwork( - tree_data, zoomable = zoomable, attribute = tree_attr, - fill="col", collapsed = FALSE, nodeSize = "node_size", - aggFun = identity, + tree_data, zoomable = zoomable, collapsed = FALSE, + # Coloring and sizing + attribute = tree_attr, fill="col", nodeSize = node_size, aggFun = identity, + # Tooltip and display tooltipHtml = "tooltip", width = width, height = height ) From d5bd39693c8d3092023c533f4e94e1badaa6db52 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Fri, 4 Oct 2024 14:44:12 -0400 Subject: [PATCH 06/20] Update vignette to include new example - Improve formatting of code in some cases --- vignettes/model-tree.Rmd | 69 ++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 16 deletions(-) diff --git a/vignettes/model-tree.Rmd b/vignettes/model-tree.Rmd index dd7fa1e15..4491432c8 100644 --- a/vignettes/model-tree.Rmd +++ b/vignettes/model-tree.Rmd @@ -135,6 +135,9 @@ model_tree(MODEL_DIR) model_tree(MODEL_DIR, width = 800) ``` + +## Adjust the coloring and tooltip + If coloring by a logical column, `FALSE` and `TRUE` values will correspond to white and red coloring respectively. Numeric or character columns will be colored as a gradient. `NA` values will appear grey regardless of the column type. ```{r, eval=FALSE} model_tree(MODEL_DIR, color_by = "star") @@ -151,10 +154,21 @@ log_df <- run_log(MODEL_DIR) log_df ``` +```{r, echo=FALSE, eval=eval_model_tree} +log_df <- run_log(MODEL_DIR) %>% add_summary() %>% add_config() %>% suppressWarnings() +ofv_nas <- which(is.na(log_df$ofv)) +# Overwrite ofv to reflect a typical modeling workflow +log_df$ofv <- sort(abs(rnorm(nrow(log_df), mean = 2600, sd = 900)), decreasing = TRUE) +# Make the "final model" the smallest ofv instead of the last one in the run log +log_df$ofv[log_df$description == "final model"] <- min(log_df$ofv, na.rm = TRUE) +log_df$ofv[nrow(log_df)] <- min(log_df$ofv, na.rm = TRUE)*1.1 +# retain existing NAs of objective function (mainly for bootstrap example) +log_df$ofv[ofv_nas] <- NA +``` + In this example we define a new column, `out_of_date`, to denote whether the model or data has changed since the last run. We can color by this new column to determine if any of the models need to be re-run: ```{r, eval=FALSE} -run_log(MODEL_DIR) %>% - add_config() %>% +log_df %>% add_config() %>% dplyr::mutate(out_of_date = model_has_changed | data_has_changed) %>% model_tree( include_info = c("model_has_changed", "data_has_changed", "nm_version"), @@ -164,22 +178,23 @@ run_log(MODEL_DIR) %>% ```{r, echo=FALSE, eval=eval_model_tree} # Since using fake model runs (appearing out of date), set most to FALSE -log_df <- run_log(MODEL_DIR) %>% add_config() %>% suppressWarnings() -log_df$model_has_changed[2:7] <- FALSE +log_df2 <- log_df +log_df2$model_has_changed[2:7] <- FALSE +log_df2 <- log_df2 %>% dplyr::mutate( + out_of_date = model_has_changed | data_has_changed +) -log_df %>% - dplyr::mutate(out_of_date = model_has_changed | data_has_changed) %>% - model_tree( - include_info = c("model_has_changed", "data_has_changed", "nm_version"), - color_by = "out_of_date", - width = 800 - ) +model_tree( + log_df2, + include_info = c("model_has_changed", "data_has_changed", "nm_version"), + color_by = "out_of_date", + width = 800 +) ``` The model tree can also be helpful for quickly determine if any heuristics were found during any model submissions, as well as displaying specific model summary output in the tooltip. ```{r, eval=FALSE} -run_log(MODEL_DIR) %>% - add_summary() %>% +log_df %>% add_summary() %>% model_tree( include_info = c("tags", "param_count", "eta_pval_significant"), color_by = "any_heuristics" @@ -188,16 +203,38 @@ run_log(MODEL_DIR) %>% ```{r, echo=FALSE, eval=eval_model_tree} # Since using fake models (all inheriting the same issues), set some to FALSE -log_df <- run_log(MODEL_DIR) %>% add_summary() -log_df$any_heuristics[c(5, 7, 9)] <- FALSE +log_df2 <- log_df +log_df2$any_heuristics[c(5, 7, 9)] <- FALSE + model_tree( - log_df, + log_df2, include_info = c("tags", "param_count", "eta_pval_significant"), color_by = "any_heuristics", width = 800 ) ``` +## Size the nodes by a particular column + +Controlling the node size can be helpful for quickly determining the trend of a particular numeric column, or further emphasizing a logical one (e.g., `'star'`). Here, we use `color_by` and `size_by` to show the objective function value decreasing with each new model. + - **Note:** Like the `color_by` argument, only columns included in `log_df` can be passed. We have to call `add_summary()` to use `"ofv"` even though this is included in the tooltip when `add_summary = TRUE`. +```{r, eval=FALSE} +log_df %>% add_summary() %>% + model_tree( + include_info = c("star", "description"), + color_by = "ofv", size_by = "ofv" + ) +``` + +```{r, echo=FALSE, eval=eval_model_tree} +model_tree( + log_df, + include_info = c("star", "description"), + color_by = "ofv", size_by = "ofv", + width = 800 +) +``` + ```{r cleanup bottom, include=FALSE, eval=eval_model_tree} # delete old files cleanup(mods) From 93b003e00033b910c3f1876a3acf82f8c5260d39 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Fri, 4 Oct 2024 15:00:50 -0400 Subject: [PATCH 07/20] add font_size argument --- R/model-tree.R | 44 ++++++++++++++++++++++++++++---------------- man/model_tree.Rd | 3 +++ 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/R/model-tree.R b/R/model-tree.R index e216a40c9..62e67d6cc 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -21,6 +21,7 @@ #' be saved as a PNG and loaded into the viewer. #' @param width Width in pixels (optional, defaults to automatic sizing) #' @param height Height in pixels (optional, defaults to automatic sizing) +#' @param font_size Font size of the label text in pixels #' @param ... Additional arguments passed to [run_log()]. Only used if `.log_df` #' is a modeling directory. #' @@ -121,6 +122,7 @@ model_tree <- function( static = FALSE, width = NULL, height = NULL, + font_size = 10, ... ){ UseMethod("model_tree") @@ -139,6 +141,7 @@ model_tree.character <- function( static = FALSE, width = NULL, height = NULL, + font_size = 10, ... ){ checkmate::assert_directory_exists(.log_df) @@ -149,7 +152,8 @@ model_tree.character <- function( color_by = color_by, size_by = size_by, add_summary = add_summary, digits = digits, zoomable = zoomable, static = static, - width = width, height = height + width = width, height = height, + font_size = font_size ) } @@ -165,6 +169,7 @@ model_tree.bbi_log_df <- function( static = FALSE, width = NULL, height = NULL, + font_size = 10, ... ){ # Make sure required dependencies are installed @@ -182,7 +187,7 @@ model_tree.bbi_log_df <- function( node_size <- ifelse(is.null(size_by), "leafCount", "node_size") # Compile attributes into tooltip - tree_data <- make_tree_tooltip(tree_data, digits = digits) + tree_data <- make_tree_tooltip(tree_data, digits = digits, font_size = font_size) # Create model tree pl_tree <- collapsibleTree::collapsibleTreeNetwork( @@ -190,7 +195,8 @@ model_tree.bbi_log_df <- function( # Coloring and sizing attribute = tree_attr, fill="col", nodeSize = node_size, aggFun = identity, # Tooltip and display - tooltipHtml = "tooltip", width = width, height = height + tooltipHtml = "tooltip", fontSize = font_size, + width = width, height = height ) if(isTRUE(static)){ @@ -513,7 +519,7 @@ check_model_tree <- function(network_df){ #' defines the model network. #' @inheritParams model_tree #' @noRd -make_tree_tooltip <- function(tree_data, digits = 3){ +make_tree_tooltip <- function(tree_data, digits = 3, font_size = 10){ round_numeric <- function(x, digits){ # Round instead of signif - this can matter for objective functions @@ -528,12 +534,17 @@ make_tree_tooltip <- function(tree_data, digits = 3){ # executed (i.e. NA values will still be displayed for these columns) can_include <- function(txt) !is.na(txt) && txt != "" + bold_css <- "font-weight:bold;" + italics_css <- "font-style:italic;" + run_font_size <- font_size + 4 + run_css <- glue::glue("font-size:{run_font_size}px; {bold_css}") + # Tooltip from run log base_tt_cols <- attr(tree_data, "base_tt_cols") tooltip <- purrr::imap_chr(tree_data$to, function(.x, .y){ mod_name <- ifelse(.x == "Start", .x, paste("Run", .x)) mod_html <- style_html( - mod_name, color = "#538b01", "font-size:14px; font-weight:bold", br_after = TRUE + mod_name, color = "#538b01", run_css, br_after = TRUE ) # Model type @@ -547,7 +558,7 @@ make_tree_tooltip <- function(tree_data, digits = 3){ can_include(tree_data$addl_based_on[.y]), style_html( paste("Additional Based on:", tree_data$addl_based_on[.y]), - "font-weight:bold", br_after = TRUE + bold_css, br_after = TRUE ), "" ) @@ -555,7 +566,7 @@ make_tree_tooltip <- function(tree_data, digits = 3){ # Other parameters desc_html <- ifelse( "description" %in% base_tt_cols && can_include(tree_data$description[.y]), - style_html(tree_data$description[.y], "font-style:italic", br_after = TRUE), + style_html(tree_data$description[.y], italics_css, br_after = TRUE), "" ) tags_html <- ifelse( @@ -565,7 +576,7 @@ make_tree_tooltip <- function(tree_data, digits = 3){ ) star_html <- ifelse( "star" %in% base_tt_cols && can_include(tree_data$star[.y]) && isTRUE(tree_data$star[.y]), - style_html("Starred", color = "#ffa502", "font-weight:bold", br_after = TRUE), + style_html("Starred", color = "#ffa502", bold_css, br_after = TRUE), "" ) @@ -582,13 +593,13 @@ make_tree_tooltip <- function(tree_data, digits = 3){ # Conditional heuristics text any_heuristics <- tree_data$any_heuristics[.y] heuristics_txt <- if(!is.na(any_heuristics) && isTRUE(any_heuristics)){ - paste0("

", style_html("--Heuristics Found--", color = "#A30000", "font-weight:bold")) + paste0("

", style_html("--Heuristics Found--", color = "#A30000", bold_css)) }else{ "" } # Conditional simulation text has_sim_txt <- if(has_simulation(read_model(tree_data[[ABS_MOD_PATH]][.y]))){ - paste0("

", style_html("--Simulation attached--", color = "#ad7fa8", "font-weight:bold")) + paste0("

", style_html("--Simulation attached--", color = "#ad7fa8", bold_css)) }else{ "" } @@ -614,12 +625,12 @@ make_tree_tooltip <- function(tree_data, digits = 3){ ) # Combined tooltip paste0( - style_html(mod_status, color = status_col, "font-weight:bold", br_before = TRUE, br_after = TRUE), + style_html(mod_status, color = status_col, bold_css, br_before = TRUE, br_after = TRUE), ofv, n_sub, n_obs, heuristics_txt, has_sim_txt ) }else{ # If not run, just show the status - style_html(mod_status, color = status_col, "font-weight:bold", br_before = TRUE) + style_html(mod_status, color = status_col, bold_css, br_before = TRUE) } }) @@ -794,15 +805,16 @@ format_model_type <- function(model_type, fmt_html = FALSE, ...){ ) if(isTRUE(fmt_html)){ + bold_css <- "font-weight:bold;" mod_type_fmt <- dplyr::case_when( model_type == "nonmem" ~ - style_html(mod_type_fmt, color = "#119a9c", "font-weight:bold", ...), + style_html(mod_type_fmt, color = "#119a9c", bold_css, ...), model_type == "nmboot" ~ - style_html(mod_type_fmt, color = "#c49f02", "font-weight:bold", ...), + style_html(mod_type_fmt, color = "#c49f02", bold_css, ...), model_type == "nmsim" ~ - style_html(mod_type_fmt, color = "#ad7fa8", "font-weight:bold", ...), + style_html(mod_type_fmt, color = "#ad7fa8", bold_css, ...), TRUE ~ - style_html(mod_type_fmt, color = "black", "font-weight:bold", ...) + style_html(mod_type_fmt, color = "black", bold_css, ...) ) } diff --git a/man/model_tree.Rd b/man/model_tree.Rd index fe90b07a5..adca96ba1 100644 --- a/man/model_tree.Rd +++ b/man/model_tree.Rd @@ -15,6 +15,7 @@ model_tree( static = FALSE, width = NULL, height = NULL, + font_size = 10, ... ) } @@ -50,6 +51,8 @@ be saved as a PNG and loaded into the viewer.} \item{height}{Height in pixels (optional, defaults to automatic sizing)} +\item{font_size}{Font size of the label text in pixels} + \item{...}{Additional arguments passed to \code{\link[=run_log]{run_log()}}. Only used if \code{.log_df} is a modeling directory.} } From fc65532169cb5c31a13e11c291cd8e0f63bb0bb5 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Fri, 4 Oct 2024 15:14:40 -0400 Subject: [PATCH 08/20] Formatting fix: dont display tooltips for 'start' node - They were always NA previously --- R/model-tree.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/model-tree.R b/R/model-tree.R index 62e67d6cc..fedbb5a99 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -648,6 +648,7 @@ make_tree_tooltip <- function(tree_data, digits = 3, font_size = 10){ }) paste0(other_html, collapse = "") }) + other_tooltip[1] <- "" # Skip start node for other tooltips tooltip <- paste0(tooltip, other_tooltip, sum_tooltip) }else{ tooltip <- paste0(tooltip, sum_tooltip) From 69138206c398f36dca3a42988f6b24a1ddc0c0ed Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Fri, 4 Oct 2024 15:40:23 -0400 Subject: [PATCH 09/20] set seed for vignette and test; increase font_size in vignette --- tests/testthat/test-model-tree.R | 9 +++++---- vignettes/model-tree.Rmd | 16 ++++++++++------ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test-model-tree.R b/tests/testthat/test-model-tree.R index c2a7e642f..063119271 100644 --- a/tests/testthat/test-model-tree.R +++ b/tests/testthat/test-model-tree.R @@ -23,18 +23,18 @@ count_nodes <- function(tree_list) { # Get node attribute for each model get_node_attribute <- function(tree_list, attr = 'SizeOfNode') { - if (length(tree_list) == 0) return(numeric(0)) + if(length(tree_list) == 0) return(numeric(0)) # Iterate through each element in the list attribute_values <- numeric(0) - for (i in seq_along(tree_list)) { + for(i in seq_along(tree_list)) { # Check if the specified attribute exists in the current node - if (!is.null(tree_list[[i]][[attr]])) { + if(!is.null(tree_list[[i]][[attr]])){ attr_value <- tree_list[[i]][[attr]] if(is.factor(attr_value)) attr_value <- as.character(attr_value) attribute_values <- c(attribute_values, attr_value) } # If the current node has children, recursively get the attribute from children - if (length(tree_list[[i]]$children) > 0) { + if(length(tree_list[[i]]$children) > 0){ attribute_values <- c(attribute_values, get_node_attribute(tree_list[[i]]$children, attr)) } } @@ -430,6 +430,7 @@ withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), { # Test numeric size_by (gradient sizing) - mimics objective function + set.seed(1234) log_df <- log_df %>% dplyr::mutate( size_col = abs(rnorm(nrow(log_df), mean = 1500, sd = 800)) ) diff --git a/vignettes/model-tree.Rmd b/vignettes/model-tree.Rmd index 4491432c8..f0f0876f5 100644 --- a/vignettes/model-tree.Rmd +++ b/vignettes/model-tree.Rmd @@ -132,7 +132,7 @@ model_tree(MODEL_DIR) ```{r, echo=FALSE, eval=eval_model_tree} # you dont need to specify the width when rendering in Rstudio viewer, but it makes a difference when rendered in Rmarkdown # - relying on auto-sizing leads to a plot with a small width -model_tree(MODEL_DIR, width = 800) +model_tree(MODEL_DIR, width = 800, font_size = 12) ``` @@ -144,7 +144,7 @@ model_tree(MODEL_DIR, color_by = "star") ``` ```{r, echo=FALSE, eval=eval_model_tree} -model_tree(MODEL_DIR, color_by = "star", width = 800) +model_tree(MODEL_DIR, color_by = "star", width = 800, font_size = 12) ``` @@ -158,7 +158,8 @@ log_df log_df <- run_log(MODEL_DIR) %>% add_summary() %>% add_config() %>% suppressWarnings() ofv_nas <- which(is.na(log_df$ofv)) # Overwrite ofv to reflect a typical modeling workflow -log_df$ofv <- sort(abs(rnorm(nrow(log_df), mean = 2600, sd = 900)), decreasing = TRUE) +set.seed(1234) +log_df$ofv <- sort(abs(rnorm(nrow(log_df), mean = 2600, sd = 500)), decreasing = TRUE) # Make the "final model" the smallest ofv instead of the last one in the run log log_df$ofv[log_df$description == "final model"] <- min(log_df$ofv, na.rm = TRUE) log_df$ofv[nrow(log_df)] <- min(log_df$ofv, na.rm = TRUE)*1.1 @@ -188,7 +189,8 @@ model_tree( log_df2, include_info = c("model_has_changed", "data_has_changed", "nm_version"), color_by = "out_of_date", - width = 800 + width = 800, + font_size = 12 ) ``` @@ -210,7 +212,8 @@ model_tree( log_df2, include_info = c("tags", "param_count", "eta_pval_significant"), color_by = "any_heuristics", - width = 800 + width = 800, + font_size = 12 ) ``` @@ -231,7 +234,8 @@ model_tree( log_df, include_info = c("star", "description"), color_by = "ofv", size_by = "ofv", - width = 800 + width = 800, + font_size = 12 ) ``` From f6e65542d7d6cffdbdcecd12e4c130d6f7db99f3 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Mon, 7 Oct 2024 14:49:00 -0400 Subject: [PATCH 10/20] Dont support logical columns, improve error formatting - Adds additional tests when required or specified columns are missing - doc adjustments --- R/model-tree.R | 80 ++++++++---------- man/make_tree_data.Rd | 6 +- man/model_tree.Rd | 19 ++--- tests/testthat/helpers-create-example-model.R | 3 +- tests/testthat/test-model-tree.R | 81 +++++++++++-------- 5 files changed, 94 insertions(+), 95 deletions(-) diff --git a/R/model-tree.R b/R/model-tree.R index fedbb5a99..268e9db17 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -7,9 +7,9 @@ #' @param color_by A run log column to color the nodes by. Can be helpful for #' identifying which models are starred, have heuristics, etc. See details #' for more information. -#' @param size_by A run log column to size the nodes by. Can be helpful for -#' sizing nodes by objective function values or otherwise emphasizing -#' notable differences in numeric columns. See details for more information. +#' @param size_by A **numeric** (or integer) run log column to size the nodes by. +#' Can be helpful for sizing nodes by objective function values or +#' otherwise emphasizing notable differences in numeric columns. #' @param add_summary Logical (`TRUE`/`FALSE`). If `TRUE`, include key columns #' from [model_summary()] output. #' @param digits Number of digits to round decimal places to for display in @@ -54,8 +54,8 @@ #' - Note that the above summary columns will only receive the special #' formatting if added via `add_summary = TRUE`. #' - i.e. if `.log_df = run_log() %>% add_summary()` and -#' `include_info = 'ofv'`, The `'OFV'` parameter will be formatted as any -#' other additional column. +#' `include_info = 'ofv'`, The `'OFV'` parameter will display the same as if +#' it was _not_ passed to `include_info`. #' #' **Coloring** #' @@ -66,13 +66,6 @@ #' pass `color_by = NULL` to make all model nodes `'red'`. #' #' -#' **Sizing** -#' -#' Sizing is intended to be used for numeric columns (such as `'ofv'` when -#' `.log_df = run_log() %>% add_summary()`). Logical columns are supported, though -#' you may experience different sizing behavior depending on the occurrence of -#' `TRUE`/`FALSE` values in the run log. -#' #' @examples #' \dontrun{ #' @@ -89,7 +82,7 @@ #' #' # Size nodes by objective function value #' run_log(MODEL_DIR) %>% add_summary() %>% -#' model_tree(size_by = "ofv") +#' model_tree(size_by = "ofv", color_by = "ofv") #' #' # Determine if certain models need to be re-run #' run_log(MODEL_DIR) %>% add_config() %>% @@ -232,20 +225,24 @@ make_tree_data <- function( size_by = NULL, add_summary = TRUE ){ - cols_keep <- unique(c(include_info, color_by, size_by)) - checkmate::assert_true(all(cols_keep %in% names(.log_df))) - # Check for required columns and starting format req_cols <- c(ABS_MOD_PATH, "run", "based_on", "model_type") if(!(all(req_cols %in% names(.log_df)))){ - cols_missing <- req_cols[!(req_cols %in% names(.log_df))] - cols_missing <- paste(cols_missing, collapse = ", ") - rlang::abort( - glue::glue("The following required columns are missing: {cols_missing}") + cols_missing <- setdiff(req_cols, names(.log_df)) + cli::cli_abort( + "The following {.emph required} columns are missing from `.log_df`: {.val {cols_missing}}" ) } checkmate::assert_true(inherits(.log_df$based_on, "list")) + cols_keep <- unique(c(include_info, color_by, size_by)) + if(!all(cols_keep %in% names(.log_df))){ + cols_missing <- setdiff(cols_keep, names(.log_df)) + cli::cli_abort( + "The following {.emph specified} columns are missing from `.log_df`: {.val {cols_missing}}" + ) + } + # These columns have special handling either here or in the tooltip base_log_cols <- c(req_cols, "description", "star", "tags") @@ -659,7 +656,7 @@ make_tree_tooltip <- function(tree_data, digits = 3, font_size = 10){ } -#' Create a color column based on the unique values of another column +#' Create a color column based on the unique values of another column. #' @inheritParams make_tree_tooltip #' @inheritParams model_tree #' @noRd @@ -734,41 +731,32 @@ color_tree_by <- function(tree_data, color_by = "run"){ return(tree_data) } -#' Create a size column based on the unique values of another column +#' Create a size column based on the unique _numeric_ (or integer) values of +#' another column. #' @inheritParams make_tree_tooltip #' @inheritParams model_tree #' @noRd size_tree_by <- function(tree_data, size_by = NULL){ - # Initialize new size column if(!is.null(size_by)){ checkmate::assert_true(size_by %in% names(tree_data)) tree_data$node_size <- tree_data[[size_by]] - }else{ - tree_data$node_size <- 1 - } - - # Sizing only works for numeric columns, so additional handling is needed for - # logical and NA values - # - Logical values: TRUE = 5, FALSE = 1, NA = 3 - # - Numeric values with NA: NA = mean value - if(inherits(tree_data$node_size, "logical")){ - tree_data <- tree_data %>% dplyr::mutate( - node_size = dplyr::case_when( - node_size == TRUE ~ 5, - node_size == FALSE ~ 1, - is.na(node_size) ~ 3, - TRUE ~ 3 + # Scale size with numeric value + if(inherits(tree_data$node_size, c("numeric", "integer"))){ + # Set node sizes with NA values (including start node) to mean value + mean_val <- mean(tree_data$node_size, na.rm = TRUE) + tree_data$node_size[is.na(tree_data$node_size)] <- mean_val + }else{ + col_class <- class(tree_data[[size_by]]) + cli::cli_warn( + c( + "Only numeric columns are supported. Column {.val {size_by}} is {.val {col_class}}", + "i" = "Setting node size to constant" + ) ) - ) - }else if(inherits(tree_data$node_size, "numeric")){ - mean_val <- mean(tree_data$node_size, na.rm = TRUE) - # Set node sizes with NA values (including start node) to mean value - tree_data$node_size[is.na(tree_data$node_size)] <- mean_val + tree_data$node_size <- 1 + } }else{ tree_data$node_size <- 1 - cli::cli_warn( - "Only numeric and logical columns are supported for {.val size_by}" - ) } return(tree_data) diff --git a/man/make_tree_data.Rd b/man/make_tree_data.Rd index a4d05da97..9a9e166f9 100644 --- a/man/make_tree_data.Rd +++ b/man/make_tree_data.Rd @@ -22,9 +22,9 @@ tooltip.} identifying which models are starred, have heuristics, etc. See details for more information.} -\item{size_by}{A run log column to size the nodes by. Can be helpful for -sizing nodes by objective function values or otherwise emphasizing -notable differences in numeric columns. See details for more information.} +\item{size_by}{A \strong{numeric} (or integer) run log column to size the nodes by. +Can be helpful for sizing nodes by objective function values or +otherwise emphasizing notable differences in numeric columns.} \item{add_summary}{Logical (\code{TRUE}/\code{FALSE}). If \code{TRUE}, include key columns from \code{\link[=model_summary]{model_summary()}} output.} diff --git a/man/model_tree.Rd b/man/model_tree.Rd index adca96ba1..012fa95b6 100644 --- a/man/model_tree.Rd +++ b/man/model_tree.Rd @@ -30,9 +30,9 @@ tooltip.} identifying which models are starred, have heuristics, etc. See details for more information.} -\item{size_by}{A run log column to size the nodes by. Can be helpful for -sizing nodes by objective function values or otherwise emphasizing -notable differences in numeric columns. See details for more information.} +\item{size_by}{A \strong{numeric} (or integer) run log column to size the nodes by. +Can be helpful for sizing nodes by objective function values or +otherwise emphasizing notable differences in numeric columns.} \item{add_summary}{Logical (\code{TRUE}/\code{FALSE}). If \code{TRUE}, include key columns from \code{\link[=model_summary]{model_summary()}} output.} @@ -95,8 +95,8 @@ formatted specially: \item Note that the above summary columns will only receive the special formatting if added via \code{add_summary = TRUE}. \item i.e. if \code{.log_df = run_log() \%>\% add_summary()} and -\code{include_info = 'ofv'}, The \code{'OFV'} parameter will be formatted as any -other additional column. +\code{include_info = 'ofv'}, The \code{'OFV'} parameter will display the same as if +it was \emph{not} passed to \code{include_info}. } } @@ -107,13 +107,6 @@ Nodes will be colored \code{'white'} for \code{FALSE} and \code{'red'} for \code column types will be colored via a gradient between \code{'white'} and \code{'red'}, where earlier runs are whiter, and later runs appear to be more red. You can pass \code{color_by = NULL} to make all model nodes \code{'red'}. - -\strong{Sizing} - -Sizing is intended to be used for numeric columns (such as \code{'ofv'} when -\code{.log_df = run_log() \%>\% add_summary()}). Logical columns are supported, though -you may experience different sizing behavior depending on the occurrence of -\code{TRUE}/\code{FALSE} values in the run log. } \examples{ @@ -132,7 +125,7 @@ model_tree(MODEL_DIR, color_by = "star") # Size nodes by objective function value run_log(MODEL_DIR) \%>\% add_summary() \%>\% - model_tree(size_by = "ofv") + model_tree(size_by = "ofv", color_by = "ofv") # Determine if certain models need to be re-run run_log(MODEL_DIR) \%>\% add_config() \%>\% diff --git a/tests/testthat/helpers-create-example-model.R b/tests/testthat/helpers-create-example-model.R index 80880408b..1c1464c10 100644 --- a/tests/testthat/helpers-create-example-model.R +++ b/tests/testthat/helpers-create-example-model.R @@ -157,7 +157,8 @@ make_fake_boot <- function(mod, n = 100, strat_cols = c("SEX", "ETN")){ # Unlike make_fake_boot however, the simulation will have a status of "Not Run" make_fake_sim <- function(mod, mod_id = "mod-sim", n = 100){ mod_sim <- copy_model_from(mod, mod_id) %>% update_model_id() - new_dir_path <- file.path(MODEL_DIR, mod_id) + model_dir <- get_model_working_directory(MOD1) + new_dir_path <- file.path(model_dir, mod_id) fs::dir_copy(mod$absolute_model_path, new_dir_path) mod_sim <- add_msf_opt(mod_sim) diff --git a/tests/testthat/test-model-tree.R b/tests/testthat/test-model-tree.R index 063119271..043147184 100644 --- a/tests/testthat/test-model-tree.R +++ b/tests/testthat/test-model-tree.R @@ -388,7 +388,7 @@ withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), { clean_test_enviroment(create_tree_models) log_df <- run_log(MODEL_DIR) %>% dplyr::mutate( - size_col = as.numeric(run) + size_col = as.integer(run) ) # Checks that the size increases with each node (like size_col, i.e. run number) @@ -397,37 +397,6 @@ withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), { expect_true(all(diff(node_sizes) > 0)) ### Data checks ### - # Test logical size_by - true_indices <- which(log_df$star) - false_indices <- which(!log_df$star) - pl_tree <- model_tree(log_df, add_summary = FALSE, size_by = "star") - node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode") - - tree_data <- make_tree_data(log_df, add_summary = FALSE) - tree_data_star <- size_tree_by(tree_data, size_by = "star") - data_sizes <- tree_data_star$node_size[-1] - - # Checks that the TRUE values are larger than FALSE values - # - Checks the underlying data, and rendered node size - expect_true(all(node_sizes[true_indices] > node_sizes[false_indices])) - expect_true(all(data_sizes[true_indices] > data_sizes[false_indices])) - - # Check if all the same value (works the same if TRUE or NA) - log_df2 <- log_df - log_df2$star <- FALSE - false_indices <- which(!log_df2$star) - pl_tree <- model_tree(log_df2, add_summary = FALSE, size_by = "star") - node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode") - - tree_data <- make_tree_data(log_df2, add_summary = FALSE) - tree_data_star <- size_tree_by(tree_data, size_by = "star") - data_sizes <- tree_data_star$node_size[-1] - - # Checks that all values are the same size - # - Checks the underlying data, and rendered node size - expect_true(dplyr::n_distinct(node_sizes[false_indices]) == 1) - expect_true(dplyr::n_distinct(data_sizes[false_indices]) == 1) - # Test numeric size_by (gradient sizing) - mimics objective function set.seed(1234) @@ -446,6 +415,39 @@ withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), { # - Checks the underlying data, and rendered node size expect_equal(order(size_col_vals), order(node_sizes)) expect_equal(order(size_col_vals), order(data_sizes)) + + # Check if all the same value + log_df2 <- log_df + log_df2$size_col <- 1 + size_col_vals <- log_df2$size_col + pl_tree <- model_tree(log_df2, add_summary = FALSE, size_by = "size_col") + node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode") + + tree_data <- make_tree_data(log_df2, add_summary = FALSE, size_by = "size_col") + tree_data_size <- size_tree_by(tree_data, size_by = "size_col") + data_sizes <- tree_data_size$node_size[-1] + + # Checks that all values are the same size + # - Checks the underlying data, and rendered node size + expect_true(dplyr::n_distinct(node_sizes) == 1) + expect_true(dplyr::n_distinct(data_sizes) == 1) + + ## Warns if non-numeric (or non-integer) column ## + log_df2 <- log_df2 %>% dplyr::mutate(run = as.character(run)) + # Check logical + expect_warning( + pl_tree <- model_tree(log_df2, add_summary = FALSE, size_by = "star"), + 'Only numeric columns are supported' + ) + node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode") + expect_true(dplyr::n_distinct(node_sizes) == 1) + # Check character + expect_warning( + pl_tree <- model_tree(log_df2, add_summary = FALSE, size_by = "star"), + 'Only numeric columns are supported' + ) + node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode") + expect_true(dplyr::n_distinct(node_sizes) == 1) }) it("static plot", { @@ -457,5 +459,20 @@ withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), { expect_true(inherits(pl_tree, "model_tree_static")) expect_true(inherits(pl_tree$png_array, "array")) }) + + it("Check for missing columns", { + clean_test_enviroment(create_tree_models) + # Required columns are missing + log_df <- run_log(MODEL_DIR) %>% dplyr::select(-c("run", "based_on", "model_type")) + expect_error( + model_tree(log_df, add_summary = FALSE), + "columns are missing" + ) + # Specified columns are missing + expect_error( + model_tree(MODEL_DIR, add_summary = FALSE, include_info = c("oops_I", "did_it_again")), + "columns are missing" + ) + }) }) }) # closing withr::with_options From 7acae5fca97785e1019cb7ac0b4c56586c972730 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Mon, 7 Oct 2024 14:51:01 -0400 Subject: [PATCH 11/20] Include color_by and size_by columns in include_info - If color_by or size_by is specified, ensure these columns are also part of the tooltip. This avoids the need for specifying new columns (e.g., 'Out of Date') in both the tooltip and color_by --- R/model-tree.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/model-tree.R b/R/model-tree.R index 268e9db17..6e12b7143 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -169,6 +169,7 @@ model_tree.bbi_log_df <- function( stop_if_tree_missing_deps(static = static) # Make tree data + include_info <- unique(c(include_info, color_by, size_by)) tree_data <- make_tree_data(.log_df, include_info, color_by, size_by, add_summary) # Format coloring From 6dce4d812fe245a440a36cdcf21e898b74a81322 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Mon, 7 Oct 2024 15:05:44 -0400 Subject: [PATCH 12/20] vignette and test helper function fixes - doc and text adjustments - Revert change from previous commit (comment it out) until a decision has been made. --- R/model-tree.R | 7 ++++--- man/make_tree_data.Rd | 4 ++-- man/model_tree.Rd | 4 ++-- tests/testthat/helpers-create-example-model.R | 2 +- vignettes/model-tree.Rmd | 12 +++++------- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/R/model-tree.R b/R/model-tree.R index 6e12b7143..4b47e868f 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -8,8 +8,8 @@ #' identifying which models are starred, have heuristics, etc. See details #' for more information. #' @param size_by A **numeric** (or integer) run log column to size the nodes by. -#' Can be helpful for sizing nodes by objective function values or -#' otherwise emphasizing notable differences in numeric columns. +#' If not specified, the default sizing is `'leafCount'`, which sizes the +#' nodes based on how many models are based on it. #' @param add_summary Logical (`TRUE`/`FALSE`). If `TRUE`, include key columns #' from [model_summary()] output. #' @param digits Number of digits to round decimal places to for display in @@ -169,7 +169,8 @@ model_tree.bbi_log_df <- function( stop_if_tree_missing_deps(static = static) # Make tree data - include_info <- unique(c(include_info, color_by, size_by)) + # TODO: decide if we want to append color_by and size_by to tooltip automatically + # include_info <- unique(c(include_info, color_by, size_by)) tree_data <- make_tree_data(.log_df, include_info, color_by, size_by, add_summary) # Format coloring diff --git a/man/make_tree_data.Rd b/man/make_tree_data.Rd index 9a9e166f9..0075639d6 100644 --- a/man/make_tree_data.Rd +++ b/man/make_tree_data.Rd @@ -23,8 +23,8 @@ identifying which models are starred, have heuristics, etc. See details for more information.} \item{size_by}{A \strong{numeric} (or integer) run log column to size the nodes by. -Can be helpful for sizing nodes by objective function values or -otherwise emphasizing notable differences in numeric columns.} +If not specified, the default sizing is \code{'leafCount'}, which sizes the +nodes based on how many models are based on it.} \item{add_summary}{Logical (\code{TRUE}/\code{FALSE}). If \code{TRUE}, include key columns from \code{\link[=model_summary]{model_summary()}} output.} diff --git a/man/model_tree.Rd b/man/model_tree.Rd index 012fa95b6..b423dd3f1 100644 --- a/man/model_tree.Rd +++ b/man/model_tree.Rd @@ -31,8 +31,8 @@ identifying which models are starred, have heuristics, etc. See details for more information.} \item{size_by}{A \strong{numeric} (or integer) run log column to size the nodes by. -Can be helpful for sizing nodes by objective function values or -otherwise emphasizing notable differences in numeric columns.} +If not specified, the default sizing is \code{'leafCount'}, which sizes the +nodes based on how many models are based on it.} \item{add_summary}{Logical (\code{TRUE}/\code{FALSE}). If \code{TRUE}, include key columns from \code{\link[=model_summary]{model_summary()}} output.} diff --git a/tests/testthat/helpers-create-example-model.R b/tests/testthat/helpers-create-example-model.R index 1c1464c10..b9e5f205a 100644 --- a/tests/testthat/helpers-create-example-model.R +++ b/tests/testthat/helpers-create-example-model.R @@ -157,7 +157,7 @@ make_fake_boot <- function(mod, n = 100, strat_cols = c("SEX", "ETN")){ # Unlike make_fake_boot however, the simulation will have a status of "Not Run" make_fake_sim <- function(mod, mod_id = "mod-sim", n = 100){ mod_sim <- copy_model_from(mod, mod_id) %>% update_model_id() - model_dir <- get_model_working_directory(MOD1) + model_dir <- get_model_working_directory(mod) new_dir_path <- file.path(model_dir, mod_id) fs::dir_copy(mod$absolute_model_path, new_dir_path) mod_sim <- add_msf_opt(mod_sim) diff --git a/vignettes/model-tree.Rmd b/vignettes/model-tree.Rmd index f0f0876f5..830705dde 100644 --- a/vignettes/model-tree.Rmd +++ b/vignettes/model-tree.Rmd @@ -172,7 +172,7 @@ In this example we define a new column, `out_of_date`, to denote whether the mod log_df %>% add_config() %>% dplyr::mutate(out_of_date = model_has_changed | data_has_changed) %>% model_tree( - include_info = c("model_has_changed", "data_has_changed", "nm_version"), + include_info = c("model_has_changed", "data_has_changed"), color_by = "out_of_date" ) ``` @@ -219,20 +219,18 @@ model_tree( ## Size the nodes by a particular column -Controlling the node size can be helpful for quickly determining the trend of a particular numeric column, or further emphasizing a logical one (e.g., `'star'`). Here, we use `color_by` and `size_by` to show the objective function value decreasing with each new model. +Controlling the node size can be helpful for quickly determining the trend of a particular **numeric** column. Here, we use `color_by` and `size_by` to show the objective function value decreasing with each new model. + - **Note:** Like the `color_by` argument, only columns included in `log_df` can be passed. We have to call `add_summary()` to use `"ofv"` even though this is included in the tooltip when `add_summary = TRUE`. + - If `size_by` is _not_ specified, the nodes are sized based on how many other models/nodes stem from it (i.e. the "final model" will be smaller than the "base model"). ```{r, eval=FALSE} log_df %>% add_summary() %>% - model_tree( - include_info = c("star", "description"), - color_by = "ofv", size_by = "ofv" - ) + model_tree(color_by = "ofv", size_by = "ofv") ``` ```{r, echo=FALSE, eval=eval_model_tree} model_tree( log_df, - include_info = c("star", "description"), color_by = "ofv", size_by = "ofv", width = 800, font_size = 12 From 48ca4e00e6bb2141f4bd84635f5c8c710c317faa Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Mon, 7 Oct 2024 15:09:43 -0400 Subject: [PATCH 13/20] prefix internal bbr function for vignette building --- tests/testthat/helpers-create-example-model.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/helpers-create-example-model.R b/tests/testthat/helpers-create-example-model.R index b9e5f205a..110211b1b 100644 --- a/tests/testthat/helpers-create-example-model.R +++ b/tests/testthat/helpers-create-example-model.R @@ -157,7 +157,7 @@ make_fake_boot <- function(mod, n = 100, strat_cols = c("SEX", "ETN")){ # Unlike make_fake_boot however, the simulation will have a status of "Not Run" make_fake_sim <- function(mod, mod_id = "mod-sim", n = 100){ mod_sim <- copy_model_from(mod, mod_id) %>% update_model_id() - model_dir <- get_model_working_directory(mod) + model_dir <- bbr:::get_model_working_directory(mod) new_dir_path <- file.path(model_dir, mod_id) fs::dir_copy(mod$absolute_model_path, new_dir_path) mod_sim <- add_msf_opt(mod_sim) From 531833a4dac4f20b8a8012c3281f5e39ac9d4638 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Tue, 14 Jan 2025 16:17:45 -0500 Subject: [PATCH 14/20] doc fixes and remove TODO comment --- R/model-tree.R | 8 +++----- man/make_tree_data.Rd | 4 ++-- man/model_tree.Rd | 6 +++--- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/R/model-tree.R b/R/model-tree.R index 4b47e868f..56ad82dc4 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -8,8 +8,8 @@ #' identifying which models are starred, have heuristics, etc. See details #' for more information. #' @param size_by A **numeric** (or integer) run log column to size the nodes by. -#' If not specified, the default sizing is `'leafCount'`, which sizes the -#' nodes based on how many models are based on it. +#' If not specified, the default is to size the nodes based on how many +#' models are based on it. #' @param add_summary Logical (`TRUE`/`FALSE`). If `TRUE`, include key columns #' from [model_summary()] output. #' @param digits Number of digits to round decimal places to for display in @@ -54,7 +54,7 @@ #' - Note that the above summary columns will only receive the special #' formatting if added via `add_summary = TRUE`. #' - i.e. if `.log_df = run_log() %>% add_summary()` and -#' `include_info = 'ofv'`, The `'OFV'` parameter will display the same as if +#' `include_info = 'ofv'`, the `'OFV'` parameter will display the same as if #' it was _not_ passed to `include_info`. #' #' **Coloring** @@ -169,8 +169,6 @@ model_tree.bbi_log_df <- function( stop_if_tree_missing_deps(static = static) # Make tree data - # TODO: decide if we want to append color_by and size_by to tooltip automatically - # include_info <- unique(c(include_info, color_by, size_by)) tree_data <- make_tree_data(.log_df, include_info, color_by, size_by, add_summary) # Format coloring diff --git a/man/make_tree_data.Rd b/man/make_tree_data.Rd index 0075639d6..b6209ac02 100644 --- a/man/make_tree_data.Rd +++ b/man/make_tree_data.Rd @@ -23,8 +23,8 @@ identifying which models are starred, have heuristics, etc. See details for more information.} \item{size_by}{A \strong{numeric} (or integer) run log column to size the nodes by. -If not specified, the default sizing is \code{'leafCount'}, which sizes the -nodes based on how many models are based on it.} +If not specified, the default is to size the nodes based on how many +models are based on it.} \item{add_summary}{Logical (\code{TRUE}/\code{FALSE}). If \code{TRUE}, include key columns from \code{\link[=model_summary]{model_summary()}} output.} diff --git a/man/model_tree.Rd b/man/model_tree.Rd index b423dd3f1..1fc520d57 100644 --- a/man/model_tree.Rd +++ b/man/model_tree.Rd @@ -31,8 +31,8 @@ identifying which models are starred, have heuristics, etc. See details for more information.} \item{size_by}{A \strong{numeric} (or integer) run log column to size the nodes by. -If not specified, the default sizing is \code{'leafCount'}, which sizes the -nodes based on how many models are based on it.} +If not specified, the default is to size the nodes based on how many +models are based on it.} \item{add_summary}{Logical (\code{TRUE}/\code{FALSE}). If \code{TRUE}, include key columns from \code{\link[=model_summary]{model_summary()}} output.} @@ -95,7 +95,7 @@ formatted specially: \item Note that the above summary columns will only receive the special formatting if added via \code{add_summary = TRUE}. \item i.e. if \code{.log_df = run_log() \%>\% add_summary()} and -\code{include_info = 'ofv'}, The \code{'OFV'} parameter will display the same as if +\code{include_info = 'ofv'}, the \code{'OFV'} parameter will display the same as if it was \emph{not} passed to \code{include_info}. } } From e2c0909c88736176ccee06a70505f66231115e22 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Wed, 15 Jan 2025 10:30:47 -0500 Subject: [PATCH 15/20] change size_by handling - Dont create a node_size column if size_by is NULL or an unsupported column type is specified. Instead set to default sizing (leafCount) and set as an attribute --- R/model-tree.R | 9 +++++---- tests/testthat/test-model-tree.R | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/R/model-tree.R b/R/model-tree.R index 56ad82dc4..f493e0741 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -177,7 +177,7 @@ model_tree.bbi_log_df <- function( # Format sizing tree_data <- size_tree_by(tree_data, size_by = size_by) - node_size <- ifelse(is.null(size_by), "leafCount", "node_size") + node_size <- attributes(tree_data)$size_by # Compile attributes into tooltip tree_data <- make_tree_tooltip(tree_data, digits = digits, font_size = font_size) @@ -745,18 +745,19 @@ size_tree_by <- function(tree_data, size_by = NULL){ # Set node sizes with NA values (including start node) to mean value mean_val <- mean(tree_data$node_size, na.rm = TRUE) tree_data$node_size[is.na(tree_data$node_size)] <- mean_val + attr(tree_data, "size_by") <- "node_size" }else{ col_class <- class(tree_data[[size_by]]) cli::cli_warn( c( "Only numeric columns are supported. Column {.val {size_by}} is {.val {col_class}}", - "i" = "Setting node size to constant" + "i" = "Setting node size to Default" ) ) - tree_data$node_size <- 1 + attr(tree_data, "size_by") <- "leafCount" } }else{ - tree_data$node_size <- 1 + attr(tree_data, "size_by") <- "leafCount" } return(tree_data) diff --git a/tests/testthat/test-model-tree.R b/tests/testthat/test-model-tree.R index 043147184..2c65c6254 100644 --- a/tests/testthat/test-model-tree.R +++ b/tests/testthat/test-model-tree.R @@ -440,14 +440,14 @@ withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), { 'Only numeric columns are supported' ) node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode") - expect_true(dplyr::n_distinct(node_sizes) == 1) + expect_true(dplyr::n_distinct(node_sizes) == 2) # leafCount sizing # Check character expect_warning( pl_tree <- model_tree(log_df2, add_summary = FALSE, size_by = "star"), 'Only numeric columns are supported' ) node_sizes <- get_node_attribute(pl_tree$x$data$children, attr = "SizeOfNode") - expect_true(dplyr::n_distinct(node_sizes) == 1) + expect_true(dplyr::n_distinct(node_sizes) == 2) # leafCount sizing }) it("static plot", { From 69fcf62da010a7beb82bb42a1ae7afc293f253d2 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Wed, 15 Jan 2025 10:47:55 -0500 Subject: [PATCH 16/20] color_by fix: treat integers the same as numeric - Add a test for this. Perhaps not necessary, but good to have to capture any future change in handling --- R/model-tree.R | 2 +- tests/testthat/test-model-tree.R | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/R/model-tree.R b/R/model-tree.R index f493e0741..ce1a5666d 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -691,7 +691,7 @@ color_tree_by <- function(tree_data, color_by = "run"){ n_levels <- dplyr::n_distinct(vals) # To preview color palette: scales::show_col(pal_bbr) - if(inherits(vals, "numeric")){ + if(inherits(vals, c("numeric", "integer"))){ # Gradient coloring (sorted): get colors for unique values (excluding NA) sorted_vals <- sort(unique(vals)) pal_bbr <- scales::pal_gradient_n(bbr_cols)(seq(0, 1, length.out = n_levels)) diff --git a/tests/testthat/test-model-tree.R b/tests/testthat/test-model-tree.R index 2c65c6254..1cba0942a 100644 --- a/tests/testthat/test-model-tree.R +++ b/tests/testthat/test-model-tree.R @@ -382,6 +382,17 @@ withr::with_options(list(bbr.bbi_exe_path = read_bbi_path()), { expected_colors <- c("#F4DBD3", "#EB003D", "#FFFFFF", "#ED9D84", "#E35B44") # Can inspect with `scales::show_col(node_colors)` expect_equal(node_colors, expected_colors) + + # Check integer case + log_df$numeric_vals <- as.integer(log_df$numeric_vals) + pl_tree <- model_tree( + log_df, add_summary = FALSE, color_by = "numeric_vals", + include_info = "numeric_vals" + ) + node_colors <- get_node_attribute(pl_tree$x$data$children, attr = "fill") + expected_colors <- c("#F4DBD3", "#EB003D", "#FFFFFF", "#ED9D84", "#E35B44") + # Can inspect with `scales::show_col(node_colors)` + expect_equal(node_colors, expected_colors) }) it("size_tree_by()", { From 71be083f056db14eb61f3919b58609ddfcc353f9 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Wed, 15 Jan 2025 11:01:49 -0500 Subject: [PATCH 17/20] change color_by handling: same approach as size_by - Store color_by attribute within color_tree_by instead of using conditional logic outside of the function. - Add comments for attribute for both size_by and color_by handling setting to make this more clear --- R/model-tree.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/model-tree.R b/R/model-tree.R index ce1a5666d..d6a3cb719 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -173,7 +173,7 @@ model_tree.bbi_log_df <- function( # Format coloring tree_data <- color_tree_by(tree_data, color_by = color_by) - tree_attr <- ifelse(is.null(color_by), "leafCount", color_by) + tree_attr <- attributes(tree_data)$color_by # Format sizing tree_data <- size_tree_by(tree_data, size_by = size_by) @@ -721,11 +721,15 @@ color_tree_by <- function(tree_data, color_by = "run"){ tree_data$col <- factor(tree_data$col) levels(tree_data$col) <- c(node_colors, pal_bbr) } + # Set color_by attribute to color_by argument + attr(tree_data, "color_by") <- color_by }else{ # all bbr red color pal_bbr <- scales::pal_gradient_n(bbr_cols)(1) tree_data$col <- factor(tree_data$col) levels(tree_data$col) <- c(node_colors, pal_bbr) + # Set color_by attribute to leafCount (default) + attr(tree_data, "color_by") <- "leafCount" } return(tree_data) @@ -745,6 +749,7 @@ size_tree_by <- function(tree_data, size_by = NULL){ # Set node sizes with NA values (including start node) to mean value mean_val <- mean(tree_data$node_size, na.rm = TRUE) tree_data$node_size[is.na(tree_data$node_size)] <- mean_val + # Set size_by attribute to node_size column attr(tree_data, "size_by") <- "node_size" }else{ col_class <- class(tree_data[[size_by]]) @@ -757,6 +762,7 @@ size_tree_by <- function(tree_data, size_by = NULL){ attr(tree_data, "size_by") <- "leafCount" } }else{ + # Set size_by attribute to leafCount (default) attr(tree_data, "size_by") <- "leafCount" } From d6730dd2c69f266d491e843be226acc83bb51988 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Wed, 15 Jan 2025 13:02:59 -0500 Subject: [PATCH 18/20] size_tree_by: implement relative scaling - Instead of relying on the raw data, perform the following operations: - Normalize values based on standard deviation, which ensures sizes are comparable across datasets and prevents large values from dominating. - Scale all values to be within `rescale_to`, which also helps ensure consistent sizing, but also handles negative values (some of which may be introduced by the previous operation). - The larger the standard deviation is, the larger the the nodes can be. e.g. if rescale_to was c(0, 100) and the values in the data were c(-2, -1, 0, 1, 2), the largest node would almost fill the page. - `rescale_to` is not currently an argument that can be provided by the user. There may be reasons to want to visualize size differences to a greater degree in the future (perhaps objective function value --- R/model-tree.R | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/R/model-tree.R b/R/model-tree.R index d6a3cb719..6cfdc4b88 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -739,16 +739,41 @@ color_tree_by <- function(tree_data, color_by = "run"){ #' another column. #' @inheritParams make_tree_tooltip #' @inheritParams model_tree +#' @param rescale_to A numeric vector of length 2 specifying the range to rescale +#' `size_by` values to. Defaults to `c(1, 3)`, where `1` is the smallest node +#' size and `3` is the largest. #' @noRd -size_tree_by <- function(tree_data, size_by = NULL){ +size_tree_by <- function(tree_data, size_by = NULL, rescale_to = c(1, 3)){ if(!is.null(size_by)){ checkmate::assert_true(size_by %in% names(tree_data)) + checkmate::assert_numeric(rescale_to, len = 2, lower = 1) + tree_data$node_size <- tree_data[[size_by]] + # Scale size with numeric value if(inherits(tree_data$node_size, c("numeric", "integer"))){ - # Set node sizes with NA values (including start node) to mean value + # Rescale values based on standard deviation + # - a large SD can lead to very large nodes mean_val <- mean(tree_data$node_size, na.rm = TRUE) - tree_data$node_size[is.na(tree_data$node_size)] <- mean_val + sd_val <- sd(tree_data$node_size, na.rm = TRUE) + + if(sd_val != 0){ + tree_data$node_size <- (tree_data$node_size - mean_val) / sd_val + }else{ + # Would only happen if all values are the same + # - avoids dividing by 0 + tree_data$node_size <- rep(1, length(tree_data$node_size)) + } + + # Rescale to specified range + # Note: node sizes cannot be negative + tree_data <- tree_data %>% dplyr::mutate( + node_size = scales::rescale(.data$node_size, to = rescale_to) + ) + + # Set node sizes with NA values (including start node) to mean rescale_to + tree_data$node_size[is.na(tree_data$node_size)] <- mean(rescale_to) + # Set size_by attribute to node_size column attr(tree_data, "size_by") <- "node_size" }else{ From 0cf42e40fa2ece78a977f4ab5db14f7942479c04 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Wed, 15 Jan 2025 15:27:53 -0500 Subject: [PATCH 19/20] Add context about aggFun=identity --- R/model-tree.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/model-tree.R b/R/model-tree.R index 6cfdc4b88..73c1a4e25 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -183,6 +183,14 @@ model_tree.bbi_log_df <- function( tree_data <- make_tree_tooltip(tree_data, digits = digits, font_size = font_size) # Create model tree + # - Notes about aggFun: + # - identity is not the same as base::identity. collapsibleTree has specific + # handling for `aggFun = identity`, and it's the only way for sizing to work + # based on a column without aggregating values. + # - Note: The node sizing logic has a known quirk where it appears to scale + # sizes relative to the first "parent" node (i.e., the first row where `from` + # is not NA). This can cause inconsistencies in relative node sizes depending + # on the value of the first parent node. pl_tree <- collapsibleTree::collapsibleTreeNetwork( tree_data, zoomable = zoomable, collapsed = FALSE, # Coloring and sizing From 1c4d4468ab4fb69854cf24d80dfa133767d3151b Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Wed, 15 Jan 2025 15:49:54 -0500 Subject: [PATCH 20/20] size_by: Remove scaling based on standard deviation - This didnt impact the final node_size and was redundant. `rescale_to` is the only parameter to be considered when scaling the relative sizes --- R/model-tree.R | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/R/model-tree.R b/R/model-tree.R index 73c1a4e25..2a5ae4d51 100644 --- a/R/model-tree.R +++ b/R/model-tree.R @@ -760,21 +760,10 @@ size_tree_by <- function(tree_data, size_by = NULL, rescale_to = c(1, 3)){ # Scale size with numeric value if(inherits(tree_data$node_size, c("numeric", "integer"))){ - # Rescale values based on standard deviation - # - a large SD can lead to very large nodes - mean_val <- mean(tree_data$node_size, na.rm = TRUE) - sd_val <- sd(tree_data$node_size, na.rm = TRUE) - - if(sd_val != 0){ - tree_data$node_size <- (tree_data$node_size - mean_val) / sd_val - }else{ - # Would only happen if all values are the same - # - avoids dividing by 0 - tree_data$node_size <- rep(1, length(tree_data$node_size)) - } # Rescale to specified range - # Note: node sizes cannot be negative + # - node sizes must be greater than 0 (decimals are fine) + # - A large SD can lead to large nodes (max of rescale_to should be small) tree_data <- tree_data %>% dplyr::mutate( node_size = scales::rescale(.data$node_size, to = rescale_to) )