Skip to content

Commit

Permalink
Fix filtering function, add parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
edelarua committed Feb 19, 2025
1 parent 8708969 commit 0168a6d
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 98 deletions.
85 changes: 34 additions & 51 deletions R/tbl_filter.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,29 @@
#'
#' @description `r lifecycle::badge('experimental')`\cr
#'
#' This function is used to filter hierarchical table rows.
#' This function is used to filter hierarchical table rows. Filters are not applied to summary or overall rows.
#'
#' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr
#' A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`.
#' @param keep_empty_summary (scalar `logical`)\cr
#' Logical argument indicating whether to retain summary rows corresponding to table hierarchy sections that have had
#' all rows filtered out. Default is `TRUE`.
#' @inheritParams cards::ard_filter
#' @inheritParams rlang::args_dots_empty
#'
#' @details
#' The `filter` argument can be used to filter out rows of a table which do not meet the criteria provided as an
#' expression. Rows can be filtered on the values of any of the possible statistics (`n`, `p`, and `N`) provided they
#' are included at least once in the table, as well as the values of any `by` variables. Filtering is only applied to
#' rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows preceding at
#' least one inner row that meets the filtering criteria are kept regardless of whether they meet the filtering criteria
#' themselves. In addition to filtering on individual statistic values, filters can be applied across the row (i.e.
#' across all `by` variable values) by using aggregate functions such as `sum()` and `mean()`.
#' rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows are kept
#' regardless of whether they meet the filtering criteria themselves. In addition to filtering on individual statistic
#' values, filters can be applied across the row (i.e. across all `by` variable values) by using aggregate functions
#' such as `sum()` and `mean()`.
#'
#' Some examples of possible filters:
#' - `filter = n > 5`
#' - `filter = n == 2 & p < 0.05`
#' - `filter = sum(n) > 4`
#' - `filter = sum(n) >= 4`
#' - `filter = mean(n) > 4 | n > 3`
#' - `filter = any(n > 2 & TRTA == "Xanomeline High Dose")`
#'
Expand Down Expand Up @@ -61,46 +64,15 @@ tbl_filter <- function(x, ...) {

#' @export
#' @rdname tbl_filter
tbl_filter.tbl_hierarchical <- function(x, filter, ...) {
tbl_filter.tbl_hierarchical <- function(x, filter, keep_empty_summary = TRUE, ...) {
set_cli_abort_call()

ard_args <- attributes(x$cards$tbl_hierarchical)$args
by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level"))
x_ard <- x$cards$tbl_hierarchical

# add dummy rows for variables not in include so their label rows are filtered correctly
not_incl <- setdiff(ard_args$variables, ard_args$include)
if (length(not_incl) > 0) {
cli::cli_inform(
"Not all hierarchy variables present in the table were included in the {.arg include} argument.
These variables ({not_incl}) do not have event rate data available so the total sum of the event rates
for this hierarchy section will be used instead. To use true event rates for all sections of the table,
set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}."
)

for (v in not_incl) {
i <- length(ard_args$by) + which(ard_args$variables == v)
x_sum_rows <- x_ard |>
dplyr::group_by(across(all_of(cards::all_ard_group_n((length(ard_args$by) + 1):i)))) |>
dplyr::group_map(function(.df, .g) {
g_cur <- .g[[ncol(.g) - 1]]
if (!is.na(g_cur) && g_cur == v) {
# dummy summary row to add in
.df[1, ] |> mutate(
variable = g_cur,
variable_level = .g[[ncol(.g)]],
stat_name = "no_stat",
stat = list(0),
tmp = TRUE
)
} else {
NULL
}
}, .keep = TRUE)

x_ard <- x_ard |> dplyr::bind_rows(x_sum_rows)
}
}
x_ard <- x_ard |> .append_not_incl(ard_args)

# add indices to ARD
x_ard <- x_ard |>
Expand Down Expand Up @@ -160,26 +132,37 @@ tbl_filter.tbl_hierarchical <- function(x, filter, ...) {
unique() |>
setdiff(rm_idx)

# apply filtering while retaining original row order
idx_filter <- intersect(x$table_body$idx_nofilter, idx_filter)
x$table_body <- x$table_body[match(idx_filter, x$table_body$idx_nofilter), ]

if ("tmp" %in% names(x_ard_filter)) {
x_ard_filter <- x_ard_filter |>
dplyr::filter(is.na(tmp)) |>
select(-"tmp")
}

# update x$cards
x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_nofilter")
# remove summary rows from empty sections if requested
if (!keep_empty_summary) {
if (!dplyr::last(ard_args$variables) %in% x$table_body$variable) {
x$table_body <- x$table_body |> dplyr::filter(!variable %in% outer_cols)
x_ard_filter <- x_ard_filter |> dplyr::filter(!variable %in% outer_cols)
} else {
for (v in rev(outer_cols)) {
empty_rows <- x$table_body |>
dplyr::filter(variable == dplyr::lead(variable) & variable == v) |>
dplyr::pull("idx_nofilter")
x$table_body <- x$table_body |> dplyr::filter(!idx_nofilter %in% empty_rows)
x_ard_filter <- x_ard_filter |> dplyr::filter(!idx_nofilter %in% empty_rows)
}
}
}

# update x$table_body
x$table_body <- x$table_body[match(idx_filter, x$table_body$idx_nofilter), ] |> select(-"idx_nofilter")
x$table_body <- x$table_body |> select(-"idx_nofilter")

x
# update x$cards
x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_nofilter")

# if (nrow(x$table_body) > 0) {
# cli::cli_inform(
# "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept
# regardless of whether they meet the filtering criteria themselves.",
# .frequency = "once",
# .frequency_id = "sum_rows_lt"
# )
# }
x
}
87 changes: 47 additions & 40 deletions R/tbl_sort.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,46 +60,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) {
x_ard <- x$cards$tbl_hierarchical

# add dummy rows for variables not in include so their label rows are sorted correctly
not_incl <- setdiff(ard_args$variables, ard_args$include)
if (length(not_incl) > 0) {
cli::cli_inform(
"Not all hierarchy variables present in the table were included in the {.arg include} argument.
These variables ({not_incl}) do not have event rate data available so the total sum of the event rates
for this hierarchy section will be used instead. To use true event rates for all sections of the table,
set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}."
)

for (v in not_incl) {
i <- length(ard_args$by) + which(ard_args$variables == v)
x_sum_rows <- x_ard |>
dplyr::group_by(across(all_of(cards::all_ard_group_n((length(ard_args$by) + 1):i)))) |>
dplyr::group_map(function(.df, .g) {
# get pseudo-summary row stat value for descending sort
if (sort == "descending") {
stat_nm <- setdiff(.df$stat_name, "N")[1]
sum <- .df |>
dplyr::filter(stat_name == !!stat_nm) |>
dplyr::summarize(s = sum(unlist(stat))) |>
dplyr::pull(s)
}
g_cur <- .g[[ncol(.g) - 1]]
if (!is.na(g_cur) && g_cur == v) {
# dummy summary row to add in
.df[1, ] |> mutate(
variable = g_cur,
variable_level = .g[[ncol(.g)]],
stat_name = if (sort == "descending") stat_nm else "no_stat",
stat = if (sort == "descending") list(sum) else list(0),
tmp = TRUE
)
} else {
NULL
}
}, .keep = TRUE)

x_ard <- x_ard |> dplyr::bind_rows(x_sum_rows)
}
}
x_ard <- x_ard |> .append_not_incl(ard_args, sort)

# add indices to ARD
x_ard <- x_ard |>
Expand Down Expand Up @@ -173,3 +134,49 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) {

x
}

.append_not_incl <- function(x, ard_args, sort = NULL) {
# add dummy rows for variables not in include so their label rows are sorted correctly
not_incl <- setdiff(ard_args$variables, ard_args$include)
if (length(not_incl) > 0) {
cli::cli_inform(
"Not all hierarchy variables present in the table were included in the {.arg include} argument.
These variables ({not_incl}) do not have event rate data available so the total sum of the event rates
for this hierarchy section will be used instead. To use true event rates for all sections of the table,
set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}."
)

for (v in not_incl) {
i <- length(ard_args$by) + which(ard_args$variables == v)
x_sum_rows <- x |>
dplyr::group_by(across(all_of(cards::all_ard_group_n((length(ard_args$by) + 1):i)))) |>
dplyr::group_map(function(.df, .g) {
# get pseudo-summary row stat value for descending sort
if (!is.null(sort) && sort == "descending") {
stat_nm <- setdiff(.df$stat_name, "N")[1]
sum <- .df |>
dplyr::filter(stat_name == !!stat_nm) |>
dplyr::summarize(s = sum(unlist(stat))) |>
dplyr::pull(s)
}
g_cur <- .g[[ncol(.g) - 1]]
if (!is.na(g_cur) && g_cur == v) {
# dummy summary row to add in
.df[1, ] |> mutate(
variable = g_cur,
variable_level = .g[[ncol(.g)]],
stat_name = if (!is.null(sort) && sort == "descending") stat_nm else "no_stat",
stat = if (!is.null(sort) && sort == "descending") list(sum) else list(0),
tmp = TRUE
)
} else {
NULL
}
}, .keep = TRUE)

x <- x |> dplyr::bind_rows(x_sum_rows)
}
}

x
}
21 changes: 14 additions & 7 deletions man/tbl_filter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions tests/testthat/_snaps/tbl_filter.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
# tbl_filter.tbl_hierarchical() works

Code
as.data.frame(tbl)
Output
**Sex** \n    **Race** \n        **Reported Term for the Adverse Event** **Placebo** \nN = 86 **Xanomeline High Dose** \nN = 84 **Xanomeline Low Dose** \nN = 84
1 Number of patients with event 26 (30%) 42 (50%) 40 (48%)
2 F 13 (25%) 18 (45%) 23 (46%)
3 BLACK OR AFRICAN AMERICAN 3 (60%) 4 (67%) 3 (50%)
4 WHITE 10 (21%) 14 (41%) 20 (45%)
5 APPLICATION SITE PRURITUS 2 (4.2%) 8 (24%) 10 (23%)
6 ERYTHEMA 6 (13%) 6 (18%) 8 (18%)
7 APPLICATION SITE ERYTHEMA 2 (4.2%) 5 (15%) 5 (11%)
8 M 13 (39%) 24 (55%) 17 (50%)
9 AMERICAN INDIAN OR ALASKA NATIVE 0 (NA%) 1 (100%) 0 (NA%)
10 BLACK OR AFRICAN AMERICAN 1 (33%) 1 (33%) 0 (NA%)
11 WHITE 12 (40%) 22 (55%) 17 (50%)
12 APPLICATION SITE PRURITUS 1 (3.3%) 12 (30%) 10 (29%)
13 DIARRHOEA 6 (20%) 3 (7.5%) 2 (5.9%)
14 ERYTHEMA 3 (10%) 5 (13%) 6 (18%)
15 APPLICATION SITE ERYTHEMA 1 (3.3%) 10 (25%) 7 (21%)

# tbl_filter.tbl_hierarchical() error messaging works

Code
Expand Down

0 comments on commit 0168a6d

Please sign in to comment.