Skip to content

Commit

Permalink
Merge pull request #729 from metrumresearchgroup/fix/nm_filter_data
Browse files Browse the repository at this point in the history
Fix nm_data(filter = TRUE)
  • Loading branch information
barrettk authored Jan 14, 2025
2 parents de4cc8d + 3be0ccc commit 326cc04
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 22 deletions.
67 changes: 58 additions & 9 deletions R/filter-nm-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ invert_operator <- function(expr) {
#'
#' }
#' @keywords internal
#' @seealso [translate_nm_operator()], [invert_operator()]
#' @seealso [translate_nm_operator()], [invert_operator()], [add_na_filter()]
translate_nm_expr <- function(
nm_expr,
type = c("ignore", "accept"),
Expand All @@ -181,38 +181,87 @@ translate_nm_expr <- function(
# Translate NM operators
exprs <- translate_nm_operator(nm_expr)

r_exprs <- purrr::map_chr(exprs, function(expr){
# Compile list of all R filters.
# - A simplified version of the filter (no NA handling) or key symbol (#, @) is
# stored as the name for traceability and testing purposes.
r_exprs_list <- purrr::map(exprs, function(expr){
if(type == "ignore"){
# `IGNORE=#`, `IGNORE=@`, `IGNORE=c1`, `IGNORE=(list)`
if(expr == "#"){
# IGNORE=# is the default. That is, in the absence of IGNORE option, any
# record whose first character is '#' is treated as a comment record.
paste0("!grepl('^#', ", data_cols[1], ")")
paste0("!grepl('^#', ", data_cols[1], ")") %>%
stats::setNames(expr)
}else if(expr == "@"){
# IGNORE=@ signifies that any data record having an alphabetic character
# or `@` as its first non-blank character in column one should be ignored.
# - This permits a table file having header lines to be used as an
# NM-TRAN data set.
# - add extra `\\` for later parse()
paste0("!grepl('^\\\\s*[A-Za-z@]', ", data_cols[1], ")")
paste0("!grepl('^\\\\s*[A-Za-z@]', ", data_cols[1], ")") %>%
stats::setNames(expr)
}else if(grepl('^[a-zA-Z]$', expr)){
# This is for `IGNORE=C` columns. Meaning ignore rows if the _first_ column
# contains 'C' (this form always points to the _first_ column)
# - the above regex looks for characters of length>=1, and no symbols
paste0(data_cols[1], "!=", "'", expr, "'")
# - the above regex looks for characters of length=1, and no symbols
paste0("!grepl('^", expr, "', ", data_cols[1], ")") %>%
stats::setNames(expr)
}else{
# Invert list form expressions
invert_operator(expr)
r_expr <- invert_operator(expr)
add_na_filter(r_expr) %>% stats::setNames(r_expr)
}
}else{
# ACCEPT option only supports `ACCEPT=(list)` form --> no formatting needed
expr
# ACCEPT option only supports `ACCEPT=(list)` form
# - no formatting needed, but check for user error to avoid unintended or
# malformed filter expressions
nm_docs_msg <- "See NONMEM documentation for more details"
if(expr == "#" || expr == "@"){
cli::cli_abort(
c(
"{.val #} and {.val @} are only supported for IGNORE list expressions",
"i" = nm_docs_msg
)
)
}
if(grepl('^[a-zA-Z]$', expr)){
cli::cli_abort(
c(
"ACCEPT option only supports `ACCEPT=(list)` form",
"i" = nm_docs_msg
)
)
}

add_na_filter(expr) %>% stats::setNames(expr)
}
})

r_exprs <- purrr::list_c(r_exprs_list)

return(r_exprs)
}

#' Adjust filters to retain NA values for relevant columns
#'
#' This function serves to add a `| is.na(col)` filter to an `R` filter to
#' avoid filtering out `NA` rows. It is *not* needed when using `grepl` for
#' string detection (e.g., `IGNORE=#`, `IGNORE=@`).
#'
#' @param r_expr An `R` filter expression. e.g., `'BLQ!=1'`, `C!='C'`.
#' @keywords internal
#' @seealso [translate_nm_operator()], [invert_operator()]
add_na_filter <- function(r_expr){
column <- stringr::str_extract(r_expr, "^[^!=><]+") %>%
stringr::str_trim()

if(!is.na(column)){
paste0("(", r_expr, " | is.na(", column, "))")
}else{
r_expr
}
}

#' Filter `NONMEM` input data based on `IGNORE` and `ACCEPT` record options
#'
#' @param .mod A `bbi_nonmem_model` object
Expand Down
20 changes: 20 additions & 0 deletions man/add_na_filter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/translate_nm_expr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

111 changes: 99 additions & 12 deletions tests/testthat/test-filter-nm-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,33 +17,47 @@ test_that("translate_nm_expr() translates NONMEM filter expressions", {
test_exprs <- c("SEX==1", "ID.EQ.2", "WT/=70", "AGE.NE.30", "A=1", "WT.GT.40", "B.LE.20")

expect_equal(
translate_nm_expr(test_exprs, type = 'accept'),
translate_nm_expr(test_exprs, type = 'accept') %>% names(),
c("SEX==1", "ID==2", "WT!=70", "AGE!=30", "A==1", "WT>40", "B<=20")
)

expect_equal(
translate_nm_expr(test_exprs, type = 'ignore'),
translate_nm_expr(test_exprs, type = 'ignore') %>% names(),
c("SEX!=1", "ID!=2", "WT==70", "AGE==30", "A!=1", "WT<40", "B>=20")
)


# Use of `@`, `#`, or form `IGNORE=C2` require `data_cols` to be specified
# - only `data_cols[1]` is technically needed
data_cols <- c("C", "ID", "TIME", "EVID", "DV", "BLQ")
expect_equal(
expect_equivalent(
translate_nm_expr("#", data_cols = data_cols),
paste0("!grepl('^#', ", data_cols[1], ")")
"!grepl('^#', C)"
)

expect_equal(
expect_equivalent(
translate_nm_expr("C", data_cols = data_cols),
paste0(data_cols[1], "!='C'")
"!grepl('^C', C)"
)

# Extra `\\` is added for escape purposes when the expression is later parsed
expect_equal(
expect_equivalent(
translate_nm_expr("@", data_cols = data_cols),
paste0("!grepl('^\\\\s*[A-Za-z@]', ", data_cols[1], ")")
"!grepl('^\\\\s*[A-Za-z@]', C)"
)

# Using accept with these filters errors out
expect_error(
translate_nm_expr("#", type = "accept", data_cols = data_cols),
"only supported for IGNORE"
)
expect_error(
translate_nm_expr("@", type = "accept", data_cols = data_cols),
"only supported for IGNORE"
)
expect_error(
translate_nm_expr("C", type = "accept", data_cols = data_cols),
"ACCEPT option only supports"
)

# Error out for unsupported logical operators
Expand Down Expand Up @@ -75,18 +89,26 @@ test_that("filter_nm_data() filters input data using IGNORE/ACCEPT options", {
data_rec$values[[7]]$value <- "(ID.EQ.2, SEX=1, WT.LE.50)"
nmrec::write_ctl(ctl, get_model_path(mod2))

# Check expected expressions
## Check expected expressions ##
input_data <- nm_data(mod2) %>% suppressMessages()
nm_exprs <- get_data_filter_exprs(mod2)
r_filters <- translate_nm_expr(
nm_expr = nm_exprs$exprs, type = nm_exprs$type, data_cols = names(input_data)
)
filter_expression <- paste(r_filters, collapse = " & ")

# Check filters and names
# - names: simplified version of filter (no NA handling) or key symbol (#, @)
# stored as the name for traceability and testing purposes.
# - value: the actual filter expression supplied to the final filter
expect_equal(names(r_filters), c("@", "ID!=2", "SEX!=1", "WT>=50"))
expect_equal(
filter_expression, "!grepl('^\\\\s*[A-Za-z@]', ID) & ID!=2 & SEX!=1 & WT>=50"
)
unname(r_filters),
c("!grepl('^\\\\s*[A-Za-z@]', ID)", "(ID!=2 | is.na(ID))",
"(SEX!=1 | is.na(SEX))", "(WT>=50 | is.na(WT))")
)

# Check that filter expression works correctly
filter_expression <- paste(r_filters, collapse = " & ")
filtered_data <- filter_nm_data(mod2)
expect_equal(
nrow(filtered_data),
Expand Down Expand Up @@ -159,3 +181,68 @@ test_that("filter_nm_data() errors if expressions cant be parsed", {
"ignore/accept list could not be converted to filters"
)
})


test_that("filter_nm_data() works for IGNORE=C filters", {
mod2 <- copy_model_from(MOD1, "2")
on.exit(delete_models(mod2, .force = TRUE, .tags = NULL))

# Add additional IGNORE expressions and compare to dplyr filters
ctl <- get_model_ctl(mod2)
data_rec <- nmrec::select_records(ctl, "data")[[1]]
data_rec$parse()

# Include a filter for a column with NA values
data_rec$values[[7]]$value <- "=C"
nmrec::write_ctl(ctl, get_model_path(mod2))

# Test
# - 2 subjects lost due to 'C' filter
data <- nm_data(mod2)
data_test <- data %>% dplyr::mutate(C = NA) %>%
# 'C' must be the first column for this test
dplyr::relocate("C")
data_test$C[1] <- "C"
data_test$C[2] <- "Comment"
data_test$C[3] <- "."

data_f <- filter_nm_data(mod2, data = data_test)
expect_equal(nrow(data) - 2, nrow(data_f))
})

test_that("filter_nm_data() works when NA values are present", {
mod2 <- copy_model_from(MOD1, "2")
on.exit(delete_models(mod2, .force = TRUE, .tags = NULL))

# Add additional IGNORE expressions and compare to dplyr filters
ctl <- get_model_ctl(mod2)
data_rec <- nmrec::select_records(ctl, "data")[[1]]
data_rec$parse()

# Include a filter for a column with NA values
data_rec$values[[7]]$value <- "(C='C', BLQ=1)"
nmrec::write_ctl(ctl, get_model_path(mod2))

# Test 1
# - C column does *not* lead to loss of subjects due to NA values
# - 1 subject lost due to BLQ.
data <- nm_data(mod2)
data_test1 <- data %>% dplyr::mutate(C = NA, BLQ = 0)
data_test1$BLQ[nrow(data_test1)] <- 1

data_f1 <- filter_nm_data(mod2, data = data_test1)
expect_equal(nrow(data) - 1, nrow(data_f1))

# Test 2
# - filter works appropriately when found strings and NA values are present
# - 1 subject lost due to BLQ. 1 subject lost due to 'C' filter
data_test2 <- data_test1
data_test2$C[1] <- "C"
data_test2$C[2] <- "."
data_test2$BLQ[3] <- NA

data_f2 <- filter_nm_data(mod2, data = data_test2)
expect_equal(nrow(data) - 2, nrow(data_f2))
expect_true(is.na(data_f2$BLQ[2]))
expect_equal(data_f2$C[1], ".")
})

0 comments on commit 326cc04

Please sign in to comment.