From 972ab4918759c6f49fb54c4bb4a2f43d4d149b2d Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Thu, 2 Jan 2025 13:19:41 -0500 Subject: [PATCH 1/5] Fix nm_data(filter = TRUE) - Previously returned no rows if column is read in as all NA and a string detection filter was applied (e.g., C='C'). - Instead of adding `C!='C'`, we now append an `is.na()` expression, which would yield `(C!='C' | is.na(C)`. - A simplified version of the filter (or key symbold for `@` and `#` filters), is stored as the name for internal traceability and testing purposes (i.e. so we dont have to add `is.na(col)` for all the tests). --- R/filter-nm-data.R | 43 ++++++++++++++++--- man/add_na_filter.Rd | 20 +++++++++ man/translate_nm_expr.Rd | 2 +- tests/testthat/test-filter-nm-data.R | 64 ++++++++++++++++++++++++---- 4 files changed, 112 insertions(+), 17 deletions(-) create mode 100644 man/add_na_filter.Rd diff --git a/R/filter-nm-data.R b/R/filter-nm-data.R index 04c9081f..075823cf 100644 --- a/R/filter-nm-data.R +++ b/R/filter-nm-data.R @@ -169,7 +169,7 @@ invert_operator <- function(expr) { #' #' } #' @keywords internal -#' @seealso [translate_nm_operator()], [invert_operator()] +#' @seealso [translate_nm_operator()], [invert_operator()], [add_na_filter()] translate_nm_expr <- function( nm_expr, type = c("ignore", "accept"), @@ -181,38 +181,67 @@ translate_nm_expr <- function( # Translate NM operators exprs <- translate_nm_operator(nm_expr) - r_exprs <- purrr::map_chr(exprs, function(expr){ + # Compile list of all R filters. + # - A simplified version of the filter (no NA handling) or key symbol (#, @) is + # stored as the name for traceability and testing purposes. + r_exprs_list <- purrr::map(exprs, function(expr){ if(type == "ignore"){ # `IGNORE=#`, `IGNORE=@`, `IGNORE=c1`, `IGNORE=(list)` if(expr == "#"){ # IGNORE=# is the default. That is, in the absence of IGNORE option, any # record whose first character is '#' is treated as a comment record. - paste0("!grepl('^#', ", data_cols[1], ")") + paste0("!grepl('^#', ", data_cols[1], ")") %>% + stats::setNames(expr) }else if(expr == "@"){ # IGNORE=@ signifies that any data record having an alphabetic character # or `@` as its first non-blank character in column one should be ignored. # - This permits a table file having header lines to be used as an # NM-TRAN data set. # - add extra `\\` for later parse() - paste0("!grepl('^\\\\s*[A-Za-z@]', ", data_cols[1], ")") + paste0("!grepl('^\\\\s*[A-Za-z@]', ", data_cols[1], ")") %>% + stats::setNames(expr) }else if(grepl('^[a-zA-Z]$', expr)){ # This is for `IGNORE=C` columns. Meaning ignore rows if the _first_ column # contains 'C' (this form always points to the _first_ column) # - the above regex looks for characters of length>=1, and no symbols - paste0(data_cols[1], "!=", "'", expr, "'") + r_expr <- paste0(data_cols[1], "!=", "'", expr, "'") + add_na_filter(r_expr) %>% stats::setNames(r_expr) }else{ # Invert list form expressions - invert_operator(expr) + r_expr <- invert_operator(expr) + add_na_filter(r_expr) %>% stats::setNames(r_expr) } }else{ # ACCEPT option only supports `ACCEPT=(list)` form --> no formatting needed - expr + add_na_filter(expr) %>% stats::setNames(expr) } }) + r_exprs <- purrr::list_c(r_exprs_list) + return(r_exprs) } +#' Adjust filters to retain NA values for relevant columns +#' +#' This function serves to add a `| is.na(col)` filter to an `R` filter to +#' avoid filtering out `NA` rows. It is *not* needed when using `grepl` for +#' string detection (e.g., `IGNORE=#`, `IGNORE=@`). +#' +#' @param r_expr An `R` filter expression. e.g., `'BLQ!=1'`, `C!='C'`. +#' @keywords internal +#' @seealso [translate_nm_operator()], [invert_operator()] +add_na_filter <- function(r_expr){ + column <- stringr::str_extract(r_expr, "^[^!=><]+") %>% + stringr::str_trim() + + if(!is.na(column)){ + paste0("(", r_expr, " | is.na(", column, "))") + }else{ + r_expr + } +} + #' Filter `NONMEM` input data based on `IGNORE` and `ACCEPT` record options #' #' @param .mod A `bbi_nonmem_model` object diff --git a/man/add_na_filter.Rd b/man/add_na_filter.Rd new file mode 100644 index 00000000..c93252e1 --- /dev/null +++ b/man/add_na_filter.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter-nm-data.R +\name{add_na_filter} +\alias{add_na_filter} +\title{Adjust filters to retain NA values for relevant columns} +\usage{ +add_na_filter(r_expr) +} +\arguments{ +\item{r_expr}{An \code{R} filter expression. e.g., \code{'BLQ!=1'}, \code{C!='C'}.} +} +\description{ +This function serves to add a \verb{| is.na(col)} filter to an \code{R} filter to +avoid filtering out \code{NA} rows. It is \emph{not} needed when using \code{grepl} for +string detection (e.g., \verb{IGNORE=#}, \verb{IGNORE=@}). +} +\seealso{ +\code{\link[=translate_nm_operator]{translate_nm_operator()}}, \code{\link[=invert_operator]{invert_operator()}} +} +\keyword{internal} diff --git a/man/translate_nm_expr.Rd b/man/translate_nm_expr.Rd index 250f070a..bacbc51c 100644 --- a/man/translate_nm_expr.Rd +++ b/man/translate_nm_expr.Rd @@ -43,6 +43,6 @@ translate_nm_expr("@", data_cols = data_cols) } } \seealso{ -\code{\link[=translate_nm_operator]{translate_nm_operator()}}, \code{\link[=invert_operator]{invert_operator()}} +\code{\link[=translate_nm_operator]{translate_nm_operator()}}, \code{\link[=invert_operator]{invert_operator()}}, \code{\link[=add_na_filter]{add_na_filter()}} } \keyword{internal} diff --git a/tests/testthat/test-filter-nm-data.R b/tests/testthat/test-filter-nm-data.R index 8eae99b9..bb02cb50 100644 --- a/tests/testthat/test-filter-nm-data.R +++ b/tests/testthat/test-filter-nm-data.R @@ -17,12 +17,12 @@ test_that("translate_nm_expr() translates NONMEM filter expressions", { test_exprs <- c("SEX==1", "ID.EQ.2", "WT/=70", "AGE.NE.30", "A=1", "WT.GT.40", "B.LE.20") expect_equal( - translate_nm_expr(test_exprs, type = 'accept'), + translate_nm_expr(test_exprs, type = 'accept') %>% names(), c("SEX==1", "ID==2", "WT!=70", "AGE!=30", "A==1", "WT>40", "B<=20") ) expect_equal( - translate_nm_expr(test_exprs, type = 'ignore'), + translate_nm_expr(test_exprs, type = 'ignore') %>% names(), c("SEX!=1", "ID!=2", "WT==70", "AGE==30", "A!=1", "WT<40", "B>=20") ) @@ -31,18 +31,18 @@ test_that("translate_nm_expr() translates NONMEM filter expressions", { # - only `data_cols[1]` is technically needed data_cols <- c("C", "ID", "TIME", "EVID", "DV", "BLQ") expect_equal( - translate_nm_expr("#", data_cols = data_cols), + translate_nm_expr("#", data_cols = data_cols) %>% unname(), paste0("!grepl('^#', ", data_cols[1], ")") ) expect_equal( - translate_nm_expr("C", data_cols = data_cols), + translate_nm_expr("C", data_cols = data_cols) %>% names(), paste0(data_cols[1], "!='C'") ) # Extra `\\` is added for escape purposes when the expression is later parsed expect_equal( - translate_nm_expr("@", data_cols = data_cols), + translate_nm_expr("@", data_cols = data_cols) %>% unname(), paste0("!grepl('^\\\\s*[A-Za-z@]', ", data_cols[1], ")") ) @@ -75,18 +75,26 @@ test_that("filter_nm_data() filters input data using IGNORE/ACCEPT options", { data_rec$values[[7]]$value <- "(ID.EQ.2, SEX=1, WT.LE.50)" nmrec::write_ctl(ctl, get_model_path(mod2)) - # Check expected expressions + ## Check expected expressions ## input_data <- nm_data(mod2) %>% suppressMessages() nm_exprs <- get_data_filter_exprs(mod2) r_filters <- translate_nm_expr( nm_expr = nm_exprs$exprs, type = nm_exprs$type, data_cols = names(input_data) ) - filter_expression <- paste(r_filters, collapse = " & ") + + # Check filters and names + # - names: simplified version of filter (no NA handling) or key symbol (#, @) + # stored as the name for traceability and testing purposes. + # - value: the actual filter expression supplied to the final filter + expect_equal(names(r_filters), c("@", "ID!=2", "SEX!=1", "WT>=50")) expect_equal( - filter_expression, "!grepl('^\\\\s*[A-Za-z@]', ID) & ID!=2 & SEX!=1 & WT>=50" - ) + unname(r_filters), + c("!grepl('^\\\\s*[A-Za-z@]', ID)", "(ID!=2 | is.na(ID))", + "(SEX!=1 | is.na(SEX))", "(WT>=50 | is.na(WT))") + ) # Check that filter expression works correctly + filter_expression <- paste(r_filters, collapse = " & ") filtered_data <- filter_nm_data(mod2) expect_equal( nrow(filtered_data), @@ -159,3 +167,41 @@ test_that("filter_nm_data() errors if expressions cant be parsed", { "ignore/accept list could not be converted to filters" ) }) + + +test_that("filter_nm_data() works when NA values are present", { + mod2 <- copy_model_from(MOD1, "2") + on.exit(delete_models(mod2, .force = TRUE, .tags = NULL)) + + # Add additional IGNORE expressions and compare to dplyr filters + ctl <- get_model_ctl(mod2) + data_rec <- nmrec::select_records(ctl, "data")[[1]] + data_rec$parse() + + # Include a filter for a column with NA values + data_rec$values[[7]]$value <- "(C='C', BLQ=1)" + nmrec::write_ctl(ctl, get_model_path(mod2)) + + # Test 1 + # - C column does *not* lead to loss of subjects due to NA values + # - 1 subject lost due to BLQ. + data <- nm_data(mod2) + data_test1 <- data %>% dplyr::mutate(C = NA, BLQ = 0) + data_test1$BLQ[nrow(data_test1)] <- 1 + + data_f1 <- filter_nm_data(mod2, data = data_test1) + expect_equal(nrow(data) - 1, nrow(data_f1)) + + # Test 2 + # - filter works appropriately when found strings and NA values are present + # - 1 subject lost due to BLQ. 1 subject lost due to 'C' filter + data_test2 <- data_test1 + data_test2$C[1] <- "C" + data_test2$C[2] <- "." + data_test2$BLQ[3] <- NA + + data_f2 <- filter_nm_data(mod2, data = data_test2) + expect_equal(nrow(data) - 2, nrow(data_f2)) + expect_true(is.na(data_f2$BLQ[2])) + expect_equal(data_f2$C[1], ".") +}) From 7f089470cf682fd157adb377f9b9d4ff52e78a19 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Thu, 2 Jan 2025 14:05:43 -0500 Subject: [PATCH 2/5] add more checks to avoid malformed filter expressions - unlikely scenario since NONMEM doesnt support these types of filters, but better to have an informative error here than to create a bad filter expression --- R/filter-nm-data.R | 22 +++++++++++++++++++++- tests/testthat/test-filter-nm-data.R | 14 ++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/R/filter-nm-data.R b/R/filter-nm-data.R index 075823cf..08c2c963 100644 --- a/R/filter-nm-data.R +++ b/R/filter-nm-data.R @@ -212,7 +212,27 @@ translate_nm_expr <- function( add_na_filter(r_expr) %>% stats::setNames(r_expr) } }else{ - # ACCEPT option only supports `ACCEPT=(list)` form --> no formatting needed + # ACCEPT option only supports `ACCEPT=(list)` form + # - no formatting needed, but check for user error to avoid unintended or + # malformed filter expressions + nm_docs_msg <- "See NONMEM documentation for more details" + if(expr == "#" || expr == "@"){ + cli::cli_abort( + c( + "{.val #} and {.val @} are only supported for IGNORE list expressions", + "i" = nm_docs_msg + ) + ) + } + if(grepl('^[a-zA-Z]$', expr)){ + cli::cli_abort( + c( + "ACCEPT option only supports `ACCEPT=(list)` form", + "i" = nm_docs_msg + ) + ) + } + add_na_filter(expr) %>% stats::setNames(expr) } }) diff --git a/tests/testthat/test-filter-nm-data.R b/tests/testthat/test-filter-nm-data.R index bb02cb50..75a3a68a 100644 --- a/tests/testthat/test-filter-nm-data.R +++ b/tests/testthat/test-filter-nm-data.R @@ -46,6 +46,20 @@ test_that("translate_nm_expr() translates NONMEM filter expressions", { paste0("!grepl('^\\\\s*[A-Za-z@]', ", data_cols[1], ")") ) + # Using accept with these filters errors out + expect_error( + translate_nm_expr("#", type = "accept", data_cols = data_cols), + "only supported for IGNORE" + ) + expect_error( + translate_nm_expr("@", type = "accept", data_cols = data_cols), + "only supported for IGNORE" + ) + expect_error( + translate_nm_expr("C", type = "accept", data_cols = data_cols), + "ACCEPT option only supports" + ) + # Error out for unsupported logical operators test_exprs_bad <- c(test_exprs, "GEN=1 .AND. AGE > 60") expect_error( From 8ae0f3599ca199b2ea5980c90799b9e1ae0e3dc8 Mon Sep 17 00:00:00 2001 From: kyleb Date: Fri, 3 Jan 2025 17:21:22 -0500 Subject: [PATCH 3/5] update IGNORE=C to work like IGNORE=@ --- R/filter-nm-data.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/filter-nm-data.R b/R/filter-nm-data.R index 08c2c963..abf6f502 100644 --- a/R/filter-nm-data.R +++ b/R/filter-nm-data.R @@ -203,9 +203,9 @@ translate_nm_expr <- function( }else if(grepl('^[a-zA-Z]$', expr)){ # This is for `IGNORE=C` columns. Meaning ignore rows if the _first_ column # contains 'C' (this form always points to the _first_ column) - # - the above regex looks for characters of length>=1, and no symbols - r_expr <- paste0(data_cols[1], "!=", "'", expr, "'") - add_na_filter(r_expr) %>% stats::setNames(r_expr) + # - the above regex looks for characters of length=1, and no symbols + paste0("!grepl('^", expr, "', ", data_cols[1], ")") %>% + stats::setNames(expr) }else{ # Invert list form expressions r_expr <- invert_operator(expr) From 048cf3947ec477aefa638a6f77d5687fb0c08261 Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Tue, 7 Jan 2025 15:51:02 -0500 Subject: [PATCH 4/5] fix test and add regression test --- tests/testthat/test-filter-nm-data.R | 31 ++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-filter-nm-data.R b/tests/testthat/test-filter-nm-data.R index 75a3a68a..b2811a4d 100644 --- a/tests/testthat/test-filter-nm-data.R +++ b/tests/testthat/test-filter-nm-data.R @@ -36,8 +36,8 @@ test_that("translate_nm_expr() translates NONMEM filter expressions", { ) expect_equal( - translate_nm_expr("C", data_cols = data_cols) %>% names(), - paste0(data_cols[1], "!='C'") + translate_nm_expr("C", data_cols = data_cols) %>% unname(), + paste0("!grepl('^", data_cols[1], "', ", data_cols[1], ")") ) # Extra `\\` is added for escape purposes when the expression is later parsed @@ -183,6 +183,33 @@ test_that("filter_nm_data() errors if expressions cant be parsed", { }) +test_that("filter_nm_data() works for IGNORE=C filters", { + mod2 <- copy_model_from(MOD1, "2") + on.exit(delete_models(mod2, .force = TRUE, .tags = NULL)) + + # Add additional IGNORE expressions and compare to dplyr filters + ctl <- get_model_ctl(mod2) + data_rec <- nmrec::select_records(ctl, "data")[[1]] + data_rec$parse() + + # Include a filter for a column with NA values + data_rec$values[[7]]$value <- "='C'" + nmrec::write_ctl(ctl, get_model_path(mod2)) + + # Test + # - 2 subjects lost due to 'C' filter + data <- nm_data(mod2) + data_test <- data %>% dplyr::mutate(C = NA) %>% + # 'C' must be the first column for this test + dplyr::relocate("C") + data_test$C[1] <- "C" + data_test$C[2] <- "Comment" + data_test$C[3] <- "." + + data_f <- filter_nm_data(mod2, data = data_test) + expect_equal(nrow(data) - 2, nrow(data_f)) +}) + test_that("filter_nm_data() works when NA values are present", { mod2 <- copy_model_from(MOD1, "2") on.exit(delete_models(mod2, .force = TRUE, .tags = NULL)) From 3be0ccc1968faf2f6c185fce05765e6f6026a7ad Mon Sep 17 00:00:00 2001 From: Kyle Barrett Date: Thu, 9 Jan 2025 13:43:33 -0500 Subject: [PATCH 5/5] Use expect_equivalent instead of unnaming the vector --- tests/testthat/test-filter-nm-data.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test-filter-nm-data.R b/tests/testthat/test-filter-nm-data.R index b2811a4d..ef43d16d 100644 --- a/tests/testthat/test-filter-nm-data.R +++ b/tests/testthat/test-filter-nm-data.R @@ -30,20 +30,20 @@ test_that("translate_nm_expr() translates NONMEM filter expressions", { # Use of `@`, `#`, or form `IGNORE=C2` require `data_cols` to be specified # - only `data_cols[1]` is technically needed data_cols <- c("C", "ID", "TIME", "EVID", "DV", "BLQ") - expect_equal( - translate_nm_expr("#", data_cols = data_cols) %>% unname(), - paste0("!grepl('^#', ", data_cols[1], ")") + expect_equivalent( + translate_nm_expr("#", data_cols = data_cols), + "!grepl('^#', C)" ) - expect_equal( - translate_nm_expr("C", data_cols = data_cols) %>% unname(), - paste0("!grepl('^", data_cols[1], "', ", data_cols[1], ")") + expect_equivalent( + translate_nm_expr("C", data_cols = data_cols), + "!grepl('^C', C)" ) # Extra `\\` is added for escape purposes when the expression is later parsed - expect_equal( - translate_nm_expr("@", data_cols = data_cols) %>% unname(), - paste0("!grepl('^\\\\s*[A-Za-z@]', ", data_cols[1], ")") + expect_equivalent( + translate_nm_expr("@", data_cols = data_cols), + "!grepl('^\\\\s*[A-Za-z@]', C)" ) # Using accept with these filters errors out @@ -193,7 +193,7 @@ test_that("filter_nm_data() works for IGNORE=C filters", { data_rec$parse() # Include a filter for a column with NA values - data_rec$values[[7]]$value <- "='C'" + data_rec$values[[7]]$value <- "=C" nmrec::write_ctl(ctl, get_model_path(mod2)) # Test