diff --git a/DESCRIPTION b/DESCRIPTION index fcf3eee..20175a7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: fcds Title: Process Data from the Florida Cancer Data System -Version: 0.1.3 +Version: 0.1.4 Authors@R: c(person(given = "Garrick", family = "Aden-Buie", diff --git a/NEWS.md b/NEWS.md index dce8ad6..2a25ab4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +## fcds 0.1.4 + +* Added `county_name` argument to `count_fcds()` that adds the county into the + count groups. This value can be a vector of counties, or `TRUE` to include all + counties in the source data. Alternatively, if the value is `"moffitt"` then + the counts are filtered to counties in Moffitt's catchment area. + ## fcds 0.1.2 * Add ICD-O-3 codes from [IACR](http://www.iacr.com.fr/index.php?option=com_content&view=category&layout=blog&id=100&Itemid=577) diff --git a/R/fcds.R b/R/fcds.R index c76882f..6f6ea4d 100644 --- a/R/fcds.R +++ b/R/fcds.R @@ -62,7 +62,16 @@ join_population_by_year <- function( #' #' fcds_example %>% #' dplyr::filter(county_name == "Pinellas") %>% -#' count_fcds(cancer_site_group, sex = "Male") +#' count_fcds(cancer_site_group, sex = "Male", county_name = TRUE) %>% +#' head() +#' +#' fcds_example %>% +#' filter_age_groups(age_gt = 20, age_lt = 25) %>% +#' count_fcds(sex = TRUE, county_name = c("Pinellas", "Hillsborough")) +#' +#' fcds_example %>% +#' count_fcds(county_name = "moffitt") %>% +#' head() #' #' @return A grouped data frame with counts. The output groups includes the #' union of the groups of the original input `data`, the groups specified by @@ -80,8 +89,13 @@ join_population_by_year <- function( #' `TRUE` to include all values of `race` present in input data #' @param origin Character vector of values of `origin` to be included in count, #' or `TRUE` to include all values of `origin` present in input data -#' @param moffitt_catchment Limit counties to those in the catchment area of the -#' [Moffitt Cancer Center](https://moffitt.org). 
+#' @param county_name Character vector of values of `county_name` to be included +#' in count, or `TRUE` to include all values of `county_name` present in the +#' input data, or `"moffitt"` to limit to the counties in the +#' catchment area of the [Moffitt Cancer Center](https://moffitt.org). +#' @param moffitt_catchment **Deprecated.** Please use `county_name = +#' "moffitt"` instead to limit counties to those in the catchment +#' area of the [Moffitt Cancer Center](https://moffitt.org). #' @param default_groups Variables that should be included in the grouping, #' prior to counting cancer cases. Set to `NULL` to use only the groups #' already present in the input data. @@ -96,14 +110,31 @@ count_fcds <- function( sex = NULL, race = NULL, origin = NULL, - moffitt_catchment = FALSE, + county_name = NULL, default_groups = c("year_group", "year", "age_group"), - discard_unseen_levels = TRUE + discard_unseen_levels = TRUE, + moffitt_catchment = FALSE ) { + if (!missing(moffitt_catchment)) { + .Deprecated(msg = paste( + "The moffitt_catchment argument is deprecated, please use", + "county_name = \"moffitt\"" + )) + if (!missing(county_name)) { + warning( + "Both `county_name` and `moffitt_catchment` were specified, ", + "only `county_name` will be used." 
+ ) + } else { + county_name <- if (moffitt_catchment) "moffitt" + } + } + filters <- list( sex = sex, race = race, - origin = origin + origin = origin, + county_name = county_name ) for (var in names(filters)) { @@ -116,7 +147,11 @@ count_fcds <- function( )) } - if (moffitt_catchment) filters$county_name <- fcds_const("moffitt_catchment") + if (!is.null(filters$county_name) && length(filters$county_name) == 1) { + if (grepl("moffitt", tolower(filters$county_name))) { + filters$county_name <- fcds_const("moffitt_catchment") + } + } filters <- purrr::compact(filters) diff --git a/man/count_fcds.Rd b/man/count_fcds.Rd index d11b670..4a32adf 100644 --- a/man/count_fcds.Rd +++ b/man/count_fcds.Rd @@ -5,8 +5,9 @@ \title{Count FCDS Cases} \usage{ count_fcds(data, ..., sex = NULL, race = NULL, origin = NULL, - moffitt_catchment = FALSE, default_groups = c("year_group", "year", - "age_group"), discard_unseen_levels = TRUE) + county_name = NULL, default_groups = c("year_group", "year", + "age_group"), discard_unseen_levels = TRUE, + moffitt_catchment = FALSE) } \arguments{ \item{data}{A data frame} @@ -23,8 +24,10 @@ and subsequent counting.} \item{origin}{Character vector of values of \code{origin} to be included in count, or \code{TRUE} to include all values of \code{origin} present in input data} -\item{moffitt_catchment}{Limit counties to those in the catchment area of the -\href{https://moffitt.org}{Moffitt Cancer Center}.} +\item{county_name}{Character vector of values of \code{county_name} to be included +in count, or \code{TRUE} to include all values of \code{county_name} present in the +input data, or \code{"moffitt"} to limit to the counties in the +catchment area of the \href{https://moffitt.org}{Moffitt Cancer Center}.} \item{default_groups}{Variables that should be included in the grouping, prior to counting cancer cases. Set to \code{NULL} to use only the groups @@ -34,6 +37,9 @@ already present in the input data.} in the results. 
If \code{FALSE}, then no changes are made to the factor levels. If a character string of column names, then only unobserved levels in those columns are dropped.} + +\item{moffitt_catchment}{\strong{Deprecated.} Please use \code{county_name = "moffitt"} instead to limit counties to those in the catchment +area of the \href{https://moffitt.org}{Moffitt Cancer Center}.} } \value{ A grouped data frame with counts. The output groups includes the @@ -59,6 +65,15 @@ directly. fcds_example \%>\% dplyr::filter(county_name == "Pinellas") \%>\% - count_fcds(cancer_site_group, sex = "Male") + count_fcds(cancer_site_group, sex = "Male", county_name = TRUE) \%>\% + head() + +fcds_example \%>\% + filter_age_groups(age_gt = 20, age_lt = 25) \%>\% + count_fcds(sex = TRUE, county_name = c("Pinellas", "Hillsborough")) + +fcds_example \%>\% + count_fcds(county_name = "moffitt") \%>\% + head() } diff --git a/tests/testthat/test-fcds.R b/tests/testthat/test-fcds.R index 1d41f7e..83fdadb 100644 --- a/tests/testthat/test-fcds.R +++ b/tests/testthat/test-fcds.R @@ -98,8 +98,40 @@ describe("count_fcds()", { it("subsets to Moffitt counties", { r_count_fcds_moffitt <- fcds::fcds_example %>% - count_fcds(moffitt_catchment = TRUE) - expect_known_hash(r_count_fcds_moffitt %>% dplyr::ungroup(), "a4ff52c455") + count_fcds(county_name = "moffitt_catchment") %>% + dplyr::ungroup() + + r_count_fcds_moffitt2 <- fcds::fcds_example %>% + count_fcds(county_name = "Moffitt Cancer Center") %>% + dplyr::ungroup() + + expect_known_hash(r_count_fcds_moffitt, "a4ff52c455") + expect_known_hash(r_count_fcds_moffitt2, "a4ff52c455") + }) + + it("moffitt_catchment is deprecated", { + expect_warning( + fcds::fcds_example %>% count_fcds(moffitt_catchment = TRUE), + "deprecated" + ) + + expect_identical( + suppressWarnings(fcds::fcds_example %>% count_fcds(moffitt_catchment = TRUE)), + fcds::fcds_example %>% count_fcds(county_name = "moffitt_catchment") + ) + }) + + it("county_name = TRUE includes county_name in group 
vars", { + fcds_county <- fcds::fcds_example %>% count_fcds(county_name = TRUE) + expect_identical( + dplyr::group_vars(fcds_county), + c("county_name", "year_group", "year", "age_group") + ) + + expect_identical( + fcds_county %>% .$county_name %>% paste() %>% unique() %>% sort(), + fcds::fcds_example %>% .$county_name %>% paste() %>% unique() %>% sort() + ) }) it("errors when invalid FCDS constants are provided", { @@ -138,7 +170,7 @@ describe("count_fcds()", { it("removes un-observed factor levels in output groups", { r_cfl <- fcds::fcds_example %>% filter(year > 2000) %>% - count_fcds(moffitt_catchment = TRUE, sex = "Male") + count_fcds(county_name = "Moffitt", sex = "Male") expect_true( length(setdiff(levels(r_cfl$county_name), fcds_const("moffitt_catchment"))) == 0