Skip to content

Commit b710544

Browse files
Merge pull request #369 from USEPA/358-measurequalifiercode-definitions
358 measurequalifiercode definitions
2 parents b2c0bb7 + 1c38e95 commit b710544

File tree

5 files changed

+78
-13
lines changed

5 files changed

+78
-13
lines changed

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ Imports:
5555
data.table,
5656
dplyr,
5757
tidyr,
58+
purrr,
5859
grDevices,
5960
magrittr,
6061
stringr,

R/Maintenance.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ FindSynonyms <- function() {
131131
#
132132
# testing2 <- TADA_FlagMeasureQualifierCode(testing)
133133
#
134-
# #expect_true(all(testing2$TADA.MeasureQualifierCode.Flag != "uncategorized"))
134+
# #expect_true(all(testing2$TADA.MeasureQualifierCode.Flag != "Not Reviewed"))
135135
#
136136
# #print(unique(testing2$TADA_FlagMeasureQualifierCode))
137137
# #print(unique(testing2$MeasureQualifierCode))
@@ -144,7 +144,7 @@ FindSynonyms <- function() {
144144
# codes = unique(testing2$MeasureQualifierCode)
145145
# missing_codes = codes[!codes %in% qc.ref$MeasureQualifierCode]
146146
#
147-
# missing_codes_df <- data.frame(MeasureQualifierCode = missing_codes, TADA.MeasureQualifierCode.Flag = "uncategorized")
147+
# missing_codes_df <- data.frame(MeasureQualifierCode = missing_codes, TADA.MeasureQualifierCode.Flag = "Not Reviewed")
148148
#
149149
# View(missing_codes_df)
150150
#

R/ResultFlagsDependent.R

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,11 @@ TADA_AutoFilter <- function(.data) {
624624
#' @param flaggedonly Boolean argument; the default is flaggedonly = FALSE. When
625625
#' flaggedonly = TRUE, the function will filter the dataframe to show only the
626626
#' rows of data flagged as Suspect.
627+
#'
628+
#' @param define Boolean argument; the default is define = TRUE. When define = TRUE,
629+
#' the function will add an additional column (TADA.MeasureQualifierCode.Def) providing
630+
#' all available definitions for the MeasureQualifierCodes for each result. When
631+
#' define = FALSE, no additional column is added.
627632
#'
628633
#' @return This function adds the column "TADA.MeasureQualifierCode.Flag" to the dataframe
629634
#' which flags suspect samples based on the "MeasureQualifierCode" column. When
@@ -646,7 +651,7 @@ TADA_AutoFilter <- function(.data) {
646651
#'
647652
#' # Remove all suspect samples:
648653
#' MeasureQualifierCode_clean <- TADA_FlagMeasureQualifierCode(Data_6Tribes_5y, clean = TRUE)
649-
TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FALSE) {
654+
TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FALSE, define = TRUE) {
650655
# check .data is data.frame
651656
TADA_CheckType(.data, "data.frame", "Input object")
652657
# check that clean is boolean
@@ -665,26 +670,71 @@ TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FA
665670
# load in ResultMeasureQualifier Flag Table
666671
qc.ref <- utils::read.csv(system.file("extdata", "WQXMeasureQualifierCodeRef.csv", package = "TADA")) %>%
667672
dplyr::rename(MeasureQualifierCode = Code) %>%
668-
dplyr::select(MeasureQualifierCode, TADA.MeasureQualifierCode.Flag)
669-
673+
dplyr::select(MeasureQualifierCode, TADA.MeasureQualifierCode.Flag, Description)
674+
675+
# add TADA.MeasureQualifierCode.Def, the qualifier code definitions column
676+
# Create TADA.MeasureQualifierCode.Def by concatenating each MeasureQualifierCode with its Description from WQXMeasureQualifierCodeRef.
677+
if (define == FALSE) {
678+
.data <- .data
679+
}
680+
681+
if (define == TRUE) {
682+
mqc.ref <- qc.ref %>%
683+
dplyr::select(MeasureQualifierCode, Description) %>%
684+
dplyr::group_by(MeasureQualifierCode) %>%
685+
dplyr::mutate(Concat = paste(MeasureQualifierCode, "-", Description, collapse = "")) %>%
686+
dplyr::select(MeasureQualifierCode, Concat)
687+
688+
mqc.TADA <- .data %>%
689+
dplyr::mutate(MeasureQualifierCode = stringr::str_split(MeasureQualifierCode, ";")) %>%
690+
tidyr::unnest(MeasureQualifierCode) %>%
691+
merge(mqc.ref) %>%
692+
dplyr::group_by(ResultIdentifier) %>%
693+
dplyr::summarize(TADA.MeasureQualifierCode.Def = paste(Concat, collapse = "; "))
694+
695+
.data$TADA.MeasureQualifierCode.Def <- mqc.TADA$TADA.MeasureQualifierCode.Def[match(.data$ResultIdentifier, mqc.TADA$ResultIdentifier)]
696+
697+
rm(mqc.ref, mqc.TADA)
698+
}
670699

700+
# populate flag column in data
701+
flag.lists <- split(qc.ref$MeasureQualifierCode, qc.ref$TADA.MeasureQualifierCode.Flag) %>%
702+
stats::setNames(stringr::str_remove_all(stringr::str_remove_all(tolower(names(.)), "-"), " "))
703+
704+
705+
flag.data <- .data %>%
706+
dplyr::mutate(MeasureQualifierCode.Split = strsplit(MeasureQualifierCode, ";")) %>%
707+
dplyr::mutate(TADA.MeasureQualifierCode.Flag = ifelse(
708+
purrr::map_lgl(MeasureQualifierCode.Split, ~ any(.x %in% flag.lists$suspect)), "Suspect",
709+
ifelse(purrr::map_lgl(MeasureQualifierCode.Split, ~ any(.x %in% flag.lists$nondetect)), "Non-Detect",
710+
ifelse(purrr::map_lgl(MeasureQualifierCode.Split, ~ any(.x %in% flag.lists$overdetect)), "Over-Detect",
711+
ifelse(purrr::map_lgl(MeasureQualifierCode.Split, ~ any(.x %in% flag.lists$pass)), "Pass",
712+
ifelse(purrr::map_lgl(MeasureQualifierCode.Split, ~ any(.x %in% flag.lists$notreviewed)), "Not Reviewed", NA)
713+
)
714+
)
715+
)
716+
)) %>%
717+
dplyr::select(-MeasureQualifierCode.Split)
718+
719+
flag.data <- flag.data %>% dplyr::distinct()
720+
671721
# identify any ResultMeasureQualifier Codes not in reference table
672-
codes <- unique(.data$MeasureQualifierCode)
722+
codes <- stringr::str_split(unique(.data$MeasureQualifierCode), ";") %>%
723+
unlist() %>%
724+
unique()
725+
673726
if (any(!codes %in% qc.ref$MeasureQualifierCode)) {
674727
missing_codes <- codes[!codes %in% qc.ref$MeasureQualifierCode]
675728
missing_codes_df <- data.frame(
676729
MeasureQualifierCode = missing_codes,
677-
TADA.MeasureQualifierCode.Flag = "uncategorized"
730+
TADA.MeasureQualifierCode.Flag = "Not Reviewed",
731+
Description = ""
678732
)
679733
qc.ref <- rbind(qc.ref, missing_codes_df)
680734
missing_codes <- paste(missing_codes, collapse = ", ")
681735
print(paste0("MeasureQualifierCode column in dataset contains value(s) ", missing_codes, " which is/are not represented in the MeasureQualifierCode WQX domain table. These data records are placed under the TADA.MeasureQualifierCode.Flag: 'uncategorized'. Please contact TADA administrators to resolve."))
682736
}
683737

684-
# populate flag column in data
685-
flag.data <- dplyr::left_join(.data, qc.ref, by = "MeasureQualifierCode")
686-
flag.data <- flag.data %>% dplyr::distinct()
687-
688738
# rename ResultMeasureQualifier NA values to Pass in TADA.MeasureQualifierCode.Flag column, not needed?
689739
# flag.data["TADA.MeasureQualifierCode.Flag"][is.na(flag.data["MeasureQualifierCode"])] <- "Pass"
690740

@@ -721,6 +771,7 @@ TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FA
721771
}
722772
}
723773

774+
724775
# return final dataframe
725776
return(final.data)
726777
}

R/Utilities.R

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ utils::globalVariables(c(
5050
"SummationName", "SummationRank", "SummationFractionNotes", "SummationSpeciationNotes",
5151
"SummationSpeciationConversionFactor", "SummationNote", "NutrientGroup",
5252
"Target.Speciation", "TADA.NearbySiteGroups", "numres", "TADA.SingleOrgDupGroupID",
53-
"TADA.MeasureQualifierCode.Flag", "MeasureQualifierCode", "value", "Flag_Column",
53+
"TADA.MeasureQualifierCode.Flag", "TADA.MeasureQualifierCode.Def", "MeasureQualifierCode", "value", "Flag_Column",
5454
"Data_NCTCShepherdstown_HUC12", "ActivityStartDateTime", "TADA.MultipleOrgDupGroupID",
5555
"TADA.WQXVal.Flag"
5656
))
@@ -591,6 +591,7 @@ TADA_OrderCols <- function(.data) {
591591
"TADA.ResultMeasureValueDataTypes.Flag",
592592
"TADA.ResultValueAggregation.Flag",
593593
"TADA.MeasureQualifierCode.Flag",
594+
"TADA.MeasureQualifierCode.Def",
594595
"TADA.CensoredData.Flag",
595596
"TADA.CensoredMethod",
596597
"TADA.NutrientSummation.Flag",
@@ -1157,6 +1158,8 @@ TADA_CheckRequiredFields <- function(.data) {
11571158
"TADA.ResultMeasureValueDataTypes.Flag",
11581159
"TADA.LatitudeMeasure",
11591160
"TADA.LongitudeMeasure",
1161+
"TADA.MeasureQualifierCode.Def",
1162+
"TADA.MeasureQualifierCode.Flag",
11601163
"OrganizationFormalName",
11611164
"ActivityTypeCode",
11621165
"ActivityMediaName",

man/TADA_FlagMeasureQualifierCode.Rd

Lines changed: 11 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)