@@ -624,6 +624,11 @@ TADA_AutoFilter <- function(.data) {
624
624
# ' @param flaggedonly Boolean argument; the default is flaggedonly = FALSE. When
625
625
# ' flaggedonly = TRUE, the function will filter the dataframe to show only the
626
626
# ' rows of data flagged as Suspect.
627
+ # '
628
+ # ' @param define Boolean argument; the default is define = TRUE. When define = TRUE,
629
+ # ' the function will add an additional column (TADA.MeasureQualifierCode.Def) providing
630
+ # ' all available definitions for the MethodQualifierCodes for each result. When
631
+ # ' define = FALSE, no additional column is added.
627
632
# '
628
633
# ' @return This function adds the column "TADA.MeasureQualifierCode.Flag" to the dataframe
629
634
# ' which flags suspect samples based on the "MeasureQualifierCode" column. When
@@ -646,7 +651,7 @@ TADA_AutoFilter <- function(.data) {
646
651
# '
647
652
# ' # Remove all suspect samples:
648
653
# ' MeasureQualifierCode_clean <- TADA_FlagMeasureQualifierCode(Data_6Tribes_5y, clean = TRUE)
649
- TADA_FlagMeasureQualifierCode <- function (.data , clean = FALSE , flaggedonly = FALSE ) {
654
+ TADA_FlagMeasureQualifierCode <- function (.data , clean = FALSE , flaggedonly = FALSE , define = TRUE ) {
650
655
# check .data is data.frame
651
656
TADA_CheckType(.data , " data.frame" , " Input object" )
652
657
# check that clean is boolean
@@ -665,26 +670,71 @@ TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FA
665
670
# load in ResultMeasureQualifier Flag Table
666
671
qc.ref <- utils :: read.csv(system.file(" extdata" , " WQXMeasureQualifierCodeRef.csv" , package = " TADA" )) %> %
667
672
dplyr :: rename(MeasureQualifierCode = Code ) %> %
668
- dplyr :: select(MeasureQualifierCode , TADA.MeasureQualifierCode.Flag )
669
-
673
+ dplyr :: select(MeasureQualifierCode , TADA.MeasureQualifierCode.Flag , Description )
674
+
675
+ # add TADA.MeasureQualifierCode, qualifier code definitions
676
+ # Create TADA.MeasureQualifierCode by concatenating MeasureQualifierCode with description from MeasureQualifierCodeRef.
677
+ if (define == FALSE ) {
678
+ .data <- .data
679
+ }
680
+
681
+ if (define == TRUE ) {
682
+ mqc.ref <- qc.ref %> %
683
+ dplyr :: select(MeasureQualifierCode , Description ) %> %
684
+ dplyr :: group_by(MeasureQualifierCode ) %> %
685
+ dplyr :: mutate(Concat = paste(MeasureQualifierCode , " -" , Description , collapse = " " )) %> %
686
+ dplyr :: select(MeasureQualifierCode , Concat )
687
+
688
+ mqc.TADA <- .data %> %
689
+ dplyr :: mutate(MeasureQualifierCode = stringr :: str_split(MeasureQualifierCode , " ;" )) %> %
690
+ tidyr :: unnest(MeasureQualifierCode ) %> %
691
+ merge(mqc.ref ) %> %
692
+ dplyr :: group_by(ResultIdentifier ) %> %
693
+ dplyr :: summarize(TADA.MeasureQualifierCode.Def = paste(Concat , collapse = " ; " ))
694
+
695
+ .data $ TADA.MeasureQualifierCode.Def <- mqc.TADA $ TADA.MeasureQualifierCode.Def [match(.data $ ResultIdentifier , mqc.TADA $ ResultIdentifier )]
696
+
697
+ rm(mqc.ref , mqc.TADA )
698
+ }
670
699
700
+ # populate flag column in data
701
+ flag.lists <- split(qc.ref $ MeasureQualifierCode , qc.ref $ TADA.MeasureQualifierCode.Flag ) %> %
702
+ stats :: setNames(stringr :: str_remove_all(stringr :: str_remove_all(tolower(names(. )), " -" ), " " ))
703
+
704
+
705
+ flag.data <- .data %> %
706
+ dplyr :: mutate(MeasureQualifierCode.Split = strsplit(MeasureQualifierCode , " ;" )) %> %
707
+ dplyr :: mutate(TADA.MeasureQualifierCode.Flag = ifelse(
708
+ purrr :: map_lgl(MeasureQualifierCode.Split , ~ any(.x %in% flag.lists $ suspect )), " Suspect" ,
709
+ ifelse(purrr :: map_lgl(MeasureQualifierCode.Split , ~ any(.x %in% flag.lists $ nondetect )), " Non-Detect" ,
710
+ ifelse(purrr :: map_lgl(MeasureQualifierCode.Split , ~ any(.x %in% flag.lists $ overdetect )), " Over-Detect" ,
711
+ ifelse(purrr :: map_lgl(MeasureQualifierCode.Split , ~ any(.x %in% flag.lists $ pass )), " Pass" ,
712
+ ifelse(purrr :: map_lgl(MeasureQualifierCode.Split , ~ any(.x %in% flag.lists $ notreviewed )), " Not Reviewed" , NA )
713
+ )
714
+ )
715
+ )
716
+ )) %> %
717
+ dplyr :: select(- MeasureQualifierCode.Split )
718
+
719
+ flag.data <- flag.data %> % dplyr :: distinct()
720
+
671
721
# identify any ResultMeasureQualifier Codes not in reference table
672
- codes <- unique(.data $ MeasureQualifierCode )
722
+ codes <- stringr :: str_split(unique(.data $ MeasureQualifierCode ), " ;" ) %> %
723
+ unlist() %> %
724
+ unique()
725
+
673
726
if (any(! codes %in% qc.ref $ MeasureQualifierCode )) {
674
727
missing_codes <- codes [! codes %in% qc.ref $ MeasureQualifierCode ]
675
728
missing_codes_df <- data.frame (
676
729
MeasureQualifierCode = missing_codes ,
677
- TADA.MeasureQualifierCode.Flag = " uncategorized"
730
+ TADA.MeasureQualifierCode.Flag = " Not Reviewed" ,
731
+ Description = " "
678
732
)
679
733
qc.ref <- rbind(qc.ref , missing_codes_df )
680
734
missing_codes <- paste(missing_codes , collapse = " , " )
681
735
print(paste0(" MeasureQualifierCode column in dataset contains value(s) " , missing_codes , " which is/are not represented in the MeasureQualifierCode WQX domain table. These data records are placed under the TADA.MeasureQualifierCode.Flag: 'uncategorized'. Please contact TADA administrators to resolve." ))
682
736
}
683
737
684
- # populate flag column in data
685
- flag.data <- dplyr :: left_join(.data , qc.ref , by = " MeasureQualifierCode" )
686
- flag.data <- flag.data %> % dplyr :: distinct()
687
-
688
738
# rename ResultMeasureQualifier NA values to Pass in TADA.MeasureQualifierCode.Flag column, not needed?
689
739
# flag.data["TADA.MeasureQualifierCode.Flag"][is.na(flag.data["MeasureQualifierCode"])] <- "Pass"
690
740
@@ -721,6 +771,7 @@ TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FA
721
771
}
722
772
}
723
773
774
+
724
775
# return final dataframe
725
776
return (final.data )
726
777
}
0 commit comments