Skip to content

Commit

Permalink
Merge pull request #368 from USEPA/12-7-23-cm
Browse files Browse the repository at this point in the history
12 7 23 cm
  • Loading branch information
cristinamullin authored Dec 15, 2023
2 parents 6089220 + 785c5e4 commit b2c0bb7
Show file tree
Hide file tree
Showing 37 changed files with 71,336 additions and 71,055 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ export(TADA_GetMeasureUnitRef)
export(TADA_GetNutrientSummationRef)
export(TADA_GetSynonymRef)
export(TADA_GetTemplate)
export(TADA_GetUSGSSynonymRef)
export(TADA_GetWQXCharValRef)
export(TADA_HarmonizeSynonyms)
export(TADA_Histogram)
Expand Down
13 changes: 8 additions & 5 deletions R/CensoredDataSuite.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,21 @@ TADA_IDCensoredData <- function(.data) {

if (dim(cens)[1] > 0) {
## Bring in det cond reference table
cond.ref <- TADA_GetDetCondRef() %>%
cond.ref <- utils::read.csv(system.file("extdata", "WQXResultDetectionConditionRef.csv", package = "TADA")) %>%
dplyr::rename(ResultDetectionConditionText = Name) %>%
dplyr::select(ResultDetectionConditionText, TADA.Detection_Type)

## Join to censored data
cens <- dplyr::left_join(cens, cond.ref, by = "ResultDetectionConditionText")

## Flag censored data that does not havedet cond populated
## Flag censored data that does not have det cond populated
cens$TADA.Detection_Type <- ifelse(is.na(cens$ResultDetectionConditionText), "ResultDetectionConditionText missing", cens$TADA.Detection_Type)

## Fill in detection type when result measure value = "ND"
cens$TADA.Detection_Type <- ifelse(cens$ResultMeasureValue %in% c("ND"), "Non-Detect", cens$TADA.Detection_Type)
## Fill in detection type when text result measure value indicates it is a nondetect
cens$TADA.Detection_Type <- ifelse(cens$ResultMeasureValue %in%
c("ND", "BPQL", "BDL"),
"Non-Detect",
cens$TADA.Detection_Type)

## Let user know when detection condition text is missing from one or more results
# NOTE that at this point, TADA.Detection_Type may be NA if there are detection conditions in dataset that are not present in domain table
Expand All @@ -75,7 +78,7 @@ TADA_IDCensoredData <- function(.data) {
}

## Bring in det limit type reference table
limtype.ref <- TADA_GetDetLimitRef() %>%
limtype.ref <- utils::read.csv(system.file("extdata", "WQXDetectionQuantitationLimitTypeRef.csv", package = "TADA")) %>%
dplyr::rename(DetectionQuantitationLimitTypeName = Name) %>%
dplyr::select(DetectionQuantitationLimitTypeName, TADA.Limit_Type)

Expand Down
6 changes: 4 additions & 2 deletions R/DataDiscoveryRetrieval.R
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,6 @@ TADA_BigDataRetrieval <- function(startDate = "null",
}

if (!"null" %in% statecode) {
# state_cd_cont = utils::read.csv(file = "inst/extdata/statecode.csv",colClasses=c("STATE"="character"))
load(system.file("extdata", "statecodes_df.Rdata", package = "TADA"))
statecode <- as.character(statecode)
statecodes_sub <- statecodes_df %>% dplyr::filter(STUSAB %in% statecode)
Expand Down Expand Up @@ -744,6 +743,7 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null",
if (length(Projects.df) > 1) {
if (nrow(Projects.df) > 0) {
join3 <- join2 %>%

dplyr::left_join(
dplyr::select(
Projects.df, OrganizationIdentifier, OrganizationFormalName,
Expand All @@ -755,7 +755,9 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null",
"OrganizationIdentifier", "OrganizationFormalName",
"ProjectIdentifier", "ProjectName"
),
multiple = "all"
multiple = "all",
# declare the expected join relationship explicitly (set to many-to-many below;
# NOTE(review): this was originally described as 1-to-many - confirm which is intended)
relationship = "many-to-many"
)
} else {
join3 <- join2
Expand Down
21 changes: 20 additions & 1 deletion R/Maintenance.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#' Update TADA Reference Files
#' @return Saves updated reference files
#' This is only needed for ref tables in WQXRefTables.R
#'
TADA_UpdateAllRefs <- function() {
TADA_UpdateWQXCharValRef()
Expand Down Expand Up @@ -87,6 +88,24 @@ TADA_UpdateExampleData <- function() {
rm(Data_NCTCShepherdstown_HUC12)
}

## Find characteristic-fraction-speciation-unit combinations that are not
## present in the TADA HarmonizationTemplate. When new combinations are found,
## add them to HarmonizationTemplate.csv and to NPsummation_key.csv (if
## relevant to TN or TP summation).
##
## Returns a data frame of the combinations that have no HarmonizationGroup
## after being joined to the synonym reference table.

FindSynonyms <- function() {
  combo_cols <- c(
    "TADA.CharacteristicName",
    "TADA.ResultSampleFractionText",
    "TADA.MethodSpecificationName",
    "TADA.ResultMeasure.MeasureUnitCode"
  )

  # pull a random test dataset and run the key flagging functions on it
  flagged <- TADA_RunKeyFlagFunctions(TADA_RandomNationalTestingSet())

  # synonym reference table and the characteristics it covers
  syn_ref <- TADA_GetSynonymRef()
  known_chars <- unique(syn_ref$TADA.CharacteristicName)

  # unique combinations in the test data, limited to characteristics that
  # already appear in the reference table
  in_ref <- flagged$TADA.CharacteristicName %in% known_chars
  observed <- unique(flagged[in_ref, combo_cols])

  # left join on the shared column names; combinations without a match in the
  # reference table end up with NA in the reference-only columns
  joined <- merge(observed, syn_ref, all.x = TRUE)
  unmatched <- joined[is.na(joined$HarmonizationGroup), combo_cols]

  if (nrow(unmatched) > 0) {
    print("New combinations found in random dataset test.")
  }

  unmatched
}


# TADA_OvernightTesting
#
Expand All @@ -108,7 +127,7 @@ TADA_UpdateExampleData <- function() {
#
# for (i in 1:num_iterations) {
#
# testing <- TADA_RandomTestingSet()
# testing <- TADA_RandomNationalTestingSet()
#
# testing2 <- TADA_FlagMeasureQualifierCode(testing)
#
Expand Down
10 changes: 5 additions & 5 deletions R/ResultFlagsDependent.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ TADA_FlagFraction <- function(.data, clean = TRUE, flaggedonly = FALSE) {
.data <- dplyr::select(.data, -TADA.SampleFraction.Flag)
}
# read in sample fraction reference table from extdata and filter
frac.ref <- TADA_GetWQXCharValRef() %>%
frac.ref <- utils::read.csv(system.file("extdata", "WQXcharValRef.csv", package = "TADA")) %>%
dplyr::filter(Type == "CharacteristicFraction")

# join "Status" column to .data by CharacteristicName and Value (SampleFraction)
Expand Down Expand Up @@ -204,7 +204,7 @@ TADA_FlagSpeciation <- function(.data, clean = c("invalid_only", "nonstandardize
}

# read in speciation reference table from extdata and filter
spec.ref <- TADA_GetWQXCharValRef() %>%
spec.ref <- utils::read.csv(system.file("extdata", "WQXcharValRef.csv", package = "TADA")) %>%
dplyr::filter(Type == "CharacteristicSpeciation")

# join "Status" column to .data by CharacteristicName and Value (Speciation)
Expand Down Expand Up @@ -362,7 +362,7 @@ TADA_FlagResultUnit <- function(.data, clean = c("invalid_only", "nonstandardize
}

# read in unit reference table from extdata and filter
unit.ref <- TADA_GetWQXCharValRef() %>%
unit.ref <- utils::read.csv(system.file("extdata", "WQXcharValRef.csv", package = "TADA")) %>%
dplyr::filter(Type == "CharacteristicUnit")

# join "Status" column to .data by CharacteristicName, Source (Media), and Value (unit)
Expand Down Expand Up @@ -501,7 +501,7 @@ TADA_FindQCActivities <- function(.data, clean = FALSE, flaggedonly = FALSE) {
}

# load in ActivityTypeRef Table
qc.ref <- TADA_GetActivityTypeRef() %>%
qc.ref <- utils::read.csv(system.file("extdata", "WQXActivityTypeRef.csv", package = "TADA")) %>%
dplyr::rename(ActivityTypeCode = Code) %>%
dplyr::select(ActivityTypeCode, TADA.ActivityType.Flag)

Expand Down Expand Up @@ -663,7 +663,7 @@ TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FA
}

# load in ResultMeasureQualifier Flag Table
qc.ref <- TADA_GetMeasureQualifierCodeRef() %>%
qc.ref <- utils::read.csv(system.file("extdata", "WQXMeasureQualifierCodeRef.csv", package = "TADA")) %>%
dplyr::rename(MeasureQualifierCode = Code) %>%
dplyr::select(MeasureQualifierCode, TADA.MeasureQualifierCode.Flag)

Expand Down
23 changes: 10 additions & 13 deletions R/ResultFlagsIndependent.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ TADA_FlagMethod <- function(.data, clean = TRUE, flaggedonly = FALSE) {
.data <- dplyr::select(.data, -TADA.AnalyticalMethod.Flag)
}
# read in WQX val reference table and filter
meth.ref <- TADA_GetWQXCharValRef() %>%
meth.ref <- utils::read.csv(system.file("extdata", "WQXcharValRef.csv", package = "TADA")) %>%
dplyr::filter(Type == "CharacteristicMethod")

# join "TADA.WQXVal.Flag" column to .data by CharacteristicName, Source (Media), and Value (unit)
Expand Down Expand Up @@ -350,21 +350,20 @@ TADA_FlagAboveThreshold <- function(.data, clean = TRUE, flaggedonly = FALSE) {
.data <- dplyr::select(.data, -TADA.ResultValueAboveUpperThreshold.Flag)
}

# filter WQXcharVal.ref to include only valid CharacteristicUnit in water media
unit.ref <- TADA_GetWQXCharValRef() %>%
dplyr::filter(Type == "CharacteristicUnit" & Source == "WATER" &
Status == "Valid")
# filter WQXcharVal.ref to include only valid CharacteristicUnit
unit.ref <- utils::read.csv(system.file("extdata", "WQXcharValRef.csv", package = "TADA")) %>%
dplyr::filter(Type == "CharacteristicUnit" & Status == "Accepted")

# join unit.ref to raw.data
check.data <- merge(.data, unit.ref[, c(
"Characteristic", "Source",
"Value", "Maximum"
"Value.Unit", "Maximum"
)],
by.x = c(
"TADA.CharacteristicName", "TADA.ActivityMediaName",
"TADA.ResultMeasure.MeasureUnitCode"
),
by.y = c("Characteristic", "Source", "Value"), all.x = TRUE
by.y = c("Characteristic", "Source", "Value.Unit"), all.x = TRUE
)

# Create flag column, flag rows where ResultMeasureValue > Maximum
Expand Down Expand Up @@ -392,7 +391,6 @@ TADA_FlagAboveThreshold <- function(.data, clean = TRUE, flaggedonly = FALSE) {
if (flaggedonly == TRUE) {
print("This dataframe is empty because no data above the WQX Upper Threshold was found in your dataframe")
emptyflag.data <- dplyr::filter(flag.data, TADA.ResultValueAboveUpperThreshold.Flag %in% "Y")
# emptyflag.data <- dplyr::select(emptyflag.data, -TADA.ResultValueAboveUpperThreshold.Flag)
emptyflag.data <- TADA_OrderCols(emptyflag.data)
return(emptyflag.data)
}
Expand Down Expand Up @@ -507,20 +505,19 @@ TADA_FlagBelowThreshold <- function(.data, clean = TRUE, flaggedonly = FALSE) {
}

# filter WQXcharVal.ref to include only accepted CharacteristicUnit rows
unit.ref <- TADA_GetWQXCharValRef() %>%
dplyr::filter(Type == "CharacteristicUnit" & Source == "WATER" &
Status == "Valid")
unit.ref <- utils::read.csv(system.file("extdata", "WQXcharValRef.csv", package = "TADA")) %>%
dplyr::filter(Type == "CharacteristicUnit" & Status == "Accepted")

# join unit.ref to raw.data
check.data <- merge(.data, unit.ref[, c(
"Characteristic", "Source",
"Value", "Minimum"
"Value.Unit", "Minimum"
)],
by.x = c(
"TADA.CharacteristicName", "TADA.ActivityMediaName",
"TADA.ResultMeasure.MeasureUnitCode"
),
by.y = c("Characteristic", "Source", "Value"), all.x = TRUE
by.y = c("Characteristic", "Source", "Value.Unit"), all.x = TRUE
)

# Create flag column, flag rows where TADA.ResultMeasureValue < Minimum
Expand Down
135 changes: 135 additions & 0 deletions R/TADARefTables.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#' Nutrient Summation Reference Key
#'
#' Reads and returns the nutrient summation reference table stored in the
#' package's extdata folder (NPsummation_key.csv). This dataframe is used in
#' TADA_CalculateTotalNitrogen as the basis for the combinations added together
#' to get total nitrogen. Users may customize this reference table for their
#' own dataset and use the custom dataframe as an input in
#' TADA_CalculateTotalNitrogen.
#'
#' @return Dataframe of nutrient summation combinations
#'
#' @export

TADA_GetNutrientSummationRef <- function() {
  # locate the summation key shipped with the installed TADA package
  key_path <- system.file("extdata", "NPsummation_key.csv", package = "TADA")
  utils::read.csv(key_path)
}


#' Generate Unique Synonym Reference Table
#'
#' Function generates a synonym reference table containing all unique
#' combinations of TADA.CharacteristicName, TADA.ResultSampleFractionText,
#' TADA.MethodSpecificationName, and TADA.ResultMeasure.MeasureUnitCode. The
#' function also joins in some TADA-specific suggested synonyms for nutrients
#' and priority parameters. These target synonyms (denoted in the reference
#' table with the prefix "Target.") are intended to help the user aggregate
#' synonymous data that may be uploaded with slightly different metadata
#' conventions and prepare nutrient data for total N and P summations. Users can
#' review how their input data relates to target synonyms for
#' TADA.CharacteristicName, TADA.ResultSampleFractionText,
#' TADA.MethodSpecificationName, and TADA.ResultMeasure.MeasureUnitCode. Once
#' the synonym table is created, users may optionally edit the target columns in
#' the reference table to meet their needs. Additionally, the function assumes
#' the user has already removed any data containing invalid
#' characteristic-unit-fraction-speciation combinations (i.e. user has already
#' run TADA_FlagFraction, TADA_FlagSpeciation, TADA_FlagResultUnit, etc.).
#'
#' If none of the flag columns TADA.MethodSpeciation.Flag,
#' TADA.SampleFraction.Flag, or TADA.ResultUnit.Flag are present, an advisory
#' message is printed and the reference table is still returned. If any of the
#' flag columns that are present contain "Invalid" values, a count of invalid
#' results per flag column is printed.
#'
#' @param .data TADA dataframe. If a data frame is not provided, the function
#'   will return the default internal reference table.
#'
#' @return Synonym Reference Table unique to the input dataframe
#'
#' @export
#'
#' @examples
#' # Load example dataset:
#' data(Data_6Tribes_5y)
#'
#' # Create a synonym reference table for flagged, cleaned dataframe:
#' Data_6Tribes_5yClean <- subset(Data_6Tribes_5y, !is.na(Data_6Tribes_5y$TADA.ResultMeasureValue))
#' Data_6Tribes_5yClean <- TADA_FlagFraction(Data_6Tribes_5yClean, clean = TRUE)
#' Data_6Tribes_5yClean <- TADA_FlagResultUnit(Data_6Tribes_5yClean, clean = "invalid_only")
#' Data_6Tribes_5yClean <- TADA_FlagSpeciation(Data_6Tribes_5yClean, clean = "invalid_only")
#' Data_6Tribes_5yClean <- TADA_FlagMethod(Data_6Tribes_5yClean, clean = TRUE)
#' CreateRefTable <- TADA_GetSynonymRef(Data_6Tribes_5yClean)
#'
#' # Get internal synonym reference table
#' reference <- TADA_GetSynonymRef()
TADA_GetSynonymRef <- function(.data) {
  # with no input, return the internal reference table as-is
  if (missing(.data)) {
    ref <- utils::read.csv(system.file("extdata", "HarmonizationTemplate.csv", package = "TADA"))
    return(ref)
  }

  # check .data is data.frame
  TADA_CheckType(.data, "data.frame", "Input object")

  # check .data has the required columns
  expected_cols <- c(
    "TADA.CharacteristicName",
    "TADA.ResultSampleFractionText",
    "TADA.MethodSpecificationName",
    "TADA.ResultMeasure.MeasureUnitCode"
  )
  TADA_CheckColumns(.data, expected_cols)

  # QC flag columns created by the TADA flagging functions
  flag_cols <- c(
    "TADA.MethodSpeciation.Flag",
    "TADA.SampleFraction.Flag",
    "TADA.ResultUnit.Flag"
  )
  has_flag <- names(.data) %in% flag_cols

  if (!any(has_flag)) {
    # nothing to tally: advise the user and skip the invalid-flag check
    print("Warning: This dataframe is missing TADA QC flagging columns, indicating that you have not yet run the TADA_FlagResultUnit, TADA_FlagFraction, or TADA_FlagSpeciation functions. It is highly recommended you run these flagging functions and remove Invalid combinations before proceeding to this step.")
  } else {
    # check to see if any invalid data flags exist
    # drop = FALSE keeps a data frame even when only one flag column is
    # present, so the pivot below always receives named columns
    check_inv <- .data[, has_flag, drop = FALSE]
    check_inv <- check_inv %>%
      tidyr::pivot_longer(cols = names(check_inv), names_to = "Flag_Column") %>%
      dplyr::filter(value == "Invalid")

    if (nrow(check_inv) > 0) {
      check_inv <- check_inv %>%
        dplyr::group_by(Flag_Column) %>%
        dplyr::summarise("Result Count" = length(value))
      print("Warning: Your dataframe contains invalid metadata combinations in the following flag columns:")
      print(as.data.frame(check_inv))
    }
  }

  # execute function after checks are passed
  # define raw harmonization table as an object
  harm.raw <- utils::read.csv(system.file("extdata", "HarmonizationTemplate.csv", package = "TADA"))

  # left join the unique input combinations to the harmonization table
  join.data <- merge(unique(.data[, expected_cols]),
    harm.raw,
    by = expected_cols,
    all.x = TRUE
  )

  # trim join.data to include only unique combos of char-frac-spec-unit
  unique.data <- join.data %>% dplyr::distinct()

  # restore the column order of the harmonization table
  unique.data <- unique.data[, names(harm.raw)]

  # return unique.data
  return(unique.data)
}


#' USGS Unit and Speciation Synonym Reference
#'
#' Reads and returns the internal reference file of USGS only
#' units/speciations (USGS_units_speciation.csv in the package's extdata
#' folder). It was created in July 2023 using the pcodes domain table from NWIS
#' (https://help.waterdata.usgs.gov/codes-and-parameters/parameters). All USGS
#' units and speciations are given a target unit and speciation that is
#' synonymous, but adheres to the WQX schema (WQX measure unit domain table).
#'
#' This reference file is used in the TADA_ConvertResultUnits() function where
#' synonymous units and speciations are harmonized before units are then also
#' harmonized/converted to WQX targets.
#'
#' @return Dataframe of USGS only units and speciations and their WQX compatible
#' targets/synonyms.
#'
#' @export

TADA_GetUSGSSynonymRef <- function() {
  # locate the USGS synonym table shipped with the installed TADA package
  usgs_path <- system.file("extdata", "USGS_units_speciation.csv", package = "TADA")
  utils::read.csv(usgs_path)
}
Loading

0 comments on commit b2c0bb7

Please sign in to comment.