From 6ad665b32f18e598551badb3b453f894b64088ff Mon Sep 17 00:00:00 2001 From: Kenny Wong Date: Thu, 30 Jan 2025 13:18:05 -0500 Subject: [PATCH] styler and document updates Updated minor excel output bug, and updated documentation for greater clarity in Mod 3 functions. --- R/ATTAINSRefTables.R | 16 ++-- R/CriteriaInputs.R | 161 +++++++++++++++++----------------- R/CrosswalksFromATTAINS.R | 148 +++++++++++++++++-------------- R/GeospatialFunctions.R | 14 +-- R/Maintenance.R | 77 +++++++++------- R/ResultFlagsDependent.R | 20 +++-- R/TADARefTables.R | 16 ++-- R/WQPWQXRefTables.R | 17 ++-- man/TADA_CreateParamRef.Rd | 15 ++-- man/TADA_CreateParamUseRef.Rd | 17 ++-- man/TADA_GetEPA304aRef.Rd | 5 +- vignettes/TADAModule3.Rmd | 25 +++--- 12 files changed, 283 insertions(+), 248 deletions(-) diff --git a/R/ATTAINSRefTables.R b/R/ATTAINSRefTables.R index 54f9006c..390b1abf 100644 --- a/R/ATTAINSRefTables.R +++ b/R/ATTAINSRefTables.R @@ -15,32 +15,32 @@ TADA_GetATTAINSOrgIDsRef <- function() { if (!is.null(ATTAINSOrgIDsRef_Cached)) { return(ATTAINSOrgIDsRef_Cached) } - + # Try to download up-to-date raw data - + raw.data <- tryCatch( { # get data from ATTAINS - rATTAINS::domain_values(domain_name = "OrgName") + rATTAINS::domain_values(domain_name = "OrgName") }, error = function(err) { NULL } ) - + # If the download failed fall back to internal data (and report it) if (is.null(raw.data)) { message("Downloading latest ATTAINS Organization Reference Table failed!") message("Falling back to (possibly outdated) internal file.") return(utils::read.csv(system.file("extdata", "ATTAINSOrgIDsRef.csv", package = "EPATADA"))) } - + ATTAINSOrgIDsRef <- raw.data %>% dplyr::distinct() - + # Save updated table in cache ATTAINSOrgIDsRef_Cached <- ATTAINSOrgIDsRef - + ATTAINSOrgIDsRef } @@ -53,4 +53,4 @@ TADA_UpdateATTAINSOrgIDsRef <- function() { # Used to store cached Measure Unit Reference Table -ATTAINSOrgIDsRef_Cached <- NULL \ No newline at end of file +ATTAINSOrgIDsRef_Cached <- NULL 
diff --git a/R/CriteriaInputs.R b/R/CriteriaInputs.R index 7cdf997d..653b403e 100644 --- a/R/CriteriaInputs.R +++ b/R/CriteriaInputs.R @@ -25,13 +25,9 @@ #' missing cell values that were either not addressed in the creation of the original crosswalk or #' which pertain to TADA.ComparableDataIdentifiers not included in the original crosswalk. #' -#' The user-supplied crosswalk table must contain the required columns. Users will have two options: -#' -#' 1) Supply a paramRef data frame which contains at least these four column names: -#' TADA.CharacteristicName, TADA.MethodSpeciationName, TADA.ResultSampleFractionText, -#' and ATTAINS.ParameterName. -#' 2) Supply a paramRef data frame which contains at least these two column names: -#' TADA.ComparableDataIdentifier and ATTAINS.ParameterName . +#' The user-supplied crosswalk table must contain the required columns. Users will need to +#' provide a paramRef data frame which contains at least these two column names: +#' TADA.ComparableDataIdentifier and ATTAINS.ParameterName. #' #' Users who are interested in doing an assessment or comparing criteria for more than organization #' also need to include an additional column name: 'organization_identifier'. This ensures that @@ -40,7 +36,9 @@ #' #' A draft crosswalk between TADA.CharacteristicName and EPA 304A pollutant names (sourced from the #' Criteria Search Tool) has been created by the EPATADA team. This crosswalk is still in -#' development and only focuses on the TADA priority characteristics (add link to list?). +#' development and only focuses on the TADA priority characteristics. +#' Please run the following below in the R environment to view current crosswalks: +#' 'utils::read.csv(system.file("extdata", "TADAPriorityCharUnitRef.csv", package = "EPATADA"))'. #' #' @param .data A TADA dataframe. 
The user should run all desired data cleaning, processing, #' harmonization, filtering, and addition of geospatial components (via TADA_GetATTAINS) functions @@ -326,7 +324,7 @@ TADA_CreateParamRef <- function(.data, org_id = NULL, paramRef = NULL, excel = T '="No parameter match for TADA.ComparableDataIdentifier"),"No parameter crosswalk provided for TADA.ComparableDataIdentifier. Parameter will not be used for assessment", IF(ISNA(MATCH(E', i + 1, ',Index!H:H,0)), "Parameter name is not included in ATTAINS, contact ATTAINS to add parameter name to Domain List", - IF(ISNA(MATCH(1,(E', i + 1, "=ATTAINSOrgNamesParamRef!E:E)*(C", i + 1, '=ATTAINSOrgNamesParamRef!B:B),0)), + IF(ISNA(MATCH(1,(E', i + 1, "=ATTAINSOrgNamesParamRef!E:E)*(C", i + 1, '=ATTAINSOrgNamesParamRef!A:A),0)), "Parameter name is listed as a prior cause in ATTAINS, but not for this organization", "Parameter name is listed as a prior cause in ATTAINS for this organization")))' ) @@ -362,14 +360,13 @@ TADA_CreateParamRef <- function(.data, org_id = NULL, paramRef = NULL, excel = T return(CreateParamRef) } -#' Parameter and Use Name crosswalk +#' ATTAINS Parameter Name and Use Name crosswalk #' #' Users will be required to validate the use name crosswalk for each combination of ATTAIN #' parameter name and associated use_name that applies to their org(s) with its associated #' TADA.ComparableDataIdentifier(s). This can be accomplished by determining which 'use_name'(s) #' from the drop-down menu in the excel spreadsheet generated by this function correspond to the #' TADA.ComparableDataIdentifier(s) found in the TADA dataframe. -#' `. #' #' Before running this function, users must run TADA_CreateParamRef() to create the #' crosswalk that defines the ATTAINS.ParameterName(s) and use_name(s) needing validation. @@ -385,11 +382,13 @@ TADA_CreateParamRef <- function(.data, org_id = NULL, paramRef = NULL, excel = T #' Otherwise, users can still proceed by overriding the data validation by value pasting. 
#' Users will be warned in the ATTAINS.FlagUseName column if they choose to include an #' ATTAINS use name that was not named in prior ATTAINS assessment cycles as: -#' 'Suspect: use name is not found as a prior use name for this organization' or -#' 'Suspect: use name is not found as a prior use name for this parameter' +#' 'Use name is not listed as a prior cause in ATTAINS for this organization' or +#' 'Use name is listed as a prior cause in this organization, but not for this parameter name' #' #' Users will have the flexibility to include the EPA304a standards by including this string in the org_id -#' function argument. Users who only want the EPA304a standards would input as an argument input: +#' function argument. +#' +#' Users who only want the EPA304a standards would input as an argument input: #' #' org_id = "EPA304a" #' @@ -397,11 +396,12 @@ TADA_CreateParamRef <- function(.data, org_id = NULL, paramRef = NULL, excel = T #' #' org_id = c("EPA304a", "UTAHDWQ") #' -#' The use_name for EPA304a standards are matched from the CriteriaSearchTool: -#' (CST) https://www.epa.gov/wqs-tech/state-specific-water-quality-standards-effective-under-clean-water-act-cwa +#' NOTE: The EPA304a standards are not a part of the ATTAINS domain value, these standards +#' have been crosswalk to a list of priority TADA.ComparableDataIdentifier by the internal EPATADA team. +#' The use_name for EPA304a standards are matched from the CriteriaSearchTool (CST): +#' www.epa.gov/wqs-tech/state-specific-water-quality-standards-effective-under-clean-water-act-cwa #' while the use_name for any other ATTAINS organization identifiers come from ATTAINS domain value for use_name. #' -#' #' @param .data A TADA dataframe. Users are expected to have already run the appropriate data #' cleaning, processing, harmonization and filtering functions prior to this step as well as #' provide the geospatial components with Module 2 TADA_GetATTAINS() function. 
@@ -443,9 +443,9 @@ TADA_CreateParamRef <- function(.data, org_id = NULL, paramRef = NULL, excel = T #' include in the paramRef data frame which contains an additional column name: 'organization_identifier' #' in order to determine the proper crosswalk between TADA.ComparableDataIdentifier and #' ATTAINS.ParameterName by organization identifier. -#' +#' #' @param paramUseRef A data frame which contains a completed crosswalk of org specific -#' domain value of use_name by ATTAINS.ParameterName. +#' domain value of use_name by ATTAINS.ParameterName. #' #' @return A data frame which contains the columns: TADA.ComparableDataIdentifier, organization_identifier, #' EPA304A.PollutantName, ATTAINS.ParameterName, and ATTAINS.FlagUseName. Users will need to review @@ -470,7 +470,7 @@ TADA_CreateParamRef <- function(.data, org_id = NULL, paramRef = NULL, excel = T #' # Users can include the EPA304a standards by itself or compared to their org(s) #' paramUseRef_UT2 <- TADA_CreateParamUseRef(Data_Nutrients_UT, paramRef = paramRef_UT3, org_id = c("EPA304a", "UTAHDWQ"), excel = FALSE) #' paramUseRef_UT3 <- TADA_CreateParamUseRef(Data_Nutrients_UT, paramRef = paramRef_UT3, org_id = c("EPA304a"), excel = FALSE) -#' +#' TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramUseRef = NULL, excel = FALSE, overwrite = FALSE) { # overwrite argument should only be used when creating an excel file. if (excel == FALSE && overwrite == TRUE) { @@ -528,20 +528,20 @@ TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramU } } } - + # check to see if user-supplied parameter-use ref is a df with appropriate columns and is filled out. 
if (!is.null(paramRef) & !is.character(paramRef)) { if (!is.data.frame(paramRef)) { stop("TADA_CreateParamUseRef: 'paramUseRef' must be a data frame with these 6 columns: TADA.ComparableDataIdentifier, organization_identifier, EPA304A.PollutantName, ATTAINS.ParameterName, use_name, IncludeOrExclude") } - + if (is.data.frame(paramUseRef)) { col.names <- c( - "TADA.ComparableDataIdentifier", "organization_identifier", "EPA304A.PollutantName", "ATTAINS.ParameterName", "use_name", "IncludeOrExclude" + "TADA.ComparableDataIdentifier", "organization_identifier", "EPA304A.PollutantName", "ATTAINS.ParameterName", "use_name", "IncludeOrExclude" ) - + ref.names <- names(paramUseRef) - + if (length(setdiff(col.names, ref.names)) > 0 && !("TADA.ComparableDataIdentifier" %in% names(paramUseRef))) { stop("TADA_CreateParamUseRef: 'paramUseRef' must be a data frame with these 6 columns: TADA.ComparableDataIdentifier, organization_identifier, EPA304A.PollutantName, ATTAINS.ParameterName, use_name, IncludeOrExclude") } @@ -574,7 +574,7 @@ TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramU "One or more organization identifiers entered by user is not found in ATTAINS. " )) } - + # Checks if org_id are found in the user supplied paramRef argument. 
if (sum(!org_id[org_id != "EPA304a"] %in% paramRef$organization_identifier) > 0) { warning(paste0( @@ -608,7 +608,7 @@ TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramU dplyr::left_join(CST_param, c("EPA304A.PollutantName"), relationship = "many-to-many") %>% dplyr::select(TADA.ComparableDataIdentifier, organization_identifier = organization_identifier.y, ATTAINS.ParameterName, EPA304A.PollutantName, use_name = use_name.y) %>% dplyr::distinct() - + # remove intermediate object CST_param rm(CST_param) @@ -621,9 +621,9 @@ TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramU # remove intermediate object EPA_param rm(EPA_param) } - - # If a user provide a paramUseRef argument, this will - if (!is.null(paramUseRef)){ + + # If a user provides a paramUseRef argument, join it to the generated crosswalk + if (!is.null(paramUseRef)) { CreateParamUseRef <- paramUseRef %>% dplyr::left_join(CreateParamUseRef, c("TADA.ComparableDataIdentifier", "ATTAINS.ParameterName", "organization_identifier", "EPA304A.PollutantName", "use_name")) %>% dplyr::select(TADA.ComparableDataIdentifier, organization_identifier, EPA304A.PollutantName, ATTAINS.ParameterName, use_name) %>% @@ -747,9 +747,9 @@ TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramU "Use name does not apply for this ATTAINS.ParameterName. Will exclude from assessment.", IF(ISBLANK(E', i + 1, '), "No use name is provided. 
Consider choosing an appropriate use_name that applies for assessment", - IF(ISNA(MATCH(1,(E', i + 1, "=Index!G:G)*(B", i + 1, '=Index!E:E),0)), + IF(ISNA(MATCH(1,(E', i + 1, "=Index!G:G)*(B", i + 1, '=Index!D:D),0)), "Use name is not listed as a prior cause in ATTAINS for this organization", - IF(ISNA(MATCH(1,(D', i + 1, "=Index!H:H)*(E", i + 1, "=Index!G:G)*(B", i + 1, '=Index!E:E),0)), + IF(ISNA(MATCH(1,(D', i + 1, "=Index!H:H)*(E", i + 1, "=Index!G:G)*(B", i + 1, '=Index!D:D),0)), "Use name is listed as a prior cause in this organization, but not for this parameter name", "Use name is listed as prior cause in ATTAINS for this org")))))') ) @@ -778,34 +778,34 @@ TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramU return(CreateParamUseRef) } -# #' +# #' # #' Assessment Unit and MonitoringLocationName/MonitoringLocationType/MonitoringLocationId Crosswalk -# #' +# #' # #' This function will pull in all MonitoringLocationName/MonitoringLocationType/MonitoringLocationId # #' for AU(s) from a TADA dataframe with ATTAINS data. This function requires users to have already # #' ran TADA_GetATTAINS(). Users are able to specify which AU(S) to pull in from this ref file # #' when creating the final CriteriaRef file to be compatible with an organization's WQS assessments. -# #' +# #' # #' Users are expected to modify this AU ref file with the appropriate AU and # #' MonitoringLocationName/MonitoringLocationType/MonitoringLocationId crosswalk # #' for the current Assessment cycle. Users can decide to "Include or Exclude" a MonitoringLocation # #' within an AU if desired. This can be used if a MoniotringLocation would still like to be # #' crosswalk to the AU but may only be applicable for certain parameters. -# #' +# #' # #' @param .data A TADA dataframe with TADA_GetATTAINS() geospatial function ran. -# #' +# #' # #' @param AU Character argument. Users can specify which AU they are interested in # #' defining WQS criteria for. 
If this argument is left as NULL, then all unique AU # #' records will be displayed in this ref file for users to define. -# #' +# #' # #' @return A data frame with all the MonitoringLocationIdentifier Sites for a defined AU. -# #' +# #' # #' @export -# #' -# +# #' +# # TADA_CreateAURef <- function(.data, AU = NULL, excel = TRUE, overwrite = FALSE, returnSites = c("all","matched-only")){ -# +# # # data <- rATTAINS::assessments(organization_id = "MDE_EASP") # # # # use_assessments <- data$use_assessment @@ -827,28 +827,28 @@ TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramU # if(!is.null(downloads_path)){ # downloads_path <- downloads_path # } -# +# # library(rATTAINS) -# +# # if(!is.data.frame(.data)){ # if (!any(c( # "TADA_with_ATTAINS", "ATTAINS_catchments", "ATTAINS_points", "ATTAINS_lines", "ATTAINS_polygons" # ) %in% names(.data))) { # stop("Your input dataframe was not produced from `TADA_GetATTAINS()` or it was modified. Please create your list of ATTAINS features using `TADA_GetATTAINS(return_sf = TRUE)`") # } -# +# # .data <- .data[["TADA_with_ATTAINS"]] # } -# -# +# +# # if(is.null(AU)){ # print("Creating AURef dataframe for all unique combinations of AU found in the TADA dataframe by MonitoringLocationName/MonitoringLocationType/MonitoringLocationId.") # } -# +# # if(!is.null(AU)){ # print(paste0("Filtering by AUs = ", AU, ". 
Creating a dataframe for unique combinations of MonitoringLocationName/MonitoringLocationType/MonitoringLocationId.")) # } -# +# # # Filters by AU if desired, otherwise creates a dataframe of all unique AU in the TADA dataframe pull # CreateAURef <- .data %>% # dplyr::filter(if (is.null(AU)) TRUE @@ -869,10 +869,10 @@ TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramU # #ATTAINS.assessmentunitname, ATTAINS.assessmentunitidentifier, # MonitoringLocationIdentifier # ) -# +# # if(!"ATTAINS.assessmentunitidentifier" %in% colnames(CreateAURef)){ # print(paste0("No Monitoring Location to Assessment Unit crosswalk provided. Consider providing this crosswalk if you would like to summarize assessments on an Assessment Unit level.")) -# +# # CreateAURef <- CreateAURef %>% # dplyr::mutate(ATTAINS.assessmentunitname = NA) %>% # dplyr::mutate(ATTAINS.assessmentunitidentifier = NA) %>% @@ -883,12 +883,12 @@ TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramU # "IncludeOrExclude", "ExcludeStationReason", "ApplyUniqueSpatialCriteria" # )) # ) -# +# # } -# +# # if (excel == TRUE) { # wb <- openxlsx::loadWorkbook(wb, downloads_path) -# +# # tryCatch({ # openxlsx::addWorksheet(wb, "CreateAURef") # }, @@ -897,7 +897,7 @@ TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramU # openxlsx::addWorksheet(wb, "CreateAURef") # } # ) -# +# # # Format column header # header_st <- openxlsx::createStyle(textDecoration = "Bold") # # Format Column widths @@ -906,14 +906,14 @@ TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramU # set_zoom <- function(x) gsub('(?<=zoomScale=")[0-9]+', x, sV, perl = TRUE) # sV <- wb$worksheets[[4]]$sheetViews # wb$worksheets[[4]]$sheetViews <- set_zoom(90) -# +# # # writes CreateAURef dataframe # openxlsx::writeData(wb, "CreateAURef", startCol = 1, x = CreateAURef, headerStyle = header_st) -# +# # # data validation drop down list created below. 
# suppressWarnings(openxlsx::dataValidation(wb, sheet = "CreateAURef", cols = 9, rows = 2:1000, type = "list", value = sprintf("'Index'!$B$2:$B$5"), allowBlank = TRUE, showErrorMsg = TRUE, showInputMsg = TRUE)) -# -# +# +# # # Conditional Formatting # openxlsx::conditionalFormatting(wb, "CreateAURef", # cols = 9, rows = 2:(nrow(CreateAURef) + 1), @@ -930,21 +930,21 @@ TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramU # openxlsx::conditionalFormatting(wb, "CreateAURef", # cols = 10:12, rows = 2:(nrow(CreateAURef) + 1), # type = "notBlanks", style = openxlsx::createStyle(bgFill = TADA_ColorPalette()[8])) # using yellow to indicate modified cell -# +# # if(overwrite == TRUE){ # openxlsx::saveWorkbook(wb, downloads_path, overwrite = T) # } -# +# # if(overwrite == FALSE){ # warning("If you would like to replace the file, use overwrite = TRUE argument in TADA_CreateParamRef") # openxlsx::saveWorkbook(wb, downloads_path, overwrite = F) # } -# +# # cat("File saved to:", gsub("/","\\\\",downloads_path), "\n") -# +# # CreateAURef <- openxlsx::read.xlsx(downloads_path, sheet = "CreateAURef") # } -# +# # return(CreateAURef) # } @@ -960,22 +960,25 @@ TADA_CreateParamUseRef <- function(.data, org_id = NULL, paramRef = NULL, paramU #' @export #' -TADA_CSVExport <- function(ref = NULL){ - if(is.null(ref)){ +TADA_CSVExport <- function(ref = NULL) { + if (is.null(ref)) { print("No dataframe provided. 
Please enter a dataframe to return") } - - data <-DT::datatable(ref, extensions = c('Buttons', 'FixedColumns'), - options = list(paging = TRUE, - dom = 'Bfrtip', - autoWidth = TRUE, - pageLength = 5, - scrollX = TRUE, - scrollCollapse = TRUE, - buttons = c('copy','csv', 'excel', 'pdf') - #fixedColumns = list(leftColumns = 1 - ), class = 'display') %>% - DT::formatStyle(columns = colnames(ref), 'fontSize' = '80%') - + + data <- DT::datatable(ref, + extensions = c("Buttons", "FixedColumns"), + options = list( + paging = TRUE, + dom = "Bfrtip", + autoWidth = TRUE, + pageLength = 5, + scrollX = TRUE, + scrollCollapse = TRUE, + buttons = c("copy", "csv", "excel", "pdf") + # fixedColumns = list(leftColumns = 1 + ), class = "display" + ) %>% + DT::formatStyle(columns = colnames(ref), "fontSize" = "80%") + return(data) } diff --git a/R/CrosswalksFromATTAINS.R b/R/CrosswalksFromATTAINS.R index 89eff8c1..3cf0ab92 100644 --- a/R/CrosswalksFromATTAINS.R +++ b/R/CrosswalksFromATTAINS.R @@ -2,19 +2,19 @@ #' Crosswalk from ATTAINS #' #' Tribes and States who participate in electronic reporting of water quality -#' conditions through EPA ATTAINS may also submit a crosswalk of WQP -#' MonitoringLocationIdentifiers associated with their Assessment Units to -#' ATTAINS. If the organization has recorded MonitoringLocationIdentifiers +#' conditions through EPA ATTAINS may also submit a crosswalk of WQP +#' MonitoringLocationIdentifiers associated with their Assessment Units to +#' ATTAINS. If the organization has recorded MonitoringLocationIdentifiers #' associated with their Assessment Units in ATTAINS, this function can be used #' to create a crosswalk of known MonitoringLocationIdentifiers and Assessment -#' Units. All tribal nations record this crosswalk in ATTAINS but only a few -#' states. If a state has not supplied Monitoring Location information to +#' Units. All tribal nations record this crosswalk in ATTAINS but only a few +#' states. 
If a state has not supplied Monitoring Location information to #' ATTAINS, the function will not return a data frame. #' -#' @param org_id The ATTAINS organization identifier must be supplied by the -#' user. A list of organization identifiers can be found by downloading the -#' ATTAINS Domains Excel file: -#' https://www.epa.gov/system/files/other-files/2023-09/DOMAINS.xlsx. +#' @param org_id The ATTAINS organization identifier must be supplied by the +#' user. A list of organization identifiers can be found by downloading the +#' ATTAINS Domains Excel file: +#' https://www.epa.gov/system/files/other-files/2023-09/DOMAINS.xlsx. #' Organization identifiers are listed in the "OrgName" tab. The "code" column #' contains the organization identifiers that should be used for this param. #' @@ -32,7 +32,8 @@ #' #' # Pueblo of Tesuque example #' PUEBLOOFTESUQUE_crosswalk <- TADA_GetATTAINSAUSiteCrosswalk( -#' org_id = "PUEBLOOFTESUQUE") +#' org_id = "PUEBLOOFTESUQUE" +#' ) #' #' # Arizona example, returns blank dataframe as of 1/21/25 #' AZ_crosswalk <- TADA_GetATTAINSAUSiteCrosswalk(org_id = "21ARIZ") @@ -68,12 +69,13 @@ TADA_GetATTAINSAUSiteCrosswalk <- function(org_id = NULL) { MonitoringDataLinkText = monitoring_data_link_text ) %>% # paste org_id in front of MLs from the specified org if they are missing - #from ATTAINS + # from ATTAINS dplyr::mutate(MonitoringLocationIdentifier = ifelse(( OrganizationIdentifier == org_id & - stringr::str_detect(MonitoringLocationIdentifier, - org_id, - negate = TRUE)), + stringr::str_detect(MonitoringLocationIdentifier, + org_id, + negate = TRUE + )), paste0(org_id, "-", MonitoringLocationIdentifier), MonitoringLocationIdentifier )) @@ -108,7 +110,7 @@ TADA_GetATTAINSAUSiteCrosswalk <- function(org_id = NULL) { #' This function creates the batch upload files needed to add or update #' Monitoring Location Identifiers in ATTAINS Assessment Unit profiles. 
Users #' can specify whether all records should be overwritten (replaced) or if new -#' Monitoring Location Identifiers should be appended (added) to existing +#' Monitoring Location Identifiers should be appended (added) to existing #' records. #' #' @param org_id Character argument. The ATTAINS organization identifier must @@ -118,39 +120,39 @@ TADA_GetATTAINSAUSiteCrosswalk <- function(org_id = NULL) { #' Organization identifiers are listed in the "OrgName" tab. The "code" column #' contains the organization identifiers that should be used for this param. #' -#' @param wqp_data_links Character argument. When wqp_data_links is equal to -#' "add" or "replace", the function will build the URL for the Water Quality -#' Portal Data Site page for each Monitoring Location Identifier in the data -#' frame. It will examine the response code of each URL and only retain those -#' with a 200 response, which indicates the URL is valid. When -#' wqp_data_links = "add", the URL will be added to any existing text in the -#' MS_DATA_LINK_TEXT column. When wqp_data_links = "replace", the URL will -#' replace any existing text in the MS_DATA_LINK_TEXT column. When -#' wqp_data_links = "none", no URLs will be created or added to the returned +#' @param wqp_data_links Character argument. When wqp_data_links is equal to +#' "add" or "replace", the function will build the URL for the Water Quality +#' Portal Data Site page for each Monitoring Location Identifier in the data +#' frame. It will examine the response code of each URL and only retain those +#' with a 200 response, which indicates the URL is valid. When +#' wqp_data_links = "add", the URL will be added to any existing text in the +#' MS_DATA_LINK_TEXT column. When wqp_data_links = "replace", the URL will +#' replace any existing text in the MS_DATA_LINK_TEXT column. When +#' wqp_data_links = "none", no URLs will be created or added to the returned #' data frame. Default is wqp_data_links = "add". 
#' #' @param attains_replace Character argument. When attains_replace = FALSE, all -#' Monitoring Location Identifiers in the user supplied crosswalk will be -#' appended to the existing ATTAINS crosswalk. When attains_replace = TRUE, -#' Monitoring Location Identifiers will only be retained if they are in the +#' Monitoring Location Identifiers in the user supplied crosswalk will be +#' appended to the existing ATTAINS crosswalk. When attains_replace = TRUE, +#' Monitoring Location Identifiers will only be retained if they are in the #' user supplied crosswalk. Default equals FALSE. #' -#' @param crosswalk A user-supplied dataframe with the columns +#' @param crosswalk A user-supplied dataframe with the columns #' ASSESSMENT_UNIT_ID, MS_LOCATION_ID, MS_ORG_ID, and MONITORING_DATA_LINK_TEXT -#' is required. The ASSESSMENT_UNIT_ID and MS_LOCATION_ID must be filled out -#' in order to use this function. The additional columns, -#' MONITORING_DATA_LINK_TEXT, containing a single URL or "; " separated URLs -#' linking to information about the Monitoring Location, and MS_ORG_ID, -#' containing the WQP organization identifier for the Monitoring Location can -#' be left blank and the function will still run. Data link URLS to WQP site -#' pages cannot be automatically generated by this function unless the -#' MS_ORG_ID column is populated with the WQP OrganizationIdentifier. When +#' is required. The ASSESSMENT_UNIT_ID and MS_LOCATION_ID must be filled out +#' in order to use this function. The additional columns, +#' MONITORING_DATA_LINK_TEXT, containing a single URL or "; " separated URLs +#' linking to information about the Monitoring Location, and MS_ORG_ID, +#' containing the WQP organization identifier for the Monitoring Location can +#' be left blank and the function will still run. Data link URLS to WQP site +#' pages cannot be automatically generated by this function unless the +#' MS_ORG_ID column is populated with the WQP OrganizationIdentifier. 
When #' crosswalk = NULL, the crosswalk will be downloaded from ATTAINS. This allows #' users to add URLs for the Water Quality Portal Data Site pages to the ATTAINS #' Assessment Unit profile where possible without updating other information #' in ATTAINS. #' -#' @return The csv batch upload files for ATTAINS to add or update +#' @return The csv batch upload files for ATTAINS to add or update #' Monitoring Locations. #' #' @export @@ -165,7 +167,7 @@ TADA_GetATTAINSAUSiteCrosswalk <- function(org_id = NULL) { #' wqp_data_links = "replace" #' ) #' -#' # Alaska example using a user supplied crosswalk to update entries in +#' # Alaska example using a user supplied crosswalk to update entries in #' # ATTAINS by appending user supplied information to ATTAINS crosswalk #' #' # example monitoring location identifiers @@ -174,31 +176,36 @@ TADA_GetATTAINSAUSiteCrosswalk <- function(org_id = NULL) { #' "AK_M_1021109_013", "AK_M_1021109_013", #' "AK_M_1021109_013" #' ) -#' +#' #' # example organization identifiers #' MS_ORG_ID <- c("AKDECWQ", "AKDECWQ", "AKDECWQ", "AKDECWQ", "AKDECWQ") -#' +#' #' # example assessment units -#' MS_LOCATION_ID <- c("ExampleSite1", "ExampleSite2", "ExampleSite3", -#' "ExampleSite4", "ExampleSite5") +#' MS_LOCATION_ID <- c( +#' "ExampleSite1", "ExampleSite2", "ExampleSite3", +#' "ExampleSite4", "ExampleSite5" +#' ) #' # example urls #' MONITORING_DATA_LINK_TEXT <- c( -#' "https://www.waterqualitydata.us/provider/STORET/AKDECWQ/", -#' "https://www.waterqualitydata.us/provider/STORET/AKDECWQ/", -#' "https://www.waterqualitydata.us/provider/STORET/AKDECWQ/", -#' "https://www.waterqualitydata.us/provider/STORET/AKDECWQ/", -#' "https://www.waterqualitydata.us/provider/STORET/AKDECWQ/") +#' "https://www.waterqualitydata.us/provider/STORET/AKDECWQ/", +#' "https://www.waterqualitydata.us/provider/STORET/AKDECWQ/", +#' "https://www.waterqualitydata.us/provider/STORET/AKDECWQ/", +#' "https://www.waterqualitydata.us/provider/STORET/AKDECWQ/", +#' 
"https://www.waterqualitydata.us/provider/STORET/AKDECWQ/" +#' ) #' #' # create example crosswalk data frame -#' ex.user.cw <- data.frame(MS_LOCATION_ID, MS_ORG_ID, ASSESSMENT_UNIT_ID, -#' MONITORING_DATA_LINK_TEXT) +#' ex.user.cw <- data.frame( +#' MS_LOCATION_ID, MS_ORG_ID, ASSESSMENT_UNIT_ID, +#' MONITORING_DATA_LINK_TEXT +#' ) #' #' AK_appenduserdata <- TADA_UpdateMonitoringLocationsInATTAINS( -#' org_id = "AKDECWQ", -#' crosswalk = ex.user.cw, -#' attains_replace = FALSE, -#' wqp_data_links = "none") -#' +#' org_id = "AKDECWQ", +#' crosswalk = ex.user.cw, +#' attains_replace = FALSE, +#' wqp_data_links = "none" +#' ) #' } #' TADA_UpdateMonitoringLocationsInATTAINS <- function(org_id = NULL, @@ -206,8 +213,9 @@ TADA_UpdateMonitoringLocationsInATTAINS <- function(org_id = NULL, attains_replace = FALSE, wqp_data_links = "add") { # get list of organization identifiers from ATTAINS - org.ref <- utils::read.csv(system.file("extdata", "ATTAINSOrgIDsRef.csv", - package = "EPATADA")) + org.ref <- utils::read.csv(system.file("extdata", "ATTAINSOrgIDsRef.csv", + package = "EPATADA" + )) # stop function if organization identifiers is not found in ATTAINS if (!org_id %in% org.ref$code) { @@ -373,8 +381,10 @@ TADA_UpdateMonitoringLocationsInATTAINS <- function(org_id = NULL, dplyr::mutate( MONITORING_DATA_LINK_TEXT = ifelse( grepl("200", response.code), - paste0(MONITORING_DATA_LINK_TEXT, "; ", - MONITORING_DATA_LINK_TEXT.New), + paste0( + MONITORING_DATA_LINK_TEXT, "; ", + MONITORING_DATA_LINK_TEXT.New + ), MONITORING_DATA_LINK_TEXT ), MONITORING_DATA_LINK_TEXT = stringr::str_remove_all( @@ -384,14 +394,20 @@ TADA_UpdateMonitoringLocationsInATTAINS <- function(org_id = NULL, ) %>% tidyr::separate_rows(MONITORING_DATA_LINK_TEXT, sep = ", ") %>% dplyr::group_by(ASSESSMENT_UNIT_ID, MS_ORG_ID, MS_LOCATION_ID) %>% - suppressMessages(dplyr::summarise(MONITORING_DATA_LINK_TEXT = - paste( - unique( - MONITORING_DATA_LINK_TEXT), - collapse = ", "))) %>% - 
dplyr::select(ASSESSMENT_UNIT_ID, - MS_ORG_ID, MS_LOCATION_ID, - MONITORING_DATA_LINK_TEXT) %>% + suppressMessages(dplyr::summarise( + MONITORING_DATA_LINK_TEXT = + paste( + unique( + MONITORING_DATA_LINK_TEXT + ), + collapse = ", " + ) + )) %>% + dplyr::select( + ASSESSMENT_UNIT_ID, + MS_ORG_ID, MS_LOCATION_ID, + MONITORING_DATA_LINK_TEXT + ) %>% dplyr::distinct() } return(update.crosswalk) diff --git a/R/GeospatialFunctions.R b/R/GeospatialFunctions.R index a1786e1e..8c086ecd 100644 --- a/R/GeospatialFunctions.R +++ b/R/GeospatialFunctions.R @@ -479,10 +479,12 @@ fetchATTAINS <- function(.data, catchments_only = FALSE) { #' applyautoclean = TRUE #' ) #' -#' nhd_data <- fetchNHD(.data = tada_data, resolution = "Hi", -#' features = c("catchments", "waterbodies", "flowlines")) +#' nhd_data <- fetchNHD( +#' .data = tada_data, resolution = "Hi", +#' features = c("catchments", "waterbodies", "flowlines") +#' ) #' } -#' +#' fetchNHD <- function(.data, resolution = "Hi", features = "catchments") { suppressMessages(suppressWarnings({ # sf::sf_use_s2(TRUE) @@ -925,13 +927,13 @@ fetchNHD <- function(.data, resolution = "Hi", features = "catchments") { #' #' Link catchment-based ATTAINS assessment unit data (EPA snapshot of NHDPlus HR catchments associated with entity submitted assessment unit features - points, lines, and polygons) to Water Quality Portal observations, often imported via `TADA_DataRetrieval()`. This function returns the objects that can be mapped in `TADA_ViewATTAINS()`. Check out the #' TADAModule2.Rmd for an example workflow. Note that approximately 80% of state submitted assessment units in ATTAINS were developed based on high res NHDPlus, so we are using that as the default. -#' +#' #' The ATTAINS snapshot of NHDPlus HR catchments is not available for areas that do not have existing Assessment Units in ATTAINS. 
For these areas where there are WQP sites, but no existing ATTAINS assessment units, a user can choose to associate the WQP sites with NHDPlus catchments available from the USGS nhdplusTools package (USGS snapshot) using the optional function param 'fill_catchments'. In theory, if desired by the user, these high res catchments could be created as new assessment unit polygons in ATTAINS (that process is outside of TADA). #' #' Adds one new column to input dataframe, 'index', which identifies rows that are the same observation but are linked to multiple ATTAINS assessment units. It is possible for a single TADA WQP observation to have multiple ATTAINS assessment units linked to it and subsequently more than one row of data. #' -#' If TADA_MakeSpatial has not yet been run, this function runs it which also adds another new column to the input dataframe, 'geometry', which allows for mapping and additional geospatial capabilities. -#' +#' If TADA_MakeSpatial has not yet been run, this function runs it which also adds another new column to the input dataframe, 'geometry', which allows for mapping and additional geospatial capabilities. +#' #' Please review the output of this function carefully, especially waterbody intersections and lake/ocean coasts where imprecise WQP monitoring location coordinates can be problematic. Note that many WQP locations will not fall within the bounds of NHDPlus (estuaries, oceans). Manual adjustments and quality control checks are strongly encouraged. WQP monitoring location metadata may also be helpful for matching waterbody names with ATTAINS waterbody names instead of relying solely on the geospatial location (lat/long). #' #' @param .data A dataframe created by `TADA_DataRetrieval()` or the sf equivalent made by `TADA_MakeSpatial()`. 
diff --git a/R/Maintenance.R b/R/Maintenance.R index 05b9605a..5aa99516 100644 --- a/R/Maintenance.R +++ b/R/Maintenance.R @@ -135,14 +135,15 @@ TADA_UpdateExampleData <- function() { # save(Data_R5_TADAPackageDemo, file = "data/Data_R5_TADAPackageDemo.rda") usethis::use_data(Data_R5_TADAPackageDemo, internal = FALSE, overwrite = TRUE, compress = "xz", version = 3, ascii = FALSE) rm(Data_R5_TADAPackageDemo) - + # MODULE 3 VIGNETTE EXAMPLE DATA # Get data Data_WV <- TADA_DataRetrieval( startDate = "2020-03-14", huc = "02070004", - applyautoclean = FALSE) - + applyautoclean = FALSE + ) + # Remove non-surface water media # OPTIONAL Data_WV_2 <- TADA_AnalysisDataFilter( @@ -150,21 +151,24 @@ TADA_UpdateExampleData <- function() { clean = TRUE, surface_water = TRUE, ground_water = FALSE, - sediment = FALSE) - + sediment = FALSE + ) + # Remove single org duplicates # REQUIRED Data_WV_3 <- TADA_FindPotentialDuplicatesSingleOrg( - Data_WV_2) - + Data_WV_2 + ) + Data_WV_4 <- dplyr::filter( Data_WV_3, - TADA.SingleOrgDup.Flag == "Unique") - + TADA.SingleOrgDup.Flag == "Unique" + ) + # Run autoclean # REQUIRED Data_WV_5 <- TADA_AutoClean(Data_WV_4) - + # Prepare censored results # REQUIRED Data_WV_6 <- TADA_SimpleCensoredMethods( @@ -172,59 +176,64 @@ TADA_UpdateExampleData <- function() { nd_method = "multiplier", nd_multiplier = 0.5, od_method = "as-is", - od_multiplier = "null") - + od_multiplier = "null" + ) + # Remove multiple org duplicates # OPTIONAL Data_WV_7 <- TADA_FindPotentialDuplicatesMultipleOrgs( - Data_WV_6) - + Data_WV_6 + ) + Data_WV_8 <- dplyr::filter( Data_WV_7, - TADA.ResultSelectedMultipleOrgs == "Y") - + TADA.ResultSelectedMultipleOrgs == "Y" + ) + # Filter out remaining irrelevant data, NA's and empty cols # REQUIRED unique(Data_WV_8$TADA.ResultMeasureValueDataTypes.Flag) sum(is.na(Data_WV_8$TADA.ResultMeasureValue)) - Data_WV_9 = TADA_AutoFilter(Data_WV_8) + Data_WV_9 <- TADA_AutoFilter(Data_WV_8) 
unique(Data_WV_9$TADA.ResultMeasureValueDataTypes.Flag) sum(is.na(Data_WV_9$TADA.ResultMeasureValue)) - + # Remove results with QC issues # REQUIRED Data_WV_10 <- TADA_RunKeyFlagFunctions( Data_WV_9, - clean = TRUE) - - # CM note for team discussion: Should results with NA units be dealt with now as well within TADA_AutoFilter? - + clean = TRUE + ) + + # CM note for team discussion: Should results with NA units be dealt with now as well within TADA_AutoFilter? + # Flag above and below threshold. Do not remove # OPTIONAL Data_WV_11 <- TADA_FlagAboveThreshold(Data_WV_10, clean = FALSE, flaggedonly = FALSE) Data_WV_12 <- TADA_FlagBelowThreshold(Data_WV_11, clean = FALSE, flaggedonly = FALSE) - + # Harmonize synonyms # OPTIONAL Data_WV_13 <- TADA_HarmonizeSynonyms(Data_WV_12) - + # Review - Data_WV_14 <- dplyr::filter(Data_WV_13, TADA.CharacteristicName %in% c("ZINC", "PH","NITRATE")) - + Data_WV_14 <- dplyr::filter(Data_WV_13, TADA.CharacteristicName %in% c("ZINC", "PH", "NITRATE")) + TADA_FieldValuesTable(Data_WV_14, field = "TADA.ComparableDataIdentifier") - + # Save example data Data_WV_Mod1_Output <- Data_WV_14 - + print("Data_WV_Mod1_Output:") print(dim(Data_WV_Mod1_Output)) - + usethis::use_data(Data_WV_Mod1_Output, - internal = FALSE, - overwrite = TRUE, - compress = "xz", - version = 3, - ascii = FALSE) + internal = FALSE, + overwrite = TRUE, + compress = "xz", + version = 3, + ascii = FALSE + ) rm(Data_WV_Mod1_Output) } diff --git a/R/ResultFlagsDependent.R b/R/ResultFlagsDependent.R index 6e731823..0dc91e2a 100644 --- a/R/ResultFlagsDependent.R +++ b/R/ResultFlagsDependent.R @@ -411,17 +411,19 @@ TADA_FlagResultUnit <- function(.data, clean = c("suspect_only", "nonstandardize check.data["TADA.ResultUnit.Flag"][is.na(check.data["TADA.ResultUnit.Flag"])] <- "Not Reviewed" # Flag additional combinations that are invalid regardless of media type (and media type was left blank - NWIS only issue) - if(any(check.data$TADA.CharacteristicName == "PH")) { + if 
(any(check.data$TADA.CharacteristicName == "PH")) { check.data <- check.data %>% - dplyr::mutate(TADA.ResultUnit.Flag = - ifelse(TADA.CharacteristicName == "PH" & - is.na(TADA.ActivityMediaName) & - TADA.ResultMeasure.MeasureUnitCode == "MOLE/L" - | TADA.ResultMeasure.MeasureUnitCode == "MMOL/L", - "Suspect", TADA.ResultUnit.Flag) + dplyr::mutate( + TADA.ResultUnit.Flag = + ifelse(TADA.CharacteristicName == "PH" & + is.na(TADA.ActivityMediaName) & + TADA.ResultMeasure.MeasureUnitCode == "MOLE/L" | + TADA.ResultMeasure.MeasureUnitCode == "MMOL/L", + "Suspect", TADA.ResultUnit.Flag + ) ) - } - + } + # if all rows are "Pass", return input with flag column if (any(c("NonStandardized", "Suspect", "Not Reviewed") %in% unique(check.data$TADA.ResultUnit.Flag)) == FALSE) { diff --git a/R/TADARefTables.R b/R/TADARefTables.R index 4e293c3f..8ec07f29 100644 --- a/R/TADARefTables.R +++ b/R/TADARefTables.R @@ -98,8 +98,9 @@ EPA304aRef_Cached <- NULL #' pollutant name and use_name for assessment under the CWA. #' #' Currently only numeric priority characteristic in TADA are the focus. -#' This list can be found on the bottom of the web page: -#' https://www.epa.gov/wqs-tech/state-specific-water-quality-standards-effective-under-clean-water-act-cwa +#' For a list of characteristics that have a crosswalk between the CST and +#' TADA.CharacteristicName, please run the following below in the R environment: +#' 'utils::read.csv(system.file("extdata", "TADAPriorityCharUnitRef.csv", package = "EPATADA"))' #' #' @return Dataframe of EPA304a recommended standards for a pollutant and use name. 
#' @@ -143,11 +144,12 @@ TADA_GetEPA304aRef <- function() { utils::tail(-CST.begin) %>% dplyr::filter(ENTITY_ABBR == "304A") %>% dplyr::left_join(tada.char.ref, by = c("POLLUTANT_NAME" = "CST.PollutantName"), relationship = "many-to-many") %>% - dplyr::select(TADA.CharacteristicName, POLLUTANT_NAME, organization_identifier = ENTITY_ABBR, - use_name = USE_CLASS_NAME_LOCATION_ETC, CRITERION_VALUE, - CRITERIATYPEAQUAHUMHLTH, CRITERIATYPEFRESHSALTWATER, - CRITERIATYPE_ACUTECHRONIC, CRITERIATYPE_WATERORG, UNIT_NAME - ) + dplyr::select(TADA.CharacteristicName, POLLUTANT_NAME, + organization_identifier = ENTITY_ABBR, + use_name = USE_CLASS_NAME_LOCATION_ETC, CRITERION_VALUE, + CRITERIATYPEAQUAHUMHLTH, CRITERIATYPEFRESHSALTWATER, + CRITERIATYPE_ACUTECHRONIC, CRITERIATYPE_WATERORG, UNIT_NAME + ) # Remove intermediate variables rm(CST.begin, tada.char.ref, raw.data) diff --git a/R/WQPWQXRefTables.R b/R/WQPWQXRefTables.R index 943a95cb..2ef79735 100644 --- a/R/WQPWQXRefTables.R +++ b/R/WQPWQXRefTables.R @@ -910,10 +910,10 @@ WQPProviderRef_Cached <- NULL #' Get Organization and Provider Reference Table #' -#' This function creates a crosswalk of all OrganizationIdentifiers, +#' This function creates a crosswalk of all OrganizationIdentifiers, #' OrganizationFormalNames, and ProviderNames in the Water Quality Portal (WQP). #' -#' @return A crosswalk dataframe including the following columns: +#' @return A crosswalk dataframe including the following columns: #' OrganizationIdentifier, OrganizationFormalName, ProviderName. 
#' #' @export @@ -928,7 +928,7 @@ TADA_GetWQPOrgProviderRef <- function() { if (!is.null(WQPProviderRef_Cached)) { return(WQPProviderRef_Cached) } - + # Try to download up-to-date raw data raw.data <- tryCatch( { @@ -941,21 +941,21 @@ TADA_GetWQPOrgProviderRef <- function() { NULL } ) - + # need to remove providers w/ no sites on date site pages - + # If the download failed fall back to internal data (and report it) if (is.null(raw.data)) { message("Downloading latest WQP Organization and Provider Reference Table failed!") message("Falling back to (possibly outdated) internal file.") return(utils::read.csv(system.file("extdata", "WQXProviderRef.csv", package = "EPATADA"))) } - + # Save updated table in cache WQPProviderRef <- raw.data - + WQPProviderRef_Cached <- WQPProviderRef - + WQPProviderRef } @@ -964,4 +964,3 @@ TADA_GetWQPOrgProviderRef <- function() { TADA_UpdateWQPOrgProviderRef <- function() { utils::write.csv(TADA_GetWQPOrgProviderRef(), file = "inst/extdata/WQXProviderRef.csv", row.names = FALSE) } - diff --git a/man/TADA_CreateParamRef.Rd b/man/TADA_CreateParamRef.Rd index 24928210..a197dbfc 100644 --- a/man/TADA_CreateParamRef.Rd +++ b/man/TADA_CreateParamRef.Rd @@ -88,14 +88,9 @@ the original TADA df will be added to the new crosswalk. Users can then identify missing cell values that were either not addressed in the creation of the original crosswalk or which pertain to TADA.ComparableDataIdentifiers not included in the original crosswalk. -The user-supplied crosswalk table must contain the required columns. Users will have two options: -\enumerate{ -\item Supply a paramRef data frame which contains at least these four column names: -TADA.CharacteristicName, TADA.MethodSpeciationName, TADA.ResultSampleFractionText, -and ATTAINS.ParameterName. -\item Supply a paramRef data frame which contains at least these two column names: -TADA.ComparableDataIdentifier and ATTAINS.ParameterName . 
-} +The user-supplied crosswalk table must contain the required columns. Users will need to +provide a paramRef data frame which contains at least these two column names: +TADA.ComparableDataIdentifier and ATTAINS.ParameterName. Users who are interested in doing an assessment or comparing criteria for more than organization also need to include an additional column name: 'organization_identifier'. This ensures that @@ -104,7 +99,9 @@ each organization. A draft crosswalk between TADA.CharacteristicName and EPA 304A pollutant names (sourced from the Criteria Search Tool) has been created by the EPATADA team. This crosswalk is still in -development and only focuses on the TADA priority characteristics (add link to list?). +development and only focuses on the TADA priority characteristics. +Please run the following below in the R environment to view current crosswalks: +'utils::read.csv(system.file("extdata", "TADAPriorityCharUnitRef.csv", package = "EPATADA"))'. } \examples{ # This creates a blank paramRef template of UT Nutrients data. Users will need to fill this diff --git a/man/TADA_CreateParamUseRef.Rd b/man/TADA_CreateParamUseRef.Rd index f4e60b1a..220dcb9f 100644 --- a/man/TADA_CreateParamUseRef.Rd +++ b/man/TADA_CreateParamUseRef.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/CriteriaInputs.R \name{TADA_CreateParamUseRef} \alias{TADA_CreateParamUseRef} -\title{Parameter and Use Name crosswalk} +\title{ATTAINS Parameter Name and Use Name crosswalk} \usage{ TADA_CreateParamUseRef( .data, @@ -69,7 +69,6 @@ parameter name and associated use_name that applies to their org(s) with its ass TADA.ComparableDataIdentifier(s). This can be accomplished by determining which 'use_name'(s) from the drop-down menu in the excel spreadsheet generated by this function correspond to the TADA.ComparableDataIdentifier(s) found in the TADA dataframe. -`. 
} \details{ Before running this function, users must run TADA_CreateParamRef() to create the @@ -86,11 +85,13 @@ ATTAINS assessment cycles, users should consider contacting the ATTAINS team to Otherwise, users can still proceed by overriding the data validation by value pasting. Users will be warned in the ATTAINS.FlagUseName column if they choose to include an ATTAINS use name that was not named in prior ATTAINS assessment cycles as: -'Suspect: use name is not found as a prior use name for this organization' or -'Suspect: use name is not found as a prior use name for this parameter' +'Use name is not listed as a prior cause in ATTAINS for this organization' or +'Use name is listed as a prior cause in this organization, but not for this parameter name' Users will have the flexibility to include the EPA304a standards by including this string in the org_id -function argument. Users who only want the EPA304a standards would input as an argument input: +function argument. + +Users who only want the EPA304a standards would input as an argument input: org_id = "EPA304a" @@ -98,8 +99,10 @@ Users who want both their orgs and EPA304a standards would input a character vec org_id = c("EPA304a", "UTAHDWQ") -The use_name for EPA304a standards are matched from the CriteriaSearchTool: -(CST) https://www.epa.gov/wqs-tech/state-specific-water-quality-standards-effective-under-clean-water-act-cwa +NOTE: The EPA304a standards are not a part of the ATTAINS domain value, these standards +have been crosswalk to a list of priority TADA.ComparableDataIdentifier by the internal EPATADA team. +The use_name for EPA304a standards are matched from the CriteriaSearchTool (CST): +www.epa.gov/wqs-tech/state-specific-water-quality-standards-effective-under-clean-water-act-cwa while the use_name for any other ATTAINS organization identifiers come from ATTAINS domain value for use_name. 
} \examples{ diff --git a/man/TADA_GetEPA304aRef.Rd b/man/TADA_GetEPA304aRef.Rd index 982224f5..c0353772 100644 --- a/man/TADA_GetEPA304aRef.Rd +++ b/man/TADA_GetEPA304aRef.Rd @@ -18,6 +18,7 @@ pollutant name and use_name for assessment under the CWA. } \details{ Currently only numeric priority characteristic in TADA are the focus. -This list can be found on the bottom of the web page: -https://www.epa.gov/wqs-tech/state-specific-water-quality-standards-effective-under-clean-water-act-cwa +For a list of characteristics that have a crosswalk between the CST and +TADA.CharacteristicName, please run the following below in the R environment: +'utils::read.csv(system.file("extdata", "TADAPriorityCharUnitRef.csv", package = "EPATADA"))' } diff --git a/vignettes/TADAModule3.Rmd b/vignettes/TADAModule3.Rmd index d682165f..343eab58 100644 --- a/vignettes/TADAModule3.Rmd +++ b/vignettes/TADAModule3.Rmd @@ -287,9 +287,9 @@ an ATTAINS parameter name. # ) ParamRef <- dplyr::mutate(NCTC_ParamRef, ATTAINS.ParameterName = dplyr::case_when( -TADA.CharacteristicName == "PH" ~ "PH", -TADA.ComparableDataIdentifier == "ZINC_TOTAL_NA_UG/L" ~ "ZINC", -grepl("NITRATE", TADA.ComparableDataIdentifier) ~ "NITROGEN, TOTAL" + TADA.CharacteristicName == "PH" ~ "PH", + TADA.ComparableDataIdentifier == "ZINC_TOTAL_NA_UG/L" ~ "ZINC", + grepl("NITRATE", TADA.ComparableDataIdentifier) ~ "NITROGEN, TOTAL" )) TADA_CSVExport(ref = ParamRef) @@ -379,28 +379,29 @@ excel file was generated, this is an example workflow of what a user can do to edit the table in the R environment. 
```{r paged.print=TRUE} -add_data <- data.frame("organization_identifier" = "MDE_EASP" ,"ATTAINS.ParameterName" = rep("PH",3), "use_name" = c("Aquatic Life and Wildlife", "Water Contact Sports", -"Seasonal Migratory Fish Spawning and Nursery Subcategory" +add_data <- data.frame("organization_identifier" = "MDE_EASP", "ATTAINS.ParameterName" = rep("PH", 3), "use_name" = c( + "Aquatic Life and Wildlife", "Water Contact Sports", + "Seasonal Migratory Fish Spawning and Nursery Subcategory" )) # The output of this will not reflect changes to the ATTAINS.FlagUseName column. To do so, we need to re run TADA_CreateParamUseRef() with paramUseRef = ParamUseRef as an argument. ParamUseRef <- NCTC_ParamUseRef3 %>% - dplyr::left_join(add_data, by = c("organization_identifier", "ATTAINS.ParameterName"), keep = FALSE) %>% - dplyr::mutate(use_name = dplyr::coalesce(use_name.x, use_name.y)) %>% + dplyr::left_join(add_data, by = c("organization_identifier", "ATTAINS.ParameterName"), keep = FALSE) %>% + dplyr::mutate(use_name = dplyr::coalesce(use_name.x, use_name.y)) %>% dplyr::select(-c(use_name.x, use_name.y)) # User now has a saved dataframe which can be used to reflect any updates of Parameter-use ref in the future. TADA_CSVExport(ref = ParamUseRef) -# PH will now reflect the changes +# PH will now reflect the changes NCTC_ParamUseRef3.1 <- TADA_CreateParamUseRef( Data_NCTC, paramUseRef = ParamUseRef, # Edits were made to paramUseRef, updates flag column - org_id = c("EPA304a", "MDE_EASP"), - paramRef = NCTC_ParamRef2, + org_id = c("EPA304a", "MDE_EASP"), + paramRef = NCTC_ParamRef2, excel = FALSE # comment out 'excel = FALSE' and uncomment 'excel = TRUE, overwrite = TRUE' to run the excel file - # excel = TRUE, overwrite = TRUE - ) + # excel = TRUE, overwrite = TRUE +) TADA_CSVExport(ref = NCTC_ParamUseRef3.1) ```