Skip to content

Commit c4ff19f

Browse files
authored
Merge pull request nationalparkservice#139 from RobLBaker/main
make test_missing_data more robust to file-order mismatches
2 parents 6c294ec + 4566f92 commit c4ff19f

File tree

141 files changed

+305
-412
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

141 files changed

+305
-412
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Description: Allows the user (and reviewer) to check a data package and test whe
1010
License: MIT + file LICENSE
1111
Encoding: UTF-8
1212
Roxygen: list(markdown = TRUE)
13-
RoxygenNote: 7.2.3
13+
RoxygenNote: 7.3.1
1414
VignetteBuilder: knitr
1515
Suggests:
1616
here,

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# DPchecker 0.3.4
22

3+
2024-06-24
4+
* Fixed a bug in `test_missing_data()`: when the order of files listed in metadata did not match the order of files returned by `list.files()`, the function evaluated the wrong file and produced spurious, unhelpful errors.
35
2024-02-05
46
* Fix bug in `test_date_range()` that was adding UTC to temporalCoverage
57
* `test_missing_data()` now also handles the missing data codes "blank" and "empty".

R/optional_eml_elements.R

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,12 +376,16 @@ test_orcid_match <- function(metadata = load_metadata(directory)){
376376
}
377377
}
378378

379-
#if there are any orcids, record orcids bad orcids:
379+
#if there are any orcids, record orcids & bad orcids:
380380
if(!is.null(existing_orcid)){
381381
bad_orcid <- NULL
382382
wrong_person <- NULL
383383
for(i in seq_along(surName)){
384384
orcid_url <- existing_orcid[i]
385+
is_it_na <- stringr::str_sub(orcid_url, start = -2)
386+
if(is_it_na == "NA") {
387+
next
388+
}
385389
#api request to ORCID:
386390

387391
tryCatch({test_req <- httr::GET(orcid_url)},

R/tabular_data_congruence.R

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -627,8 +627,7 @@ test_missing_data <- function(directory = here::here(),
627627
#detail_level <- match.arg(arg_choices)
628628

629629
# get dataTable and all children elements
630-
data_tbl <- EML::eml_get(metadata, "dataTable")
631-
data_tbl$`@context` <- NULL
630+
data_tbl <- metadata[["dataset"]][["dataTable"]]
632631
# If there's only one csv, data_tbl ends up with one less level of nesting. Re-nest it so that the rest of the code works consistently
633632
if ("attributeList" %in% names(data_tbl)) {
634633
data_tbl <- list(data_tbl)
@@ -650,36 +649,44 @@ test_missing_data <- function(directory = here::here(),
650649
for (j in seq_len(ncol(dat))) {
651650
#look for NAs; if NAs found, look for correct missing data codes
652651
if (sum(is.na(dat[,j])) > 0) {
653-
missing <- data_tbl[[i]][["attributeList"]][["attribute"]][[j]][["missingValueCode"]][["code"]]
654-
if(is.null(missing) || sum(missing != missing_types) < 1) {
655-
#file level error message output:
656-
if (detail_level == "files") {
657-
error_log <- append(error_log,
652+
for(k in 1:length(seq_along(data_tbl))){
653+
if(data_tbl[[k]][["physical"]][["objectName"]] != data_files[i]){
654+
next
655+
} else {
656+
missing <- data_tbl[[k]][["attributeList"]][["attribute"]][[j]][["missingValueCode"]][["code"]]
657+
if(is.null(missing) || sum(missing != missing_types) < 1) {
658+
#file level error message output:
659+
if (detail_level == "files") {
660+
error_log <- append(error_log,
658661
paste0(" ",
659662
"---> {.file ",
660663
data_files[i],
661664
"} contains missing data without a corresponding missing data code in metadata." ))
662-
break
663-
}
664-
#column level error message output:
665-
if (detail_level == "columns") {
666-
error_log <- append(error_log,
665+
break
666+
}
667+
#column level error message output:
668+
if (detail_level == "columns") {
669+
error_log <- append(error_log,
667670
paste0(" ",
668671
"---> {.file ",
669672
data_files[i],
670673
"} {.field ",
671674
names(dat)[j],
672675
"} contains missing data without a corresponding missing data code in metadata."))
676+
}
677+
}
678+
}
673679
}
674680
}
675681
}
676682
}
677-
}
678683
if(is.null(error_log)){
679-
cli::cli_inform(c("v" = "Missing data listed as NA is accounted for in metadata"))
684+
cli::cli_inform(c(
685+
"v" = "Missing data listed as NA is accounted for in metadata"))
680686
}
681687
else{
682688
# really only need to say it once per file/column combo
689+
error_log <- unique(error_log)
683690
msg <- error_log
684691
names(msg) <- rep(" ", length(msg))
685692
err <- paste0("Undocumented missing data detected. Please document all missing data in metadata:\n")

docs/404.html

Lines changed: 4 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/LICENSE-text.html

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)