Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve user interaction & error catching for prompts #80

Merged
merged 24 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions R/data-json_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
#' Example metadata for a health dataset, to demo the function domain_mapping.R \cr \cr
#' This data was created with these five steps:
#' \enumerate{
#' \item Go to https://modelcatalogue.cs.ox.ac.uk/hdruk_live/#/catalogue/dataModel/17e86f3f-ec29-4c8e-9efc-8793a74b107d
#' \item Go to https://modelcatalogue.cs.ox.ac.uk/hdruk_live/#/catalogue/dataModel/16920b16-e24c-49f9-b4df-3dc85779822b/dataClasses
#' \item Download json metadata file by selecting the 'Export as JSON' option on the download button
#' \item \code{install.packages("rjson")}
#' \item \code{json_metadata <- rjson::fromJSON(file = '/browseMetadata/data-raw/maternity_indicators_dataset_(mids)_20240105T132210.json')}
#' \item \code{json_metadata <- rjson::fromJSON(file = '/browseMetadata/data-raw/national_community_child_health_database_(ncchd)_20240405T130125.json')}
#' \item \code{usethis::use_data(json_metadata)}
#' }
#'
Expand All @@ -16,5 +16,5 @@
#'
#' @format Nested lists
#'
#' @source https://modelcatalogue.cs.ox.ac.uk/hdruk_live/#/catalogue/dataModel/17e86f3f-ec29-4c8e-9efc-8793a74b107d
#' @source https://modelcatalogue.cs.ox.ac.uk/hdruk_live/#/catalogue/dataModel/16920b16-e24c-49f9-b4df-3dc85779822b/dataClasses
"json_metadata"
221 changes: 133 additions & 88 deletions R/domain_mapping.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,16 @@
#' @importFrom utils read.csv write.csv

domain_mapping <- function(json_file = NULL, domain_file = NULL, look_up_file = NULL) {
# Load data: Check if demo data should be used

## Load data: Check if demo data should be used ----

if (is.null(json_file) && is.null(domain_file)) {
# If both json_file and domain_file are NULL, use demo data
meta_json <- get("json_metadata")
domains <- get("domain_list")
DomainListDesc <- "DemoList"
cat("\n")
cli_alert_info("Running domain_mapping in demo mode using package data files")
cli_alert_success("Running domain_mapping in demo mode using package data files")
} else if (is.null(json_file) || is.null(domain_file)) {
# If only one of json_file and domain_file is NULL, throw error
cat("\n")
Expand All @@ -41,53 +43,75 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL, look_up_file =

# Check if user has provided a look-up table
if (is.null(look_up_file)) {
cli_alert_info("Using the default look-up table in data/look-up.rda")
cli_alert_success("Using the default look-up table in data/look-up.rda")
lookup <- get("look_up")
}
else {
lookup <- read.csv(look_up_file)
cli_alert_info("Using look up file inputted by user")
cli_alert_success("Using look up file inputted by user")
print(lookup)
}

# Present domains plots panel for user's reference ----
## Present domains plots panel for user's reference ----
graphics::plot.new()
domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ALF ID*"), c("*OTHER ID*"), c("*DEMOGRAPHICS*"), domains)
gridExtra::grid.table(domains_extend[1], cols = "Domain", rows = 0:(nrow(domains_extend) - 1))

# Get user and demo list info for log file ----
## Get user and demo list info for log file ----
User_Initials <- ""
cat("\n \n")
while (User_Initials == "") {
cat("\n \n")
User_Initials <- readline(prompt = "Enter Initials: ")
User_Initials <- readline("Enter your initials: ")
}

# Print information about Dataset ----
## Print information about Dataset ----
cli_h1("Dataset Name")
cat(meta_json$dataModel$label, fill = TRUE)
cli_h1("Dataset Last Updated")
cat(meta_json$dataModel$lastUpdated, fill = TRUE)
cli_h1("Dataset File Exported By")
cat(meta_json$exportMetadata$exportedBy, "at", meta_json$exportMetadata$exportedOn, fill = TRUE)
nTables <- length(meta_json$dataModel$childDataClasses)
cat("\n")
cli_alert_info("Found {nTables} Table{?s} in this Dataset")
cat("\n")

Dataset_desc <- ""
while (Dataset_desc != "Y" & Dataset_desc != "N") {
while (Dataset_desc != "Y" & Dataset_desc != "y" & Dataset_desc != "N" & Dataset_desc != "n") {
cat("\n \n")
Dataset_desc <- readline(prompt = "Would you like to read a description of the Dataset? (Y/N) ")
Dataset_desc <- readline(prompt = "Would you like to read a description of the dataset? (y/n): ")
}

if (Dataset_desc == "Y") {
if (Dataset_desc == 'Y' | Dataset_desc == 'y') {
cli_h1("Dataset Description")
cat(meta_json$dataModel$description, fill = TRUE)
readline(prompt = "Press [enter] to proceed")
readline(prompt = "Press any key to proceed")
}

# Extract each Table
## Ask user which tables to process ----

nTables <- length(meta_json$dataModel$childDataClasses)
cat("\n")
cli_alert_info("Found {nTables} Table{?s} in this Dataset")
for (dc in 1:nTables) {
cat("\n")
cat(dc,meta_json$dataModel$childDataClasses[[dc]]$label, fill = TRUE)
}

nTables_Process <- numeric(0)
nTables_Process_Error <- TRUE
nTables_Process_OutOfRange <- FALSE
while (length(nTables_Process) == 0 | nTables_Process_Error==TRUE | nTables_Process_OutOfRange == TRUE) {
if (nTables_Process_OutOfRange == TRUE) {
cli_alert_danger('That table number is not within the range displayed, please try again.')}
tryCatch({
cat("\n \n");
cli_alert_info("Enter each table number you want to process in this session (one number on each line):");
cat("\n");
nTables_Process <- scan(file="",what=0);
nTables_Process_Error <- FALSE;
nTables_Process_OutOfRange = any(nTables_Process > nTables)},
error=function(e) {nTables_Process_Error <- TRUE; print(e); cat("\n"); cli_alert_danger('Your input is in the wrong format, reference the table numbers and try again')})
}

# Extract each Table ----
for (dc in unique(nTables_Process)) {
cat("\n")
cli_alert_info("Processing Table {dc} of {nTables}")
cli_h1("Table Name")
Expand All @@ -96,15 +120,15 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL, look_up_file =
cat(meta_json$dataModel$childDataClasses[[dc]]$lastUpdated, "\n", fill = TRUE)

table_desc <- ""
while (table_desc != "Y" & table_desc != "N") {
while (table_desc != "Y" & table_desc != "y" & table_desc != "N" & table_desc != "n") {
cat("\n \n")
table_desc <- readline(prompt = "Would you like to read a description of the table? (Y/N) ")
table_desc <- readline(prompt = "Would you like to read a description of the table? (y/n): ")
}

if (table_desc == "Y") {
if (table_desc == 'Y' | table_desc == 'y') {
cli_h1("Table Description")
cat(meta_json$dataModel$childDataClasses[[dc]]$description, fill = TRUE)
readline(prompt = "Press [enter] to proceed")
readline(prompt = "Press any key to proceed")
}

thisTable <- meta_json$dataModel$childDataClasses[[dc]]$childDataElements # probably a better way of dealing with complex json files in R ...
Expand All @@ -125,78 +149,80 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL, look_up_file =

output_fname <- paste0("LOG_", gsub(" ", "", meta_json$dataModel$label), "_", gsub(" ", "", meta_json$dataModel$childDataClasses[[dc]]$label), "_", timestamp_now, ".csv")

Output <- data.frame(
Initials = c(""),
MetaDataVersion = c(""),
MetaDataLastUpdated = c(""),
DomainListDesc = c(""),
Dataset = c(""),
Table = c(""),
DataElement = c(""),
Domain_code = c(""),
Note = c("")
row_Output <- data.frame(
Initials = character(0),
MetaDataVersion = character(0),
MetaDataLastUpdated = character(0),
DomainListDesc = character(0),
Dataset = character(0),
Table = character(0),
DataElement = character(0),
Domain_code = character(0),
Note = character(0)
)

# User inputs ----
# Loop through each data element, request response from the user to match to a domain ----

cat("\n \n")
select_vars_n <- readline(prompt = "Enter the range of Data Elements to process. Press Enter to process all: ")
if (select_vars_n == "") {
start_var <- 1
end_var <- length(thisTable)
} else {
seperate_vars <- unlist(strsplit(select_vars_n, ","))
start_var <- as.numeric(seperate_vars[1])
end_var <- as.numeric(seperate_vars[2])
}
# if it's the demo run, only loop through a max of 20 data elements
if (is.null(json_file) && is.null(domain_file) && nrow(selectTable_df) > 20) {
end_var = 20
} else {end_var = nrow(selectTable_df)}

# Loop through each data element, request response from the user to match to a domain ----
for (datavar in start_var:end_var) {
Output <- row_Output
for (datavar in 1:end_var) {
cat("\n \n")
cli_alert_success("Processing data element {datavar} of {end_var}")
datavar_index <- which(lookup$DataElement == selectTable_df$Label[datavar]) #we should code this to ignore the case
lookup_subset <- lookup[datavar_index,]
if (nrow(lookup_subset) == 1) {
# auto categorisations
Output[nrow(Output) + 1, ] <- NA #why?
Output$DataElement[datavar] <- selectTable_df$Label[datavar]
Output$Domain_code[datavar] <- lookup_subset$DomainCode
Output$Note[datavar] <- "AUTO CATEGORISED"
this_Output <- row_Output
this_Output[nrow(this_Output) + 1 , ] <- NA
this_Output$DataElement[1] <- selectTable_df$Label[datavar]
this_Output$Domain_code[1] <- lookup_subset$DomainCode
this_Output$Note[1] <- "AUTO CATEGORISED"
Output <- rbind(Output,this_Output)
utils::write.csv(Output, output_fname, row.names = FALSE) # save as we go in case session terminates prematurely
} else {
# collect user responses
decision_output <- user_categorisation(selectTable_df$Label[datavar],selectTable_df$Description[datavar],selectTable_df$Type[datavar])
# input user responses into output
Output[nrow(Output) + 1, ] <- NA #why?
Output$DataElement[datavar] <- selectTable_df$Label[datavar]
Output$Domain_code[datavar] <- decision_output$decision
Output$Note[datavar] <- decision_output$decision_note
}

# Fill in columns that have all rows identical
Output$Initials <- User_Initials
Output$MetaDataVersion <- meta_json$dataModel$documentationVersion
Output$MetaDataLastUpdated <- meta_json$dataModel$lastUpdated
Output$DomainListDesc <- DomainListDesc
Output$Dataset <- meta_json$dataModel$label
Output$Table <- meta_json$dataModel$childDataClasses[[dc]]$label

# Save as we go in case session terminates prematurely
Output[Output == ""] <- NA
utils::write.csv(Output, output_fname, row.names = FALSE) # save as we go in case session terminates prematurely
this_Output <- row_Output
this_Output[nrow(this_Output) + 1 , ] <- NA
this_Output$DataElement[1] <- selectTable_df$Label[datavar]
this_Output$Domain_code[1] <- decision_output$decision
this_Output$Note[1] <- decision_output$decision_note
Output <- rbind(Output,this_Output)
utils::write.csv(Output, output_fname, row.names = FALSE) # save as we go in case session terminates prematurely
}
} # end of loop for DataElement

# Print the AUTO CATEGORISED responses for this Table - request review
## Print the AUTO CATEGORISED responses for this Table and request review ----
Output_auto <- subset(Output, Note == 'AUTO CATEGORISED')
cat("\n \n")
cli_alert_warning("Please check the auto categorised data elements are accurate:")
cat("\n \n")
print(Output_auto[, c("Table", "DataElement", "Domain_code")])
cli_alert_warning("Please check the auto categorised data elements are accurate for table {meta_json$dataModel$childDataClasses[[dc]]$label}:")
cat("\n \n")
auto_row_str <- readline(prompt = "Enter row numbers you'd like to edit or press enter to accept the auto categorisations: ")

if (auto_row_str != "") {
print(Output_auto[, c("DataElement", "Domain_code","Note")])

# extract the rows to edit
auto_row_Error <- TRUE
auto_row_InRange <- TRUE
while (auto_row_Error==TRUE | auto_row_InRange == FALSE) {
if (auto_row_InRange == FALSE) {
cli_alert_danger('The row numbers you provided are not in range. Reference the auto categorised row numbers on the screen and try again')}
tryCatch({
cat("\n \n");
cli_alert_info("Press enter to accept the auto categorisations for table {meta_json$dataModel$childDataClasses[[dc]]$label} or enter each row you'd like to edit:");
cat("\n");
auto_row <- scan(file="",what=0);
auto_row_Error <- FALSE;
auto_row_InRange <- all(auto_row %in% which(Output$Note == 'AUTO CATEGORISED'))},
error=function(e) {auto_row_Error <- TRUE; print(e); cat("\n"); cli_alert_danger('Your input is in the wrong format, try again')})
}

auto_row <- as.integer(unlist(strsplit(auto_row_str,","))) #probably sub-optimal coding
if (length(auto_row) != 0) {

for (datavar_auto in auto_row) {
for (datavar_auto in unique(auto_row)) {

# collect user responses
decision_output <- user_categorisation(selectTable_df$Label[datavar_auto],selectTable_df$Description[datavar_auto],selectTable_df$Type[datavar_auto])
Expand All @@ -206,26 +232,38 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL, look_up_file =
}
}

# Ask if user wants to review their responses for this Table
## Ask if user wants to review their responses for this Table ----
review_cats <- ""
while (review_cats != "Y" & review_cats != "N") {
while (review_cats != "Y" & review_cats != "y" & review_cats != "N" & review_cats != "n") {
cat("\n \n")
review_cats <- readline(prompt = "Would you like to review your categorisations? (Y/N) ")
review_cats <- readline(prompt = "Would you like to review your categorisations? (y/n): ")
}

if (review_cats == 'Y') {

if (review_cats == 'Y' | review_cats == 'y') {
Output_not_auto <- subset(Output, Note != 'AUTO CATEGORISED')
cat("\n \n")
print(Output_not_auto[, c("Table", "DataElement", "Domain_code")])
print(Output_not_auto[, c("DataElement", "Domain_code","Note")])
cat("\n \n")
not_auto_row_str <- readline(prompt = "Enter row numbers you'd like to edit or press enter to accept: ")

if (not_auto_row_str != "") {
# extract the rows to edit
not_auto_row_Error <- TRUE
not_auto_row_InRange <- TRUE
while (not_auto_row_Error==TRUE | not_auto_row_InRange == FALSE) {
if (not_auto_row_InRange == FALSE) {
cli_alert_danger('The row numbers you provided are not in range. Reference the row numbers on the screen and try again')}
tryCatch({
cat("\n \n");
cli_alert_info("Press enter to accept your categorisations for table {meta_json$dataModel$childDataClasses[[dc]]$label} or enter each row you'd like to edit:");
cat("\n");
not_auto_row <- scan(file="",what=0);
not_auto_row_Error <- FALSE;
not_auto_row_InRange <- all(not_auto_row %in% which(Output$Note != 'AUTO CATEGORISED'))},
error=function(e) {not_auto_row_Error <- TRUE; print(e); cat("\n"); cli_alert_danger('Your input is in the wrong format, reference the row numbers and try again')})
}

not_auto_row <- as.integer(unlist(strsplit(not_auto_row_str,","))) #probably sub-optimal coding
if (length(not_auto_row) != 0) {

for (datavar_not_auto in not_auto_row) {
for (datavar_not_auto in unique(not_auto_row)) {

# collect user responses
decision_output <- user_categorisation(selectTable_df$Label[datavar_not_auto],selectTable_df$Description[datavar_not_auto],selectTable_df$Type[datavar_not_auto])
Expand All @@ -236,11 +274,18 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL, look_up_file =
}
}

# Save final categorisations for this Table
Output[Output == ""] <- NA
## Fill in columns that have all rows identical ----
Output$Initials <- User_Initials
Output$MetaDataVersion <- meta_json$dataModel$documentationVersion
Output$MetaDataLastUpdated <- meta_json$dataModel$lastUpdated
Output$DomainListDesc <- DomainListDesc
Output$Dataset <- meta_json$dataModel$label
Output$Table <- meta_json$dataModel$childDataClasses[[dc]]$label

## Save final categorisations for this Table ----
utils::write.csv(Output, output_fname, row.names = FALSE)
cat("\n")
cli_alert_info("Your final categorisations have been saved to {output_fname}")
cli_alert_success("Your final categorisations have been saved to {output_fname}")

} # end of loop for each table

Expand Down
31 changes: 10 additions & 21 deletions R/user_categorisation.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,18 @@ user_categorisation <- function(data_element,data_desc,data_type) {
"\n\nDATA TYPE -----> ", data_type, "\n"
))

state <- "redo"
while (state == "redo") {
# ask user for categorisation

# ask user for categorisation
decision <- ""
while (decision == "") {
cat("\n \n")
decision <- readline(prompt = "Categorise this data element: ")
}
decision <- ""
cat("\n \n")
while (decision == "") {
decision <- readline("Categorise this data element into one or more domains, e.g. 5 or 5,8: ")
}

# ask user for note on categorisation
decision_note <- ""
while (decision_note == "") {
cat("\n \n")
decision_note <- readline(prompt = "Notes (write 'N' if no notes): ")
}
# ask user for note on categorisation
cat("\n \n")
decision_note <- readline("Optional note to explain decision (or press enter to continue): ")

# check if user wants to continue or redo
cat("\n \n")
state <- readline(prompt = "Press enter to continue or write 'redo' to correct previous answer: ")
return(list(decision = decision,decision_note = decision_note))

}

return(list(decision = decision,decision_note = decision_note))

}
Loading
Loading