Skip to content

Commit 33ed942

Browse files
authored
Merge pull request #67 from aim-rsf/improve-auto
Improve manual checking of categorisations
2 parents a177547 + b1654f2 commit 33ed942

6 files changed

+231
-87
lines changed

NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Generated by roxygen2: do not edit by hand
22

33
export(domain_mapping)
4+
export(user_categorisation)
45
import(cli)
56
import(devtools)
67
import(grid)

R/domain_mapping.R

+82-42
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
#' # Respond 'Demo List' for the description of the domain list.
1717
#' # Respond 'Y' if you want to see the descriptions printed out.
1818
#' # Respond '1,10' to the RANGE OF VARIABLES prompt (or process the full 93 variables if you like!)
19-
#' # Reference the plot tab and categorise each variable into a single ('1')
20-
#' # or multiple ('1,2') domain.
19+
#' # Reference the plot tab and categorise each variable into a single ('1') domain
20+
#' # or multiple ('1,2') domains.
2121
#' # Write a note explaining your category choice (optional).
2222
#' @export
2323
#' @importFrom graphics plot.new
@@ -50,11 +50,14 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) {
5050
domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ALF ID*"), c("*OTHER ID*"), c("*DEMOGRAPHICS*"), domains)
5151
gridExtra::grid.table(domains_extend[1], cols = "Domain", rows = 0:(nrow(domains_extend) - 1))
5252

53+
# temp - delete later
54+
cat("\n You are in the improve-auto branch \n")
55+
5356
# Get user and demo list info for log file ----
5457
User_Initials <- ""
5558
while (User_Initials == "") {
5659
cat("\n \n")
57-
User_Initials <- readline(prompt = "ENTER INITIALS: ")
60+
User_Initials <- readline(prompt = "Enter Initials: ")
5861
}
5962

6063
# Print information about Data Asset ----
@@ -135,7 +138,7 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) {
135138
# User inputs ----
136139

137140
cat("\n \n")
138-
select_vars_n <- readline(prompt = "RANGE OF VARIABLES (DATA ELEMENTS) TO PROCESS (write as 'start_var,end_var' or press Enter to process all): ")
141+
select_vars_n <- readline(prompt = "Enter the range of variables (data elements) to process. Press Enter to process all: ")
139142
if (select_vars_n == "") {
140143
start_var <- 1
141144
end_var <- length(thisDataClass)
@@ -173,7 +176,6 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) {
173176
Output$Domain_code[datavar] <- "2"
174177
Output$Note[datavar] <- "AUTO CATEGORISED"
175178
} else if (grepl("_ID_", selectDataClass_df$Label[datavar], ignore.case = TRUE)) { # picking up generic IDs
176-
177179
Output[nrow(Output) + 1, ] <- NA
178180
Output$DataElement[datavar] <- selectDataClass_df$Label[datavar]
179181
Output$Domain_code[datavar] <- "3"
@@ -201,50 +203,88 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) {
201203
Output$Domain_code[datavar] <- "4"
202204
Output$Note[datavar] <- "AUTO CATEGORISED"
203205
} else {
204-
# user response
205-
cat(paste(
206-
"\nDATA ELEMENT -----> ", selectDataClass_df$Label[datavar],
207-
"\n\nDESCRIPTION -----> ", selectDataClass_df$Description[datavar],
208-
"\n\nDATA TYPE -----> ", selectDataClass_df$Type[datavar], "\n"
209-
))
210-
211-
decision <- ""
212-
while (decision == "") {
213-
cat("\n \n")
214-
decision <- readline(prompt = "CATEGORISE THIS VARIABLE (input a comma separated list of domain numbers): ")
215-
}
216-
217-
decision_note <- ""
218-
while (decision_note == "") {
219-
cat("\n \n")
220-
decision_note <- readline(prompt = "NOTES (write 'N' if no notes): ")
221-
}
222206

207+
# collect user responses
208+
decision_output <- user_categorisation(selectDataClass_df$Label[datavar],selectDataClass_df$Description[datavar],selectDataClass_df$Type[datavar])
209+
# input user responses into output
223210
Output[nrow(Output) + 1, ] <- NA
224211
Output$DataElement[datavar] <- selectDataClass_df$Label[datavar]
225-
Output$Domain_code[datavar] <- decision
226-
Output$Note[datavar] <- decision_note
212+
Output$Domain_code[datavar] <- decision_output$decision
213+
Output$Note[datavar] <- decision_output$decision_note
214+
}
215+
216+
# Fill in columns that have all rows identical
217+
Output$Initials <- User_Initials
218+
Output$MetaDataVersion <- meta_json$dataModel$documentationVersion
219+
Output$MetaDataLastUpdated <- meta_json$dataModel$lastUpdated
220+
Output$DomainListDesc <- DomainListDesc
221+
Output$DataAsset <- meta_json$dataModel$label
222+
Output$DataClass <- meta_json$dataModel$childDataClasses[[dc]]$label
223+
224+
# Save as we go in case session terminates prematurely
225+
Output[Output == ""] <- NA
226+
utils::write.csv(Output, output_fname, row.names = FALSE) # save as we go in case session terminates prematurely
227+
} # end of loop for variable
228+
229+
# Print the AUTO CATEGORISED responses for this DataClass - request review
230+
Output_auto <- subset(Output, Note == 'AUTO CATEGORISED')
231+
cat("\n \n")
232+
cli_alert_warning("Please check the auto categorised data elements are accurate:")
233+
cat("\n \n")
234+
print(Output_auto[, c("DataClass", "DataElement", "Domain_code")])
235+
cat("\n \n")
236+
auto_row_str <- readline(prompt = "Enter row numbers you'd like to edit or press enter to accept the auto categorisations: ")
237+
238+
if (auto_row_str != "") {
239+
240+
auto_row <- as.integer(unlist(strsplit(auto_row_str,","))) #probably sub-optimal coding
241+
242+
for (datavar_auto in auto_row) {
243+
244+
# collect user responses
245+
decision_output <- user_categorisation(selectDataClass_df$Label[datavar_auto],selectDataClass_df$Description[datavar_auto],selectDataClass_df$Type[datavar_auto])
246+
# input user responses into output
247+
Output$Domain_code[datavar_auto] <- decision_output$decision
248+
Output$Note[datavar_auto] <- decision_output$decision_note
227249
}
228250
}
229251

230-
# Fill in columns that have all rows identical
231-
Output$Initials <- User_Initials
232-
Output$MetaDataVersion <- meta_json$dataModel$documentationVersion
233-
Output$MetaDataLastUpdated <- meta_json$dataModel$lastUpdated
234-
Output$DomainListDesc <- DomainListDesc
235-
Output$DataAsset <- meta_json$dataModel$label
236-
Output$DataClass <- meta_json$dataModel$childDataClasses[[dc]]$label
252+
# Ask if user wants to review their responses for this DataClass
253+
review_cats <- ""
254+
while (review_cats != "Y" & review_cats != "N") {
255+
cat("\n \n")
256+
review_cats <- readline(prompt = "Would you like to review your categorisations? (Y/N) ")
257+
}
258+
259+
if (review_cats == 'Y') {
260+
261+
Output_not_auto <- subset(Output, Note != 'AUTO CATEGORISED')
262+
cat("\n \n")
263+
print(Output_not_auto[, c("DataClass", "DataElement", "Domain_code")])
264+
cat("\n \n")
265+
not_auto_row_str <- readline(prompt = "Enter row numbers you'd like to edit or press enter to accept: ")
266+
267+
if (not_auto_row_str != "") {
268+
269+
not_auto_row <- as.integer(unlist(strsplit(not_auto_row_str,","))) #probably sub-optimal coding
270+
271+
for (datavar_not_auto in not_auto_row) {
237272

238-
# Save file & print the responses to be saved
273+
# collect user responses
274+
decision_output <- user_categorisation(selectDataClass_df$Label[datavar_not_auto],selectDataClass_df$Description[datavar_not_auto],selectDataClass_df$Type[datavar_not_auto])
275+
# input user responses into output
276+
Output$Domain_code[datavar_not_auto] <- decision_output$decision
277+
Output$Note[datavar_not_auto] <- decision_output$decision_note
278+
}
279+
}
280+
}
281+
282+
# Save final categorisations for this DataClass
239283
Output[Output == ""] <- NA
240-
utils::write.csv(Output, output_fname, row.names = FALSE) # save as we go in case session terminates prematurely
284+
utils::write.csv(Output, output_fname, row.names = FALSE)
241285
cat("\n")
242-
cli_alert_info("The below responses will be saved to {output_fname}")
243-
cat("\n")
244-
print(Output[, c("DataClass", "DataElement", "Domain_code", "Note")])
245-
}
286+
cli_alert_info("Your final categorisations have been saved to {output_fname}")
287+
288+
} # end of loop for each data class
246289

247-
cat("\n \n")
248-
cli_alert_warning("Please check the auto categorised data elements are accurate!")
249-
cli_alert_warning("Manually edit csv file to correct errors, if needed.")
250-
}
290+
} # end of function

R/user_categorisation.R

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#' user_categorisation
#'
#' This function is used within the domain_mapping function. \cr \cr
#' It prints the name, description and data type of one variable to the R
#' console and asks the user to categorise it into one or more domains. \cr \cr
#' The user is then asked for a note on the categorisation; a reply is
#' required, so 'N' should be entered when there is nothing to record. \cr \cr
#' Finally the user may press Enter to accept both answers or write 'redo'
#' (any letter case, surrounding spaces ignored) to answer both prompts again.
#'
#' @param data_element Name of the variable
#' @param data_desc Description of the variable
#' @param data_type Data type of the variable
#' @return A list with two character elements: \code{decision} (the
#'   categorisation typed by the user) and \code{decision_note} (the note typed
#'   by the user). Leading and trailing whitespace is removed from both.
#' @export
user_categorisation <- function(data_element, data_desc, data_type) {
  # Keep asking the same question until the reply contains something other
  # than whitespace, so a stray Enter or space is never stored as an answer.
  # NOTE: base::readline() returns "" immediately in a non-interactive
  # session, so this loop only terminates when R is used interactively.
  ask_until_answered <- function(prompt) {
    answer <- ""
    while (answer == "") {
      cat("\n \n")
      answer <- trimws(readline(prompt = prompt))
    }
    answer
  }

  # Show the properties of the variable being categorised
  cat(paste(
    "\nDATA ELEMENT -----> ", data_element,
    "\n\nDESCRIPTION -----> ", data_desc,
    "\n\nDATA TYPE -----> ", data_type, "\n"
  ))

  repeat {
    decision <- ask_until_answered("Categorise this variable: ")
    decision_note <- ask_until_answered("Notes (write 'N' if no notes): ")

    # Anything other than 'redo' (including a plain Enter) accepts the answers.
    # The reply is normalised so that 'Redo' or 'redo ' is not mistaken for
    # acceptance, which would silently keep the answer the user meant to fix.
    cat("\n \n")
    state <- readline(prompt = "Press enter to continue or write 'redo' to correct previous answer: ")
    if (tolower(trimws(state)) != "redo") {
      break
    }
  }

  list(decision = decision, decision_note = decision_note)
}

0 commit comments

Comments
 (0)