Skip to content

Commit 3878413

Browse files
authored
Merge pull request #205 from aim-rsf/user_testing
last checks before release
2 parents 824deb0 + 056bc6c commit 3878413

7 files changed

+24
-23
lines changed

R/inputs_collect.R

+5-8
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#' @importFrom cli cli_alert_info
1515
#' @importFrom utils read.csv
1616
#' @importFrom tools file_path_sans_ext
17-
#' @importFrom dplyr left_join
1817
#' @keywords internal
1918
#' @family metadata_map_internal
2019
#' @dev generate help files for unexported objects, for developers
@@ -105,8 +104,6 @@ data_load <- function(metadata_file, domain_file, look_up_file, quiet = FALSE) {
105104
if (!all(colnames(lookup) == expected_column_names)) {
106105
stop("look_up file does not have expected column names")
107106
}
108-
# Add Domain_Code column into lookup table
109-
lookup <- lookup %>% left_join(domains, by = "Domain_Name")
110107
# Check for look_up rows not covered by domain_list
111108
no_match <- lookup[!lookup$Domain_Name %in% domains$Domain_Name, ]
112109
if (nrow(no_match) != 0) {
@@ -148,16 +145,16 @@ data_load <- function(metadata_file, domain_file, look_up_file, quiet = FALSE) {
148145
#' @dev generate help files for unexported objects, for developers
149146

150147
output_copy <- function(dataset_name, output_dir, quiet = FALSE) {
151-
o_search <- paste0("^MAPPING_", gsub(" ", "", dataset_name), "*")
148+
o_search <- paste0("^MAPPING_", gsub(" ", "", dataset_name), ".*\\.csv$")
152149
csv_list <- data.frame(file = list.files(output_dir, pattern = o_search))
153150
if (nrow(csv_list) != 0) {
154151
df_list <- lapply(file.path(output_dir, csv_list$file), read.csv)
155152
df_prev <- do.call("rbind", df_list) # combine all df
156153
## make a new date column, order by earliest, remove duplicates & auto
157-
df_prev$time2 <- as.POSIXct(df_prev$timestamp, format = "%Y-%m-%d-%H-%M-%S")
154+
df_prev$time2 <- as.POSIXct(df_prev$timestamp, format = "%Y-%m-%d %H:%M:%S")
158155
df_prev <- df_prev[order(df_prev$time2), ]
159156
df_prev <- df_prev %>% distinct(variable, .keep_all = TRUE)
160-
df_prev <- df_prev[-(which(df_prev$note %in% "AUTO CATEGORISED")), ]
157+
df_prev <- df_prev[!(df_prev$note %in% "AUTO CATEGORISED"), ]
161158
df_prev_exist <- TRUE
162159
if (!quiet) {
163160
cli_alert_info(paste0("Copying from previous session(s):\n",
@@ -168,6 +165,6 @@ output_copy <- function(dataset_name, output_dir, quiet = FALSE) {
168165
df_prev_exist <- FALSE
169166
}
170167

171-
copy_prev <- list(df_prev = df_prev, df_prev_exist = df_prev_exist)
172-
copy_prev
168+
list(df_prev = df_prev, df_prev_exist = df_prev_exist)
169+
173170
}

R/metadata_map.R

+10-7
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ select.list <- NULL
5151
#' metadata_map(output_dir = temp_output_dir)
5252
#' }
5353
#' @export
54-
#' @importFrom dplyr %>% filter
54+
#' @importFrom dplyr %>% filter left_join
5555
#' @importFrom cli cli_alert_info cli_alert_success
5656
#' @importFrom utils packageVersion write.csv browseURL menu select.list
5757
#' @importFrom ggplot2 ggsave
@@ -94,7 +94,10 @@ metadata_map <- function(
9494
# SECTION 1 - DEFINE & PREPARE INPUTS ----
9595

9696
## Use 'data_load.R' to collect inputs (defaults or user inputs)
97-
data <- data_load(metadata_file, domain_file, look_up_file)
97+
data <- data_load(metadata_file, domain_file, look_up_file, quiet)
98+
99+
## Add Domain_Code column into lookup table
100+
data$lookup <- data$lookup %>% left_join(data$domains, by = "Domain_Name")
98101

99102
## Extract Dataset from metadata_file
100103
dataset <- data$metadata
@@ -129,7 +132,7 @@ metadata_map <- function(
129132
if (length(existing_files) > 0) {
130133
cli_alert_warning(paste("A bar plot already exists for this dataset, saved",
131134
"in your output directory.\nSkipping creation",
132-
"of a new plot and opening existing plot.\n\n"))
135+
"of a new plot.\n\n"))
133136
} else {
134137
bar_title <- paste0("\n'", dataset_name, "' contains ", n_tables,
135138
" table(s)")
@@ -173,13 +176,12 @@ metadata_map <- function(
173176

174177
#### Use 'output_copy.R' to copy from previous output(s) if they exist
175178
if (table_copy == TRUE) {
176-
copy_prev <- output_copy(dataset_name, output_dir)
179+
copy_prev <- output_copy(dataset_name, output_dir, quiet)
177180
df_prev_exist <- copy_prev$df_prev_exist
178181
df_prev <- copy_prev$df_prev
179182
} else {
180183
df_prev_exist <- FALSE
181184
}
182-
183185
table_note <- readline(paste("Optional note about this table: "))
184186

185187
#### Extract table from metadata
@@ -202,9 +204,10 @@ metadata_map <- function(
202204
table_df,
203205
df_prev_exist,
204206
df_prev,
205-
lookup = data$lookup,
207+
data$lookup,
206208
n_codes,
207-
output_df
209+
output_df,
210+
quiet
208211
)
209212

210213
output_df$timestamp <- timestamp_now

R/user_interactions.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ user_categorisation <- function(var, desc, type,
4949
suppressWarnings(decision_int_max <- max(decision_int, na.rm = TRUE))
5050
suppressWarnings(decision_int_min <- min(decision_int, na.rm = TRUE))
5151
if (decision_int_na == TRUE || decision_int_max > domain_code_max ||
52-
decision_int_min < 0) {
52+
decision_int_min < 1) {
5353
cli_alert_warning(paste("Formatting is invalid or integer out of",
5454
"range. Provide one integer or a comma",
5555
"seperated list of integers."))
@@ -123,7 +123,7 @@ user_categorisation_loop <- function(start_v, end_v, table_df, df_prev_exist,
123123

124124
##### search if variable matches any variable from previous table
125125
if (df_prev_exist == TRUE) {
126-
data_v_index <- which(df_prev$var ==
126+
data_v_index <- which(df_prev$variable ==
127127
table_df$Column.name[data_v])
128128
df_prev_subset <- df_prev[data_v_index, ]
129129
} else {

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ Add a note for processing this table:
8787
8888
Optional note about this table: Demo run
8989
```
90-
This table has 35 variables (see 'n of 35' below) but the demo run will only process the first 5 variables. If it skips over a variable (as is the case with 1, 2 and 3) this means it has been auto-categorised. Variable 4 has not been auto-categorised and is asking you to categorise it. You will be asked to categorise a variable with one (or more) of the numbers shown in the [key that has appeared in your plots tab](https://github.com/aim-rsf/mapmetadata/tree/main/inst/outputs/plots_tab_demo_domains.png). We input '8' which means 'Health Info' as defined by the key. The demo simplifies domains for demonstration purposes; for a research study, your domains are likely to be much more specific e.g. ‘Prenatal, antenatal, neonatal and birth’ or ‘Health behaviours and diet’.
90+
This table has 35 variables (see 'n of 35' below) but the demo run will only process the first 5 variables (by default). If it skips over a variable (as is the case with 1, 2 and 3) this means it has been auto-categorised. Variable 4 has not been auto-categorised and is asking you to categorise it. You will be asked to categorise a variable with one (or more) of the numbers shown in the [key that has appeared in your plots tab](https://github.com/aim-rsf/mapmetadata/tree/main/inst/outputs/plots_tab_demo_domains.png). We input '8' which means 'Health Info' as defined by the key. The demo simplifies domains for demonstration purposes; for a research study, your domains are likely to be much more specific e.g. ‘Prenatal, antenatal, neonatal and birth’ or ‘Health behaviours and diet’.
9191

9292
```
9393
ℹ Table variable 1 of 35 (5 left to process)

codemeta.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@
226226
},
227227
"SystemRequirements": null
228228
},
229-
"fileSize": "2425.471KB",
229+
"fileSize": "2451.978KB",
230230
"citation": [
231231
{
232232
"@type": "SoftwareSourceCode",

tests/testthat/test-metadata_map.R

+2-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ test_that("metadata_map function works correctly with user input", {
3737

3838
local_mocked_bindings(
3939
user_categorisation_loop = function(start_v, end_v, table_df, df_prev_exist,
40-
df_prev, lookup, df_plots, output_df) {
40+
df_prev, lookup, df_plots, output_df,
41+
quiet) {
4142
output_df <- read.csv(demo_output)
4243
output_df$timestamp <- NA
4344
output_df$table <- NA

vignettes/mapmetadata.Rmd

+3-3
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ It is recommended to view the descriptions of each table to give you more contex
4141

4242
# Understanding mapping outputs
4343

44-
Running [metadata_map](https://aim-rsf.github.io/mapmetadata/reference/metadata_map.html) will run this function in demo mode (as explained in the README) and generate four files in your project directory.
44+
Running [metadata_map](https://aim-rsf.github.io/mapmetadata/reference/metadata_map.html) will run this function in demo mode (as explained in the README) and generate six files in your project directory.
4545

4646
By default, the demo mode processes the first 5 variables in a table. Running `metadata_map(demo_number = 20)` will process the first 20 variables in a table.
4747
Here you can [view outputs generated from this longer demo run](https://github.com/aim-rsf/mapmetadata/tree/main/inst/outputs/), which include:
@@ -166,6 +166,6 @@ If it finds other MAPPING files with the same ID_Name in the output directory, a
166166
Selection: 4
167167
Processing Table 4 of 13 (CHILD)
168168

169-
Copying from previous session(s): MAPPING_360_NCCHD_CHILD_2025-02-14-18-14-01.csv
169+
Copying from previous session(s): MAPPING_360_NCCHD_BLOOD_TEST_2025-02-14-18-14-01.csv
170170
```
171-
Instead of 'AUTO CATEGORISED' the note will say 'COPIED FROM: CHILD' but you always have the option to manually override this copying during your review, or turn it off altogether with the option `table_copy = FALSE`.
171+
Instead of 'AUTO CATEGORISED' the note will say 'COPIED FROM: BLOOD_TEST' but you always have the option to manually override this copying during your review, or turn it off altogether with the option `table_copy = FALSE`.

0 commit comments

Comments
 (0)