From a0770c56218ad1356172117590ac5fa5debe891d Mon Sep 17 00:00:00 2001 From: Daniel de la Rosa Date: Fri, 31 Dec 2021 12:04:43 -0400 Subject: [PATCH 1/9] feat (labels): warnings and encoding --- DESCRIPTION | 2 +- R/Labels.R | 128 ++++++++++++++++++--------- R/browse_dict.R | 22 ++++- R/parse_dict.R | 7 ++ _pkgdown.yml | 14 +++ man/set_labels.Rd | 4 +- man/use_labels.Rd | 4 +- tests/testthat/_snaps/Labels.md | 50 +++++++++-- tests/testthat/_snaps/browse_dict.md | 13 ++- tests/testthat/test-Labels.R | 6 ++ vignettes/labeler.Rmd | 35 ++++---- 11 files changed, 206 insertions(+), 79 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 772ad0e..3ec484a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: labeler Title: Easy to use and share data labels in R -Version: 0.4.3 +Version: 0.5.0 Authors@R: c(person(given = "Daniel E.", family = "de la Rosa", diff --git a/R/Labels.R b/R/Labels.R index 8c01548..da55d59 100644 --- a/R/Labels.R +++ b/R/Labels.R @@ -5,6 +5,7 @@ #' @param vars [character]: If specified, only labels are assigned to those variables #' @param dict [data.frame]: Dictionary with all the data labels to use #' @param ignore_case [logical]: Indicate if case sensitive should be ignored. +#' @param warn [logical]: Indicate if warnings should be shown. #' #' @return The data entered in the \code{tbl} argument but with data labels #' @@ -29,12 +30,20 @@ #' str(df) #' str(set_labels(df, dict = dict)) #' } -set_labels <- function(tbl, dict, vars = NULL, ignore_case = FALSE) { +set_labels <- function(tbl, dict, vars = NULL, ignore_case = FALSE, warn = TRUE) { tbl - dict + enc <- dict[["encoding"]] + if (is.null(enc)) { + enc <- "" + } + dict <- dict[names(dict) != "encoding"] if (!is.null(vars)) { names <- vars - } else if (!is.null(tbl)) { # Validar luego las clases de tbl admitidas + } else if (!is.null(tbl)) { + if (!("data.frame" %in% class(tbl))) { # Validar las clases de tbl admitidas + warning("Operation not guaranteed for this class of objects. + Pass an object of type data.frame to ensure operation.") + } names <- names(tbl) } else { names <- NULL @@ -44,26 +53,41 @@ set_labels <- function(tbl, dict, vars = NULL, ignore_case = FALSE) { if (ignore_case) { name <- names(dict)[tolower(names(dict)) == tolower(name)] } - tryCatch({ - lab <- dict[[name]]$lab - }, error = function(e){ - lab <- NULL - }) + tryCatch( + { + lab <- dict[[name]]$lab + }, + error = function(e) { + lab <- NULL + } + ) if (!is.null(lab)) { + lab <- decode_lab(lab, enc) lab <- validateLab(lab, dict) - lab <- decode_lab(lab) } - tryCatch({ - labs <- dict[[name]]$labs - }, error = function(e){ - labs <- NULL - }) - if(!is.null(labs)){ + tryCatch( + { + labs <- dict[[name]]$labs + }, + error = function(e) { + labs <- NULL + } + ) + if (!is.null(labs)) { labs <- validateLabs(labs, dict) - labs <- decode_labs(labs) + labs <- decode_labs(labs, enc) name <- names(tbl)[tolower(names(tbl)) == tolower(name)] tbl <- labellize(tbl, name, lab, labs) } + if (warn) { + tryCatch({ + if (!is.null(dict[[name]][["warn"]])) { + warning(paste0(name, ": ", dict[[name]][["warn"]])) + } + }, error = function(e){ + + }) + } } } tbl @@ -80,7 +104,11 @@ validateLab <- function(lab, dict) { if (all(!is.null(lab), is.character(lab))) { if (startsWith(lab, "link::")) { link <- strsplit(lab, "::")[[1]][[2]] - lab <- dict[[link]]$lab + if (!is.null(dict[[link]])) { + lab <- dict[[link]]$lab + } else { + lab <- paste0("The link '", link, "' is not defined in the dictionary") + } lab <- validateLab(lab, dict) } } else { @@ -95,7 +123,12 @@ validateLabs <- function(labs, dict) { if (all(length(labs) == 1, is.character(labs))) { if (startsWith(labs, "link::")) { link <- strsplit(labs, "::")[[1]][[2]] - labs <- dict[[link]]$labs + if (!is.null(dict[[link]])) { + labs <- dict[[link]]$labs + } else { + labs <- c(999) + names(labs) <- paste0("The link '", link, "' is not defined in the dictionary") + } labs <- validateLabs(labs, dict) } } @@ -110,18 +143,18 @@ validateLabs <- function(labs, dict) { } -decode_lab <- function(lab) { +decode_lab <- function(lab, enc) { if (!is.null(lab)) { - lab <- iconv(lab) + lab <- iconv(lab, to = enc) } lab } -decode_labs <- function(labs) { +decode_labs <- function(labs, enc) { if (!is.null(labs)) { for (lab in seq_along(names(labs))) { - names(labs)[lab] <- iconv(names(labs)[lab]) + names(labs)[lab] <- iconv(names(labs)[lab], to = enc) } } labs @@ -130,18 +163,24 @@ decode_labs <- function(labs) { labellize <- function(tbl, var_name, lab, labs) { if (!is.null(lab)) { - tryCatch({ - tbl[[var_name]] <- sjlabelled::set_label(tbl[[var_name]], label = lab) - }, error = function(e){ + tryCatch( + { + tbl[[var_name]] <- sjlabelled::set_label(tbl[[var_name]], label = lab) + }, + error = function(e) { - }) + } + ) } if (!is.null(labs)) { - tryCatch({ - tbl[[var_name]] <- sjlabelled::set_labels(tbl[[var_name]], labels = labs) - }, error = function(e){ + tryCatch( + { + tbl[[var_name]] <- sjlabelled::set_labels(tbl[[var_name]], labels = labs) + }, + error = function(e) { - }) + } + ) } tbl } @@ -160,6 +199,7 @@ labellize <- function(tbl, var_name, lab, labs) { #' @param ignore_case [logical]: Indicate if case sensitive should be ignored #' @param check [logical]: If TRUE (default), the function will check if values #' present in variable are valid data labels in dictionary. +#' @param warn [logical]: Indicate if warnings should be shown. #' #' @return The data supplied in the \code{tbl} argument, but instead of values #' using the corresponding data labels @@ -188,9 +228,14 @@ labellize <- function(tbl, var_name, lab, labs) { #' enft #' use_labels(enft, dict = dict) #' } -use_labels <- function(tbl, dict, vars = NULL, ignore_case = F, check = TRUE) { +use_labels <- function(tbl, + dict, + vars = NULL, + ignore_case = F, + check = TRUE, + warn = TRUE) { if (!is.null(dict)) { - tbl <- set_labels(tbl, dict, vars, ignore_case) + tbl <- set_labels(tbl, dict, vars, ignore_case, warn) } if (!is.null(vars)) { names <- vars @@ -202,14 +247,14 @@ use_labels <- function(tbl, dict, vars = NULL, ignore_case = F, check = TRUE) { if (all(!is.null(tbl), !is.na(vars))) { nulas <- character() for (name in names) { - var <- tbl[, name] + var <- tbl[[name]] # tbl[, name] tryCatch( { var2 <- sjlabelled::as_label(var) - if(check){ + if (check) { unicos <- unique(var) unicos <- unicos[!is.na(unicos)] - if(sum(is.na(var)) != sum(is.na(var2))){ + if (sum(is.na(var)) != sum(is.na(var2))) { nulas <- c(nulas, name) var2 <- var } @@ -220,13 +265,13 @@ use_labels <- function(tbl, dict, vars = NULL, ignore_case = F, check = TRUE) { } ) - tbl[, name] <- var + tbl[[name]] <- var # tbl[, name] <- var } - if(length(nulas) > 0){ - - warning(paste0("The following (", length(nulas),") variables contain values - that are not in the dictionary and were not labeled: \n", paste(nulas, collapse = ", "), '. - Please see "https://adatar-do.github.io/labeler/articles/labeler.html" for more details.')) + if (warn) { + if (length(nulas) > 0) { + warning(paste0("The following (", length(nulas), ") variables contain values that are not in the dictionary and were not labeled: \n ", paste(nulas, collapse = ", "), '. + Please see "https://adatar-do.github.io/labeler/articles/labeler.html#warnings" for more details.')) + } } tbl } @@ -236,4 +281,3 @@ useLabels <- function(tbl, dict = NULL, vars = NULL) { deprecate_warn("0.1.1", "endomer::useLabels()", "use_labels()") use_labels(tbl, dict, vars) } - diff --git a/R/browse_dict.R b/R/browse_dict.R index e2bf84e..3564c7d 100644 --- a/R/browse_dict.R +++ b/R/browse_dict.R @@ -30,15 +30,29 @@ #' browse_dict(dict) #' } browse_dict <- function(dict, ...) { - datos <- data.frame(var = character(), lab = character(), labs = character()) + datos <- data.frame( + var = character(), + lab = character(), + labs = character(), + warn = character() + ) + enc <- dict[["encoding"]] + if(is.null(enc)){ + enc <- "" + } + dict <- dict[names(dict) != "encoding"] for (name in names(dict)) { datos[nrow(datos) + 1, "var"] <- name lab <- dict[[name]]$lab lab <- validateLab(lab, dict) - datos[nrow(datos), "lab"] <- decode_lab(lab) + datos[nrow(datos), "lab"] <- decode_lab(lab, enc) + warn <- dict[[name]]$warn + if(!is.null(warn)) { + datos[nrow(datos), "warn"] <- decode_lab(warn, enc) + } labs <- dict[[name]]$labs labs <- validateLabs(labs, dict) - labs <- decode_labs(labs) + labs <- decode_labs(labs, enc) labs2 <- "
" for (lab in seq_along(labs)) { labs2 <- paste0( @@ -75,4 +89,4 @@ browse_dict <- function(dict, ...) { } else { res } -} \ No newline at end of file +} diff --git a/R/parse_dict.R b/R/parse_dict.R index f2cb71a..92ff63d 100644 --- a/R/parse_dict.R +++ b/R/parse_dict.R @@ -18,6 +18,7 @@ #' } parse_dict <- function(dict) { for (var in names(dict)) { + if(!(var %in% c("encoding"))){ lab <- readr::parse_character(dict[[var]][["lab"]]) dict[[var]][["lab"]] <- lab labs <- names(dict[[var]][["labs"]]) @@ -29,6 +30,12 @@ parse_dict <- function(dict) { } names(dict[[var]][["labs"]]) <- labs } + warn <- dict[[var]][["warn"]] + if(!is.null(warn)) { + warn <- readr::parse_character(warn) + dict[[var]][["warn"]] <- warn + } + } } return(dict) } diff --git a/_pkgdown.yml b/_pkgdown.yml index c12f073..ded3b5c 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,3 +1,17 @@ +url: https://adatar-do.github.io/labeler/ + + template: bootstrap: 5 bootswatch: flatly + + +authors: + Daniel E. de la Rosa: + href: http://dnldelarosa.com + Adatar: + html:
Adatar Adatar
+ href: http://adatar.do + sidebar: + roles: [aut, cph] + diff --git a/man/set_labels.Rd b/man/set_labels.Rd index 6f350fe..5a6a27c 100644 --- a/man/set_labels.Rd +++ b/man/set_labels.Rd @@ -5,7 +5,7 @@ \title{Assign data labels to specified variables \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}} \usage{ -set_labels(tbl, dict, vars = NULL, ignore_case = FALSE) +set_labels(tbl, dict, vars = NULL, ignore_case = FALSE, warn = TRUE) } \arguments{ \item{tbl}{\link{data.frame}: Data.frame with the data} @@ -15,6 +15,8 @@ set_labels(tbl, dict, vars = NULL, ignore_case = FALSE) \item{vars}{\link{character}: If specified, only labels are assigned to those variables} \item{ignore_case}{\link{logical}: Indicate if case sensitive should be ignored.} + +\item{warn}{\link{logical}: Indicate if warnings should be shown.} } \value{ The data entered in the \code{tbl} argument but with data labels diff --git a/man/use_labels.Rd b/man/use_labels.Rd index 367a664..2973f57 100644 --- a/man/use_labels.Rd +++ b/man/use_labels.Rd @@ -5,7 +5,7 @@ \title{Use the data labels of a variable instead of its values \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}} \usage{ -use_labels(tbl, dict, vars = NULL, ignore_case = F, check = TRUE) +use_labels(tbl, dict, vars = NULL, ignore_case = F, check = TRUE, warn = TRUE) } \arguments{ \item{tbl}{\link{data.frame}: Data.frame with the data} @@ -19,6 +19,8 @@ have not yet been assigned. See \code{Details}} \item{check}{\link{logical}: If TRUE (default), the function will check if values present in variable are valid data labels in dictionary.} + +\item{warn}{\link{logical}: Indicate if warnings should be shown.} } \value{ The data supplied in the \code{tbl} argument, but instead of values diff --git a/tests/testthat/_snaps/Labels.md b/tests/testthat/_snaps/Labels.md index 3f5d4fc..b565ee0 100644 --- a/tests/testthat/_snaps/Labels.md +++ b/tests/testthat/_snaps/Labels.md @@ -179,10 +179,9 @@ Code use_labels(datos, dict = dict, ignore_case = T) Warning - The following (1) variables contain values - that are not in the dictionary and were not labeled: - Altura. - Please see "https://adatar-do.github.io/labeler/articles/labeler.html" for more details. + The following (1) variables contain values that are not in the dictionary and were not labeled: + Altura. + Please see "https://adatar-do.github.io/labeler/articles/labeler.html#warnings" for more details. Output SEXO Sexo2 Sexo3 ESTADO Altura 1 Hombre Hombre 1 Soltero 2 @@ -201,10 +200,9 @@ Code use_labels(dplyr::mutate(datos, Sexo2 = Sexo2 + 1), dict = dict) Warning - The following (1) variables contain values - that are not in the dictionary and were not labeled: - Sexo2. - Please see "https://adatar-do.github.io/labeler/articles/labeler.html" for more details. + The following (1) variables contain values that are not in the dictionary and were not labeled: + Sexo2. + Please see "https://adatar-do.github.io/labeler/articles/labeler.html#warnings" for more details. Output SEXO Sexo2 Sexo3 ESTADO Altura 1 Hombre 2 1 Soltero 2 @@ -235,3 +233,39 @@ 9 Mujer Mujer 2 Viudo 4 10 Mujer Mujer 2 Viudo 4 +--- + + Code + use_labels(datos, dict = dict) + Warning + Sexo3: Warning + Output + SEXO Sexo2 Sexo3 ESTADO Altura + 1 Hombre Hombre 1 Soltero 2 + 2 Hombre Hombre 1 Soltero 2 + 3 Hombre Hombre 1 Soltero 2 + 4 Hombre Hombre 1 Soltero 2 + 5 Hombre Hombre 1 Casado 3 + 6 Mujer Mujer 2 Casado 3 + 7 Mujer Mujer 2 Casado 3 + 8 Mujer Mujer 2 Casado 3 + 9 Mujer Mujer 2 Viudo 4 + 10 Mujer Mujer 2 Viudo 4 + +--- + + Code + use_labels(datos, dict = dict, warn = FALSE) + Output + SEXO Sexo2 Sexo3 ESTADO Altura + 1 Hombre Hombre 1 Soltero 2 + 2 Hombre Hombre 1 Soltero 2 + 3 Hombre Hombre 1 Soltero 2 + 4 Hombre Hombre 1 Soltero 2 + 5 Hombre Hombre 1 Casado 3 + 6 Mujer Mujer 2 Casado 3 + 7 Mujer Mujer 2 Casado 3 + 8 Mujer Mujer 2 Casado 3 + 9 Mujer Mujer 2 Viudo 4 + 10 Mujer Mujer 2 Viudo 4 + diff --git a/tests/testthat/_snaps/browse_dict.md b/tests/testthat/_snaps/browse_dict.md index 8634bcb..d812ddc 100644 --- a/tests/testthat/_snaps/browse_dict.md +++ b/tests/testthat/_snaps/browse_dict.md @@ -7,8 +7,13 @@ 1 SEXO Sexo de la persona 2 ESTADO Estado marital de la persona 3 ALTURA Altura de la persona - labs - 1
1: Hombre
2: Mujer
- 2
1: Soltero
2: Casado
3: Viudo
- 3
1: Baja
2: Media
3: Alta
+ labs warn + 1
1: Hombre
2: Mujer
+ 2
1: Soltero
2: Casado
3: Viudo
+ 3
1: Baja
2: Media
3: Alta
+ +--- + + Code + browse_dict(dict) diff --git a/tests/testthat/test-Labels.R b/tests/testthat/test-Labels.R index 1df3420..5dae95d 100644 --- a/tests/testthat/test-Labels.R +++ b/tests/testthat/test-Labels.R @@ -44,9 +44,15 @@ test_that("labels", { expect_snapshot(use_labels(dplyr::mutate(datos, Sexo2 = Sexo2 + 1), dict = dict)) expect_null(set_labels(NULL, NULL, NULL)) + expect_null(use_labels(NULL, NULL, NULL)) dict[["Sexo3"]] <- list(lab = 5, labs = c(1, 2)) dict[["Sexo4"]] <- list(lab = "link::Sexo3", labs = c(1, 2)) expect_snapshot(use_labels(datos, dict = dict)) + dict[["encoding"]] <- "latin1" + dict[["Sexo3"]][["warn"]] <- "Warning" + expect_snapshot(use_labels(datos, dict = dict)) + expect_snapshot(use_labels(datos, dict = dict, warn = FALSE)) + }) diff --git a/vignettes/labeler.Rmd b/vignettes/labeler.Rmd index f717872..b7926ab 100644 --- a/vignettes/labeler.Rmd +++ b/vignettes/labeler.Rmd @@ -21,7 +21,7 @@ library(labeler) Historically, software like SPSS, SAS or Stata have worked with labelled data. The closest function in R base corresponds to factors, which actually work in reverse, assigning values to text variables with few distinct values. -Being able to work with labelled data is one of the features that R users who come from the previously mentioned systems miss the most. +Being able to work with labelled data is one of the features that R users who come from the previously mentioned systems miss the most. Especially when processing survey and census data. In the R ecosystem there are 3 fundamental packages for working with labelled data `haven`, `labelled` y `sjlabelled`[^1]. @@ -82,11 +82,10 @@ browse_dict(dict) ## Setting the labels -The function `set_labels ()`, has as main objective to take the labels of variables and values available in a dictionary and apply them to a data table or dataframe. +The function `set_labels()`, has as main objective to take the labels of variables and values available in a dictionary and apply them to a data table or dataframe. -The following are basic examples of using this feature. For the sake of familiarity, we will call the data set `enft` which is the acronym for the National Survey of the Dominican Republic Workforce, hinting that the way in which the functions are used in these examples is the way in which the one you will use when working with these surveys. ```{r} -enft <- data.frame( +mydata <- data.frame( GENDER = c(rep(1, 5), rep(2, 5)), AGE = c(seq(1, 30, 3)), SEX = c(rep(1, 5), rep(2, 5)), @@ -94,25 +93,25 @@ enft <- data.frame( ) ``` -Notice in the code below that just by applying the `set_labels ()` function our data now contains labels we can use in our analysis. +In the code below, just by applying the `set_labels ()` function our data now contains labels we can use in our analysis. ```{r} -str(enft) +str(mydata) -str(set_labels(enft, dict = dict)) +str(set_labels(mydata, dict = dict)) ``` -On the other hand, it is important to mention that as indicated in the description of the function's arguments, it is possible to assign the data labels to specific variables. +It is important to mention that as indicated in the description of the function's arguments, it is possible to assign the data labels to specific variables. ```{r} -str(set_labels(enft, dict = dict, vars = c("GENDER"))) +str(set_labels(mydata, dict = dict, vars = c("GENDER"))) ``` Compare this result with the previous one to notice that in this case the `GENDER` variable contains data labels, as indicated in the function call. If you have paid enough attention to the output of the code, you will have noticed that the data labels have never been assigned to the `MARRIED` variable. This is so, because R is case-sensitive, which means that `MARRIED` as stated in the dataset is not the same as `Married` as stated in the dictionary. Fortunately, the `ignore_case` argument of the `set_labels` function is designed especially for this purpose. As shown below, using this argument you can make labels are assigned to this variable as well. ```{r} -str(set_labels(enft, dict = dict, ignore_case = T)) +str(set_labels(mydata, dict = dict, ignore_case = T)) ``` However, depending on the size and origin of your data set and dictionary, you should be cautious with this option, to avoid unexpected results. So it might be a good idea to use this argument when specifying target variables. @@ -123,19 +122,19 @@ Also, if for some reason you want to overwrite some of the predefined tags in th ```{r} dict[["SEX"]]$labs <- c("M" = 1, "W" = 2) -str(set_labels(enft, dict = dict, vars = c("SEX"))) +str(set_labels(mydata, dict = dict, vars = c("SEX"))) ``` -##chec Using the labels +## Using the labels As you may have noticed in all the previous examples, we have used the `str()` function to show the changes in the data, resulting from the application of the labels. This is so, because even having assigned the labels, your data still looks exactly the same. At this point the `use_labels()` function comes into play, which allows us to substitute the values of a variable for the corresponding labels. ```{r} -use_labels(enft, dict = dict, ignore_case = T) +use_labels(mydata, dict = dict, ignore_case = T) ``` You can use the `set_labels()` function to assign the labels at a point in the pipeline, and the `use_labels()` function at a later stage to use them. Or you can, as shown below and the easiest way, pass the dictionary directly to the `use_labels()` function which will take care of calling the `set_labels()` function automatically. Anyway, a more convenient option is to assign / use the labels once a summary table of the data has been generated. What has the advantage of operating on a smaller number of variables. ```{r} -enft %>% +mydata %>% dplyr::group_by( gender = GENDER, united = MARRIED @@ -147,7 +146,7 @@ enft %>% As can be seen in the previous example, even though the `GENDER` variable has been renamed, just by assigning the `ignore_case = TRUE` argument, the function was able to assign the data labels. While in the case of the variable `MARRIED` the name has been changed to one that is not present in the dictionary, so the function was not able to find the corresponding labels. This could have been achieved in the following way, for example: ```{r} -enft %>% +mydata %>% dplyr::group_by( gender = GENDER, married = MARRIED @@ -159,7 +158,7 @@ enft %>% Furthermore, regardless of which variables have assigned labels, it is possible to specify which of them you want to use. This is possible using the `vars` argument. Note also that the arguments can be combined, arriving at the desired result. ```{r} -enft %>% +mydata %>% dplyr::group_by( gender = GENDER, married = MARRIED @@ -169,10 +168,11 @@ enft %>% dplyr::rename("united" = "married") ``` +## Warnings When variables contain values that are not in the dictionary, `use_labels()` by **default** prevents labels from being assigned to these variables and prints a warning in the console indicating the variables that could not be labeled. You can assign the argument `use_labels(..., check = FALSE)` so that the labels are used regardless of those values that are not present in the dictionary, which will be converted to `NA`. In this sense, the best option is to add to the dictionary all the values that the variable has to take. If you just want to avoid the console warning, use the `vars` argument to specify the variables to be labelled, excluding the conflicting variables. ```{r} -enft %>% +mydata %>% dplyr::group_by( GENDER, SEX = SEX + 1 @@ -180,4 +180,3 @@ enft %>% dplyr::count() %>% use_labels(dict = dict) ``` - From a109acebe69ca22f807f3cb05b1090a4c5e12aa4 Mon Sep 17 00:00:00 2001 From: Daniel de la Rosa Date: Thu, 10 Feb 2022 21:53:42 -0400 Subject: [PATCH 2/9] feat (get_dict): new function added --- DESCRIPTION | 5 +++-- NAMESPACE | 1 + R/Labels.R | 2 +- R/get_dict.R | 31 +++++++++++++++++++++++++++++ man/get_dict.Rd | 24 ++++++++++++++++++++++ tests/testthat/_snaps/Labels.md | 4 ++-- tests/testthat/_snaps/get_dict.md | 11 ++++++++++ tests/testthat/_snaps/parse_dict.md | 15 ++++++++++++++ tests/testthat/test-get_dict.R | 13 ++++++++++++ tests/testthat/test-parse_dict.R | 10 ++++++++++ vignettes/labeler.Rmd | 11 ++++++---- 11 files changed, 118 insertions(+), 9 deletions(-) create mode 100644 R/get_dict.R create mode 100644 man/get_dict.Rd create mode 100644 tests/testthat/_snaps/get_dict.md create mode 100644 tests/testthat/_snaps/parse_dict.md create mode 100644 tests/testthat/test-get_dict.R create mode 100644 tests/testthat/test-parse_dict.R diff --git a/DESCRIPTION b/DESCRIPTION index 3ec484a..e86aa3d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: labeler Title: Easy to use and share data labels in R -Version: 0.5.0 +Version: 0.6.0 Authors@R: c(person(given = "Daniel E.", family = "de la Rosa", @@ -25,7 +25,8 @@ Imports: haven, readr, magrittr, - dplyr + dplyr, + labelled Suggests: testthat, covr, diff --git a/NAMESPACE b/NAMESPACE index e64b68d..490df11 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export("%>%") export(browse_dict) +export(get_dict) export(parse_dict) export(set_labels) export(use_labels) diff --git a/R/Labels.R b/R/Labels.R index da55d59..bfc822c 100644 --- a/R/Labels.R +++ b/R/Labels.R @@ -270,7 +270,7 @@ use_labels <- function(tbl, if (warn) { if (length(nulas) > 0) { warning(paste0("The following (", length(nulas), ") variables contain values that are not in the dictionary and were not labeled: \n ", paste(nulas, collapse = ", "), '. - Please see "https://adatar-do.github.io/labeler/articles/labeler.html#warnings" for more details.')) + Please see "https://adatar-do.github.io/labeler/articles/labeler.html#checking-labels" for more details.')) } } tbl diff --git a/R/get_dict.R b/R/get_dict.R new file mode 100644 index 0000000..24fe421 --- /dev/null +++ b/R/get_dict.R @@ -0,0 +1,31 @@ +#' Generate a dictionary of given dataset +#' `r lifecycle::badge("experimental")` +#' +#' @param tbl A labelled dataset +#' +#' @return A dictionary of the dataset +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' get_dict(tbl) +#' } +get_dict <- function(tbl){ + dict0 <- labelled::generate_dictionary(tbl) + dict <- list() + for (row in 1:nrow(dict0)) { + variable <- dict0[["variable"]][[row]] + lab <- dict0[["label"]][[row]] + labs <- dict0[["value_labels"]][[row]] + dict[[variable]] <- list() + if(!is.na(lab)){ + dict[[variable]][["lab"]] <- lab + } + if(!all(is.na(labs))){ + dict[[variable]][["labs"]] <- labs + } + } + dict +} + diff --git a/man/get_dict.Rd b/man/get_dict.Rd new file mode 100644 index 0000000..00f4d8d --- /dev/null +++ b/man/get_dict.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_dict.R +\name{get_dict} +\alias{get_dict} +\title{Generate a dictionary of given dataset +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}} +\usage{ +get_dict(tbl) +} +\arguments{ +\item{tbl}{A labelled dataset} +} +\value{ +A dictionary of the dataset +} +\description{ +Generate a dictionary of given dataset +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +} +\examples{ +\dontrun{ + get_dict(tbl) +} +} diff --git a/tests/testthat/_snaps/Labels.md b/tests/testthat/_snaps/Labels.md index b565ee0..0aab1e9 100644 --- a/tests/testthat/_snaps/Labels.md +++ b/tests/testthat/_snaps/Labels.md @@ -181,7 +181,7 @@ Warning The following (1) variables contain values that are not in the dictionary and were not labeled: Altura. - Please see "https://adatar-do.github.io/labeler/articles/labeler.html#warnings" for more details. + Please see "https://adatar-do.github.io/labeler/articles/labeler.html#checking-labels" for more details. Output SEXO Sexo2 Sexo3 ESTADO Altura 1 Hombre Hombre 1 Soltero 2 @@ -202,7 +202,7 @@ Warning The following (1) variables contain values that are not in the dictionary and were not labeled: Sexo2. - Please see "https://adatar-do.github.io/labeler/articles/labeler.html#warnings" for more details. + Please see "https://adatar-do.github.io/labeler/articles/labeler.html#checking-labels" for more details. Output SEXO Sexo2 Sexo3 ESTADO Altura 1 Hombre 2 1 Soltero 2 diff --git a/tests/testthat/_snaps/get_dict.md b/tests/testthat/_snaps/get_dict.md new file mode 100644 index 0000000..ff5e1df --- /dev/null +++ b/tests/testthat/_snaps/get_dict.md @@ -0,0 +1,11 @@ +# get_dict + + Code + get_dict(set_labels(tbl, dict)) + Output + $MARRIED + $MARRIED$lab + [1] "Married" + + + diff --git a/tests/testthat/_snaps/parse_dict.md b/tests/testthat/_snaps/parse_dict.md new file mode 100644 index 0000000..dc4aa67 --- /dev/null +++ b/tests/testthat/_snaps/parse_dict.md @@ -0,0 +1,15 @@ +# parse_dict + + Code + parse_dict(dict) + Output + $MARRIED + $MARRIED$lab + [1] "Married" + + $MARRIED$labs + S No + 1 2 + + + diff --git a/tests/testthat/test-get_dict.R b/tests/testthat/test-get_dict.R new file mode 100644 index 0000000..e2987b0 --- /dev/null +++ b/tests/testthat/test-get_dict.R @@ -0,0 +1,13 @@ +test_that("get_dict", { + local_edition(3) + dict <- list( + MARRIED = list( + lab = "Married", + labs = c("Yes" = 1, "No" = 2) + ) + ) + tbl <- data.frame( + MARRIED = c(1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1) + ) + expect_snapshot(get_dict(set_labels(tbl, dict))) +}) diff --git a/tests/testthat/test-parse_dict.R b/tests/testthat/test-parse_dict.R new file mode 100644 index 0000000..803d70c --- /dev/null +++ b/tests/testthat/test-parse_dict.R @@ -0,0 +1,10 @@ +test_that("parse_dict", { + local_edition(3) + dict <- list( + MARRIED = list( + lab = "Married", + labs = c("Sí" = 1, "No" = 2) + ) + ) + expect_snapshot(parse_dict(dict)) +}) diff --git a/vignettes/labeler.Rmd b/vignettes/labeler.Rmd index b7926ab..c3a19a7 100644 --- a/vignettes/labeler.Rmd +++ b/vignettes/labeler.Rmd @@ -23,12 +23,10 @@ Historically, software like SPSS, SAS or Stata have worked with labelled data. T Being able to work with labelled data is one of the features that R users who come from the previously mentioned systems miss the most. Especially when processing survey and census data. -In the R ecosystem there are 3 fundamental packages for working with labelled data `haven`, `labelled` y `sjlabelled`[^1]. +In the R ecosystem there are 3 fundamental packages for working with labelled data `haven`, `labelled` y `sjlabelled`[^1]. However, these packages work in a similar way. They all expect you to load previously labelled data or assign the labels manually for each of the variables. [^1]: See -However, these packages work in a similar way. They all expect you to load previously labelled data or assign the labels manually for each of the variables. - If you work with a dozen variables, this scheme is possible. However, as the number of variables grows, this scheme becomes difficult to handle. It should even be noted that some operations in R remove most of the attributes on objects, so perhaps some labels may even have to be assigned repeatedly. In this sense, `labeler` offers a new scheme to work with labelled data in R, which is based on: @@ -42,6 +40,7 @@ The `dictionary` object consists of a list that contains each of the variables t 1. **lab**: A character string that describes the variable. 2. **labs**: A named vector of the possible values that the variable takes. +3. **warn** (optional): A character string that describes the warning that should be shown when the variable is labeled. In this sense, a valid dictionary in `labeler` can be built and is displayed as follows: @@ -67,6 +66,10 @@ dict <- list( Following these basic criteria, you can create a custom dictionary, either by building it from scratch, or by adding the necessary labels for the variables that you create through your analysis. +### Building a dictionary, ¿automatically? + +If you are working with a labelled dataset, you can use the `labeler` package to automatically build a dictionary. This is done by using the `labeler::get_dictionary` function. This function takes as input the labeled dataset, and returns a dictionary in the structure described above. + ### Parsing the dictionary Working with non-ASCII characters in R is a tedious task. `labeler` offers a way to work with non-ASCII characters in dictionaries. For these purposes there is the `parse_dict()` function that allows parsing a dictionary to replace these characters. Note that this process is necessary only if the dictionary is intended to be saved as a valid R object, as part of a package for example. The other functions in `labeler` are designed to reverse this automatic forming process. @@ -168,7 +171,7 @@ mydata %>% dplyr::rename("united" = "married") ``` -## Warnings +## Checking labels When variables contain values that are not in the dictionary, `use_labels()` by **default** prevents labels from being assigned to these variables and prints a warning in the console indicating the variables that could not be labeled. You can assign the argument `use_labels(..., check = FALSE)` so that the labels are used regardless of those values that are not present in the dictionary, which will be converted to `NA`. In this sense, the best option is to add to the dictionary all the values that the variable has to take. If you just want to avoid the console warning, use the `vars` argument to specify the variables to be labelled, excluding the conflicting variables. ```{r} From de87f980b6b5d95cb5e3f1652b4fedfe1a1dc4fa Mon Sep 17 00:00:00 2001 From: Daniel de la Rosa Date: Tue, 22 Feb 2022 17:14:28 -0400 Subject: [PATCH 3/9] fix (labels): labels encoding --- DESCRIPTION | 2 +- R/Labels.R | 8 ++++---- R/get_dict.R | 8 ++++++-- R/parse_dict.R | 4 ++-- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e86aa3d..c810d66 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: labeler Title: Easy to use and share data labels in R -Version: 0.6.0 +Version: 0.6.1 Authors@R: c(person(given = "Daniel E.", family = "de la Rosa", diff --git a/R/Labels.R b/R/Labels.R index bfc822c..4a31b98 100644 --- a/R/Labels.R +++ b/R/Labels.R @@ -62,7 +62,7 @@ set_labels <- function(tbl, dict, vars = NULL, ignore_case = FALSE, warn = TRUE) } ) if (!is.null(lab)) { - lab <- decode_lab(lab, enc) + #lab <- decode_lab(lab, enc) lab <- validateLab(lab, dict) } tryCatch( @@ -75,7 +75,7 @@ set_labels <- function(tbl, dict, vars = NULL, ignore_case = FALSE, warn = TRUE) ) if (!is.null(labs)) { labs <- validateLabs(labs, dict) - labs <- decode_labs(labs, enc) + #labs <- decode_labs(labs, enc) name <- names(tbl)[tolower(names(tbl)) == tolower(name)] tbl <- labellize(tbl, name, lab, labs) } @@ -145,7 +145,7 @@ validateLabs <- function(labs, dict) { decode_lab <- function(lab, enc) { if (!is.null(lab)) { - lab <- iconv(lab, to = enc) + lab <- iconv(lab) } lab } @@ -154,7 +154,7 @@ decode_lab <- function(lab, enc) { decode_labs <- function(labs, enc) { if (!is.null(labs)) { for (lab in seq_along(names(labs))) { - names(labs)[lab] <- iconv(names(labs)[lab], to = enc) + names(labs)[lab] <- iconv(names(labs)[lab]) } } labs diff --git a/R/get_dict.R b/R/get_dict.R index 24fe421..12ccec9 100644 --- a/R/get_dict.R +++ b/R/get_dict.R @@ -19,11 +19,15 @@ get_dict <- function(tbl){ lab <- dict0[["label"]][[row]] labs <- dict0[["value_labels"]][[row]] dict[[variable]] <- list() + if(!all(is.na(labs))){ + dict[[variable]][["labs"]] <- labs + dict[[variable]][["lab"]] <- "" + } if(!is.na(lab)){ dict[[variable]][["lab"]] <- lab } - if(!all(is.na(labs))){ - dict[[variable]][["labs"]] <- labs + if(length(dict[[variable]]) == 0){ + dict[[variable]] <- NULL } } dict diff --git a/R/parse_dict.R b/R/parse_dict.R index 92ff63d..4af3eff 100644 --- a/R/parse_dict.R +++ b/R/parse_dict.R @@ -19,8 +19,8 @@ parse_dict <- function(dict) { for (var in names(dict)) { if(!(var %in% c("encoding"))){ - lab <- readr::parse_character(dict[[var]][["lab"]]) - dict[[var]][["lab"]] <- lab + lab <- dict[[var]][["lab"]] + dict[[var]][["lab"]] <- ifelse(any(is.null(lab), lab == ""), "", readr::parse_character(lab)) labs <- names(dict[[var]][["labs"]]) if (!is.null(labs)) { if (length(labs) > 1) { From 5a35a4f82a2aaadc5ff106e87540cf62e1472bf7 Mon Sep 17 00:00:00 2001 From: Daniel de la Rosa Date: Tue, 5 Apr 2022 20:34:38 -0400 Subject: [PATCH 4/9] fix (parse_dict): encoding issues #99 --- DESCRIPTION | 5 +++-- R/Labels.R | 33 ++++++++++++++++------------- R/parse_dict.R | 6 +++--- tests/testthat/_snaps/Labels.md | 26 ++++++++++++++--------- tests/testthat/_snaps/parse_dict.md | 15 ------------- 5 files changed, 40 insertions(+), 45 deletions(-) delete mode 100644 tests/testthat/_snaps/parse_dict.md diff --git a/DESCRIPTION b/DESCRIPTION index c810d66..2973185 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: labeler Title: Easy to use and share data labels in R -Version: 0.6.1 +Version: 0.6.2 Authors@R: c(person(given = "Daniel E.", family = "de la Rosa", @@ -26,7 +26,8 @@ Imports: readr, magrittr, dplyr, - labelled + labelled, + cli Suggests: testthat, covr, diff --git a/R/Labels.R b/R/Labels.R index 4a31b98..35d7079 100644 --- a/R/Labels.R +++ b/R/Labels.R @@ -41,7 +41,7 @@ set_labels <- function(tbl, dict, vars = NULL, ignore_case = FALSE, warn = TRUE) names <- vars } else if (!is.null(tbl)) { if (!("data.frame" %in% class(tbl))) { # Validar las clases de tbl admitidas - warning("Operation not guaranteed for this class of objects. + cli::cli_alert_info("Operation not guaranteed for this class of objects. Pass an object of type data.frame to ensure operation.") } names <- names(tbl) @@ -62,7 +62,7 @@ set_labels <- function(tbl, dict, vars = NULL, ignore_case = FALSE, warn = TRUE) } ) if (!is.null(lab)) { - #lab <- decode_lab(lab, enc) + lab <- decode_lab(lab, enc) lab <- validateLab(lab, dict) } tryCatch( @@ -75,14 +75,14 @@ set_labels <- function(tbl, dict, vars = NULL, ignore_case = FALSE, warn = TRUE) ) if (!is.null(labs)) { labs <- validateLabs(labs, dict) - #labs <- decode_labs(labs, enc) + labs <- decode_labs(labs, enc) name <- names(tbl)[tolower(names(tbl)) == tolower(name)] tbl <- labellize(tbl, name, lab, labs) } if (warn) { tryCatch({ if (!is.null(dict[[name]][["warn"]])) { - warning(paste0(name, ": ", dict[[name]][["warn"]])) + cli::cli_alert_warning(paste0(name, ": ", dict[[name]][["warn"]])) } }, error = function(e){ @@ -145,7 +145,7 @@ validateLabs <- function(labs, dict) { decode_lab <- function(lab, enc) { if (!is.null(lab)) { - lab <- iconv(lab) + lab <- iconv(lab, to = "utf8") } lab } @@ -154,7 +154,7 @@ decode_lab <- function(lab, enc) { decode_labs <- function(labs, enc) { if (!is.null(labs)) { for (lab in seq_along(names(labs))) { - names(labs)[lab] <- iconv(names(labs)[lab]) + names(labs)[lab] <- iconv(names(labs)[lab], to = "utf8") } } labs @@ -247,30 +247,33 @@ use_labels <- function(tbl, if (all(!is.null(tbl), !is.na(vars))) { nulas <- character() for (name in names) { - var <- tbl[[name]] # tbl[, name] + variable <- tbl[[name]] # tbl[, name] tryCatch( { - var2 <- sjlabelled::as_label(var) + var2 <- sjlabelled::as_label(variable) if (check) { - unicos <- unique(var) + unicos <- unique(variable) unicos <- unicos[!is.na(unicos)] - if (sum(is.na(var)) != sum(is.na(var2))) { + if (sum(is.na(variable)) != sum(is.na(var2))) { nulas <- c(nulas, name) - var2 <- var + var2 <- variable } } - var <- var2 + variable <- var2 }, error = function(e) { } ) - tbl[[name]] <- var # tbl[, name] <- var + tbl[[name]] <- variable # tbl[, name] <- variable } if (warn) { if (length(nulas) > 0) { - warning(paste0("The following (", length(nulas), ") variables contain values that are not in the dictionary and were not labeled: \n ", paste(nulas, collapse = ", "), '. - Please see "https://adatar-do.github.io/labeler/articles/labeler.html#checking-labels" for more details.')) + lnulas <- length(nulas) + names(nulas) <- rep("*", length(nulas)) + cli::cli_text("The following ({lnulas}) variable{?s} w{?as/ere} not labeled since {?it/they} contain values not present in the dictionary:") + cli::cli_bullets(nulas) + cli::cli_text("Please visit {.url {'https://adatar-do.github.io/labeler/articles/labeler.html#checking-labels'}} for more details.") } } tbl diff --git a/R/parse_dict.R b/R/parse_dict.R index 4af3eff..8895d6e 100644 --- a/R/parse_dict.R +++ b/R/parse_dict.R @@ -20,19 +20,19 @@ parse_dict <- function(dict) { for (var in names(dict)) { if(!(var %in% c("encoding"))){ lab <- dict[[var]][["lab"]] - dict[[var]][["lab"]] <- ifelse(any(is.null(lab), lab == ""), "", readr::parse_character(lab)) + dict[[var]][["lab"]] <- ifelse(any(is.null(lab), lab == ""), "", iconv(lab, to = "utf8")) #readr::parse_character(lab) labs <- names(dict[[var]][["labs"]]) if (!is.null(labs)) { if (length(labs) > 1) { for (lab in seq_along(labs)) { - labs[[lab]] <- readr::parse_character(labs[[lab]]) + labs[[lab]] <- iconv(labs[[lab]], to = "utf8")# readr::parse_character(labs[[lab]]) } } names(dict[[var]][["labs"]]) <- labs } warn <- dict[[var]][["warn"]] if(!is.null(warn)) { - warn <- readr::parse_character(warn) + warn <- iconv(warn, to = "utf8") # readr::parse_character(warn) dict[[var]][["warn"]] <- warn } } diff --git a/tests/testthat/_snaps/Labels.md b/tests/testthat/_snaps/Labels.md index 0aab1e9..dffe32e 100644 --- a/tests/testthat/_snaps/Labels.md +++ b/tests/testthat/_snaps/Labels.md @@ -178,10 +178,13 @@ Code use_labels(datos, dict = dict, ignore_case = T) - Warning - The following (1) variables contain values that are not in the dictionary and were not labeled: - Altura. - Please see "https://adatar-do.github.io/labeler/articles/labeler.html#checking-labels" for more details. + Message + The following (1) variable was not labeled since it contain values not present + in the dictionary: + * Altura + Please visit + for + more details. Output SEXO Sexo2 Sexo3 ESTADO Altura 1 Hombre Hombre 1 Soltero 2 @@ -199,10 +202,13 @@ Code use_labels(dplyr::mutate(datos, Sexo2 = Sexo2 + 1), dict = dict) - Warning - The following (1) variables contain values that are not in the dictionary and were not labeled: - Sexo2. - Please see "https://adatar-do.github.io/labeler/articles/labeler.html#checking-labels" for more details. + Message + The following (1) variable was not labeled since it contain values not present + in the dictionary: + * Sexo2 + Please visit + for + more details. Output SEXO Sexo2 Sexo3 ESTADO Altura 1 Hombre 2 1 Soltero 2 @@ -237,8 +243,8 @@ Code use_labels(datos, dict = dict) - Warning - Sexo3: Warning + Message + ! Sexo3: Warning Output SEXO Sexo2 Sexo3 ESTADO Altura 1 Hombre Hombre 1 Soltero 2 diff --git a/tests/testthat/_snaps/parse_dict.md b/tests/testthat/_snaps/parse_dict.md deleted file mode 100644 index dc4aa67..0000000 --- a/tests/testthat/_snaps/parse_dict.md +++ /dev/null @@ -1,15 +0,0 @@ -# parse_dict - - Code - parse_dict(dict) - Output - $MARRIED - $MARRIED$lab - [1] "Married" - - $MARRIED$labs - S No - 1 2 - - - From 5d650598f48e0d95cb15d07843a63daa542655e8 Mon Sep 17 00:00:00 2001 From: Daniel de la Rosa Date: Tue, 5 Apr 2022 20:36:38 -0400 Subject: [PATCH 5/9] fix (doc): readme fix --- README.Rmd | 2 +- README.md | 31 ++++++++++++++----------------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/README.Rmd b/README.Rmd index 598f9ae..e0822df 100644 --- a/README.Rmd +++ b/README.Rmd @@ -61,6 +61,6 @@ Please note that the `labeler` project is released with a [Contributor Code of C
```{r echo=FALSE} -Dmisc::big_button(href = "./articles/labeler.html") +Dmisc::big_button(text = "Get started!", href = "./articles/labeler.html") ``` diff --git a/README.md b/README.md index c38e802..56955df 100644 --- a/README.md +++ b/README.md @@ -58,20 +58,17 @@ By contributing to this project, you agree to abide by its terms.
- + + + + + + + + + + + Get started! + + + From 8471ef4417252a0b86943a9957e7ff53a6653fd1 Mon Sep 17 00:00:00 2001 From: Daniel de la Rosa Date: Tue, 5 Apr 2022 20:43:55 -0400 Subject: [PATCH 6/9] fix (test): tests snap updated --- tests/testthat/_snaps/Labels.md | 14 ++++---------- tests/testthat/_snaps/parse_dict.md | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 10 deletions(-) create mode 100644 tests/testthat/_snaps/parse_dict.md diff --git a/tests/testthat/_snaps/Labels.md b/tests/testthat/_snaps/Labels.md index dffe32e..444e3da 100644 --- a/tests/testthat/_snaps/Labels.md +++ b/tests/testthat/_snaps/Labels.md @@ -179,12 +179,9 @@ Code use_labels(datos, dict = dict, ignore_case = T) Message - The following (1) variable was not labeled since it contain values not present - in the dictionary: + The following (1) variable was not labeled since it contain values not present in the dictionary: * Altura - Please visit - for - more details. + Please visit for more details. Output SEXO Sexo2 Sexo3 ESTADO Altura 1 Hombre Hombre 1 Soltero 2 @@ -203,12 +200,9 @@ Code use_labels(dplyr::mutate(datos, Sexo2 = Sexo2 + 1), dict = dict) Message - The following (1) variable was not labeled since it contain values not present - in the dictionary: + The following (1) variable was not labeled since it contain values not present in the dictionary: * Sexo2 - Please visit - for - more details. + Please visit for more details. Output SEXO Sexo2 Sexo3 ESTADO Altura 1 Hombre 2 1 Soltero 2 diff --git a/tests/testthat/_snaps/parse_dict.md b/tests/testthat/_snaps/parse_dict.md new file mode 100644 index 0000000..482f820 --- /dev/null +++ b/tests/testthat/_snaps/parse_dict.md @@ -0,0 +1,15 @@ +# parse_dict + + Code + parse_dict(dict) + Output + $MARRIED + $MARRIED$lab + [1] "Married" + + $MARRIED$labs + Sí No + 1 2 + + + From d096ed93545522dc9087ef26825263e7dbedab39 Mon Sep 17 00:00:00 2001 From: Daniel de la Rosa Date: Thu, 14 Apr 2022 21:39:41 -0400 Subject: [PATCH 7/9] fix (parse_dict): finally working --- DESCRIPTION | 2 +- R/Labels.R | 14 +++++++++++--- R/browse_dict.R | 2 +- R/parse_dict.R | 19 ++++++++++++++----- README.Rmd | 2 +- README.md | 2 +- vignettes/labeler.Rmd | 2 ++ 7 files changed, 31 insertions(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2973185..fca6b49 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: labeler Title: Easy to use and share data labels in R -Version: 0.6.2 +Version: 0.6.5 Authors@R: c(person(given = "Daniel E.", family = "de la Rosa", diff --git a/R/Labels.R b/R/Labels.R index 35d7079..7435000 100644 --- a/R/Labels.R +++ b/R/Labels.R @@ -82,7 +82,7 @@ set_labels <- function(tbl, dict, vars = NULL, ignore_case = FALSE, warn = TRUE) if (warn) { tryCatch({ if (!is.null(dict[[name]][["warn"]])) { - cli::cli_alert_warning(paste0(name, ": ", dict[[name]][["warn"]])) + cli::cli_alert_warning(paste0(name, ": ", decode_warn(dict[[name]][["warn"]]))) } }, error = function(e){ @@ -143,7 +143,7 @@ validateLabs <- function(labs, dict) { } -decode_lab <- function(lab, enc) { +decode_lab <- function(lab, enc = "") { if (!is.null(lab)) { lab <- iconv(lab, to = "utf8") } @@ -151,7 +151,7 @@ decode_lab <- function(lab, enc) { } -decode_labs <- function(labs, enc) { +decode_labs <- function(labs, enc = "") { if (!is.null(labs)) { for (lab in seq_along(names(labs))) { names(labs)[lab] <- iconv(names(labs)[lab], to = "utf8") @@ -161,6 +161,14 @@ decode_labs <- function(labs, enc) { } +decode_warn <- function(warn, enc = "") { + if (!is.null(warn)) { + warn <- iconv(warn, to = "utf8") + } + warn +} + + labellize <- function(tbl, var_name, lab, labs) { if (!is.null(lab)) { tryCatch( diff --git a/R/browse_dict.R b/R/browse_dict.R index 3564c7d..eb16f7e 100644 --- a/R/browse_dict.R +++ b/R/browse_dict.R @@ -48,7 +48,7 @@ browse_dict <- function(dict, ...) { datos[nrow(datos), "lab"] <- decode_lab(lab, enc) warn <- dict[[name]]$warn if(!is.null(warn)) { - datos[nrow(datos), "warn"] <- decode_lab(warn, enc) + datos[nrow(datos), "warn"] <- decode_warn(warn, enc) } labs <- dict[[name]]$labs labs <- validateLabs(labs, dict) diff --git a/R/parse_dict.R b/R/parse_dict.R index 8895d6e..1e2be11 100644 --- a/R/parse_dict.R +++ b/R/parse_dict.R @@ -1,7 +1,9 @@ #' Replace non ASCII characters with ASCII equivalents #' `r lifecycle::badge('experimental')` #' -#' @param dict A dictionary. See `vignette('labeler', package = "labeler")` for details. +#' @param dict [list] A dictionary. See `vignette('labeler', package = "labeler")` for details. +#' @param .decode [logical] If FALSE (default) the file is encoded for safe writing. +#' If TRUE the file is decode to UTF-8. #' #' @return A dictionary with all non ASCII characters replaced with ASCII #' @export @@ -16,23 +18,30 @@ #' ) #' parse_dict(dict) #' } -parse_dict <- function(dict) { +parse_dict <- function(dict, .decode = FALSE) { for (var in names(dict)) { if(!(var %in% c("encoding"))){ lab <- dict[[var]][["lab"]] - dict[[var]][["lab"]] <- ifelse(any(is.null(lab), lab == ""), "", iconv(lab, to = "utf8")) #readr::parse_character(lab) + dict[[var]][["lab"]] <- ifelse( + any( + is.null(lab), + lab == "" + ), + "", + ifelse(.decode, iconv(lab, to = "utf8"), readr::parse_character(lab)) #iconv(lab, to = "utf8") + ) labs <- names(dict[[var]][["labs"]]) if (!is.null(labs)) { if (length(labs) > 1) { for (lab in seq_along(labs)) { - labs[[lab]] <- iconv(labs[[lab]], to = "utf8")# readr::parse_character(labs[[lab]]) + labs[[lab]] <- ifelse(.decode,iconv(labs[[lab]], to = "utf8") ,readr::parse_character(labs[[lab]])) #iconv(labs[[lab]], to = "utf8") } } names(dict[[var]][["labs"]]) <- labs } warn <- dict[[var]][["warn"]] if(!is.null(warn)) { - warn <- iconv(warn, to = "utf8") # readr::parse_character(warn) + warn <- ifelse(.decode, iconv(warn, to = "utf8"), readr::parse_character(warn)) #iconv(warn, to = "utf8") dict[[var]][["warn"]] <- warn } } diff --git a/README.Rmd b/README.Rmd index e0822df..c2d5fba 100644 --- a/README.Rmd +++ b/README.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( [![CRAN status](https://www.r-pkg.org/badges/version/labeler)](https://CRAN.R-project.org/package=labeler) -This package contains functions to assign and use data labels in an easy and efficient way. With an easy-to-share structure. +This package contains functions to assign and use data labels in an easy and efficient way. With an easy to share structure. ## Installation diff --git a/README.md b/README.md index 56955df..95973af 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ status](https://www.r-pkg.org/badges/version/labeler)](https://CRAN.R-project.or This package contains functions to assign and use data labels in an easy -and efficient way. With an easy-to-share structure. +and efficient way. With an easy to share structure. ## Installation diff --git a/vignettes/labeler.Rmd b/vignettes/labeler.Rmd index c3a19a7..5a977c8 100644 --- a/vignettes/labeler.Rmd +++ b/vignettes/labeler.Rmd @@ -74,6 +74,8 @@ If you are working with a labelled dataset, you can use the `labeler` package to Working with non-ASCII characters in R is a tedious task. `labeler` offers a way to work with non-ASCII characters in dictionaries. For these purposes there is the `parse_dict()` function that allows parsing a dictionary to replace these characters. Note that this process is necessary only if the dictionary is intended to be saved as a valid R object, as part of a package for example. The other functions in `labeler` are designed to reverse this automatic forming process. +> parse_dict(.decode = TRUE) para cuando el diccionario contiene letras no ASCII. + ### Browsing the dictionary Using the `browse_dict()` function, the user will be able to interactively consult the dictionary in order to become familiar with its content. From 9796cef30848251d098305a5da282baa9be3e4fa Mon Sep 17 00:00:00 2001 From: "Daniel E. de la Rosa" Date: Wed, 31 Jul 2024 16:41:49 -0400 Subject: [PATCH 8/9] feat: dict is null by default --- R/Labels.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/Labels.R b/R/Labels.R index 7435000..11b4798 100644 --- a/R/Labels.R +++ b/R/Labels.R @@ -237,13 +237,16 @@ labellize <- function(tbl, var_name, lab, labs) { #' use_labels(enft, dict = dict) #' } use_labels <- function(tbl, - dict, + dict = NULL, vars = NULL, ignore_case = F, check = TRUE, warn = TRUE) { + if (!is.null(dict)) { tbl <- set_labels(tbl, dict, vars, ignore_case, warn) + } else { + dict <- get_dict(tbl) } if (!is.null(vars)) { names <- vars From c2659bec75971acd282bb213ed4a4fc9401f84d3 Mon Sep 17 00:00:00 2001 From: "Daniel E. de la Rosa" Date: Wed, 31 Jul 2024 16:42:45 -0400 Subject: [PATCH 9/9] feat: browse_can receive the labelled df as argument --- R/browse_dict.R | 131 +++++++++++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 51 deletions(-) diff --git a/R/browse_dict.R b/R/browse_dict.R index eb16f7e..a5d145f 100644 --- a/R/browse_dict.R +++ b/R/browse_dict.R @@ -1,13 +1,13 @@ -#' Browse dictionary +#' Browse Dictionary #' `r lifecycle::badge('experimental')` #' -#' Allows you to browse the dictionary of the surveys in a web interface where -#' you can check the name and label of the variables, as well as the data labels. +#' Allows you to browse the dictionary of the surveys in a web interface, where +#' you can check the names and labels of the variables, as well as their data labels. #' -#' @param dict database dictionary -#' @param ... for testing purposes +#' @param dict A list representing the database dictionary. If not a list, it will be processed by \code{\link{get_dict}}. +#' @param ... Additional arguments for testing purposes. #' -#' @return a web interface with the data contained in the supplied dictionary +#' @return A web interface displaying the data contained in the supplied dictionary. #' #' @export #' @@ -30,42 +30,85 @@ #' browse_dict(dict) #' } browse_dict <- function(dict, ...) { - datos <- data.frame( - var = character(), - lab = character(), - labs = character(), - warn = character() - ) + .args <- list(...) + testing <- isTRUE(.args$testing) + + if (!inherits(dict, "list")) dict <- get_dict(dict) + + datos <- init_datos() + enc <- dict[["encoding"]] - if(is.null(enc)){ - enc <- "" - } + if (is.null(enc)) enc <- "" dict <- dict[names(dict) != "encoding"] + for (name in names(dict)) { - datos[nrow(datos) + 1, "var"] <- name - lab <- dict[[name]]$lab - lab <- validateLab(lab, dict) - datos[nrow(datos), "lab"] <- decode_lab(lab, enc) - warn <- dict[[name]]$warn - if(!is.null(warn)) { - datos[nrow(datos), "warn"] <- decode_warn(warn, enc) - } - labs <- dict[[name]]$labs - labs <- validateLabs(labs, dict) - labs <- decode_labs(labs, enc) - labs2 <- "
" - for (lab in seq_along(labs)) { - labs2 <- paste0( - labs2, - labs[[lab]], - ": ", - names(labs)[[lab]], - "
" + datos <- datos |> + dplyr::add_row( + var = name, + lab = get_lab(dict, name), + warn = get_warn(dict, name), + labs = get_labs(dict, name) ) - } - datos[nrow(datos), "labs"] <- paste0(labs2, "
") } - res <- DT::datatable(datos, + + + if (testing) { + datos + } else { + generate_dt(datos) + } +} + + + +get_lab <- function(dict, name) { + lab <- dict[[name]]$lab + lab <- validateLab(lab, dict) + datos[nrow(datos), "lab"] <- decode_lab(lab, enc) +} + + +get_warn <- function(dict, name) { + warn <- dict[[name]]$warn + if (!is.null(warn)) { + decode_warn(warn, enc) + } else { + warn + } +} + + +get_labs <- function(dict, name) { + labs <- dict[[name]]$labs + labs <- validateLabs(labs, dict) + labs <- decode_labs(labs, enc) + labs2 <- "
" + for (lab in seq_along(labs)) { + labs2 <- paste0( + labs2, + labs[[lab]], + ": ", + names(labs)[[lab]], + "
" + ) + } + paste0(labs2, "
") +} + + + +init_datos <- function() { + data.frame( + var = character(), + lab = character(), + labs = character(), + warn = character() + ) +} + + +generate_dt <- function(datos) { + DT::datatable(datos, escape = FALSE, width = "100%", options = list( autoWidth = FALSE, @@ -75,18 +118,4 @@ browse_dict <- function(dict, ...) { ) ) ) - - testing <- FALSE - tryCatch({ - testing <- list(...)[["testing"]] - }, - error = function(e) { - } - ) - - if (!is.null(testing)) { - datos - } else { - res - } }