Skip to content

Commit

Permalink
Merge pull request #6 from dnldelarosa/main
Browse files Browse the repository at this point in the history
Enhanced Label Handling and Character Encoding in labeler
  • Loading branch information
dnldelarosa authored Sep 16, 2024
2 parents 6dfd89e + 904b7d1 commit ff9b187
Show file tree
Hide file tree
Showing 20 changed files with 442 additions and 150 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: labeler
Title: Easy to use and share data labels in R
Version: 0.4.3
Version: 0.6.5
Authors@R:
c(person(given = "Daniel E.",
family = "de la Rosa",
Expand All @@ -25,7 +25,9 @@ Imports:
haven,
readr,
magrittr,
dplyr
dplyr,
labelled,
cli
Suggests:
testthat,
covr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export("%>%")
export(browse_dict)
export(get_dict)
export(parse_dict)
export(set_labels)
export(use_labels)
Expand Down
150 changes: 104 additions & 46 deletions R/Labels.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#' @param vars [character]: If specified, only labels are assigned to those variables
#' @param dict [data.frame]: Dictionary with all the data labels to use
#' @param ignore_case [logical]: Indicate if case sensitive should be ignored.
#' @param warn [logical]: Indicate if warnings should be shown.
#'
#' @return The data entered in the \code{tbl} argument but with data labels
#'
Expand All @@ -29,12 +30,20 @@
#' str(df)
#' str(set_labels(df, dict = dict))
#' }
set_labels <- function(tbl, dict, vars = NULL, ignore_case = FALSE) {
set_labels <- function(tbl, dict, vars = NULL, ignore_case = FALSE, warn = TRUE) {
tbl
dict
enc <- dict[["encoding"]]
if (is.null(enc)) {
enc <- ""
}
dict <- dict[names(dict) != "encoding"]
if (!is.null(vars)) {
names <- vars
} else if (!is.null(tbl)) { # Validar luego las clases de tbl admitidas
} else if (!is.null(tbl)) {
if (!("data.frame" %in% class(tbl))) { # Validar las clases de tbl admitidas
cli::cli_alert_info("Operation not guaranteed for this class of objects.
Pass an object of type data.frame to ensure operation.")
}
names <- names(tbl)
} else {
names <- NULL
Expand All @@ -44,26 +53,41 @@ set_labels <- function(tbl, dict, vars = NULL, ignore_case = FALSE) {
if (ignore_case) {
name <- names(dict)[tolower(names(dict)) == tolower(name)]
}
tryCatch({
lab <- dict[[name]]$lab
}, error = function(e){
lab <- NULL
})
tryCatch(
{
lab <- dict[[name]]$lab
},
error = function(e) {
lab <- NULL
}
)
if (!is.null(lab)) {
lab <- decode_lab(lab, enc)
lab <- validateLab(lab, dict)
lab <- decode_lab(lab)
}
tryCatch({
labs <- dict[[name]]$labs
}, error = function(e){
labs <- NULL
})
if(!is.null(labs)){
tryCatch(
{
labs <- dict[[name]]$labs
},
error = function(e) {
labs <- NULL
}
)
if (!is.null(labs)) {
labs <- validateLabs(labs, dict)
labs <- decode_labs(labs)
labs <- decode_labs(labs, enc)
name <- names(tbl)[tolower(names(tbl)) == tolower(name)]
tbl <- labellize(tbl, name, lab, labs)
}
if (warn) {
tryCatch({
if (!is.null(dict[[name]][["warn"]])) {
cli::cli_alert_warning(paste0(name, ": ", decode_warn(dict[[name]][["warn"]])))
}
}, error = function(e){

})
}
}
}
tbl
Expand All @@ -80,7 +104,11 @@ validateLab <- function(lab, dict) {
if (all(!is.null(lab), is.character(lab))) {
if (startsWith(lab, "link::")) {
link <- strsplit(lab, "::")[[1]][[2]]
lab <- dict[[link]]$lab
if (!is.null(dict[[link]])) {
lab <- dict[[link]]$lab
} else {
lab <- paste0("The link '", link, "' is not defined in the dictionary")
}
lab <- validateLab(lab, dict)
}
} else {
Expand All @@ -95,7 +123,12 @@ validateLabs <- function(labs, dict) {
if (all(length(labs) == 1, is.character(labs))) {
if (startsWith(labs, "link::")) {
link <- strsplit(labs, "::")[[1]][[2]]
labs <- dict[[link]]$labs
if (!is.null(dict[[link]])) {
labs <- dict[[link]]$labs
} else {
labs <- c(999)
names(labs) <- paste0("The link '", link, "' is not defined in the dictionary")
}
labs <- validateLabs(labs, dict)
}
}
Expand All @@ -110,38 +143,52 @@ validateLabs <- function(labs, dict) {
}


decode_lab <- function(lab) {
decode_lab <- function(lab, enc = "") {
if (!is.null(lab)) {
lab <- iconv(lab)
lab <- iconv(lab, to = "utf8")
}
lab
}


decode_labs <- function(labs) {
decode_labs <- function(labs, enc = "") {
if (!is.null(labs)) {
for (lab in seq_along(names(labs))) {
names(labs)[lab] <- iconv(names(labs)[lab])
names(labs)[lab] <- iconv(names(labs)[lab], to = "utf8")
}
}
labs
}


decode_warn <- function(warn, enc = "") {
if (!is.null(warn)) {
warn <- iconv(warn, to = "utf8")
}
warn
}


labellize <- function(tbl, var_name, lab, labs) {
if (!is.null(lab)) {
tryCatch({
tbl[[var_name]] <- sjlabelled::set_label(tbl[[var_name]], label = lab)
}, error = function(e){
tryCatch(
{
tbl[[var_name]] <- sjlabelled::set_label(tbl[[var_name]], label = lab)
},
error = function(e) {

})
}
)
}
if (!is.null(labs)) {
tryCatch({
tbl[[var_name]] <- sjlabelled::set_labels(tbl[[var_name]], labels = labs)
}, error = function(e){
tryCatch(
{
tbl[[var_name]] <- sjlabelled::set_labels(tbl[[var_name]], labels = labs)
},
error = function(e) {

})
}
)
}
tbl
}
Expand All @@ -160,6 +207,7 @@ labellize <- function(tbl, var_name, lab, labs) {
#' @param ignore_case [logical]: Indicate if case sensitive should be ignored
#' @param check [logical]: If TRUE (default), the function will check if values
#' present in variable are valid data labels in dictionary.
#' @param warn [logical]: Indicate if warnings should be shown.
#'
#' @return The data supplied in the \code{tbl} argument, but instead of values
#' using the corresponding data labels
Expand Down Expand Up @@ -188,9 +236,17 @@ labellize <- function(tbl, var_name, lab, labs) {
#' enft
#' use_labels(enft, dict = dict)
#' }
use_labels <- function(tbl, dict, vars = NULL, ignore_case = F, check = TRUE) {
use_labels <- function(tbl,
dict = NULL,
vars = NULL,
ignore_case = F,
check = TRUE,
warn = TRUE) {

if (!is.null(dict)) {
tbl <- set_labels(tbl, dict, vars, ignore_case)
tbl <- set_labels(tbl, dict, vars, ignore_case, warn)
} else {
dict <- get_dict(tbl)
}
if (!is.null(vars)) {
names <- vars
Expand All @@ -202,31 +258,34 @@ use_labels <- function(tbl, dict, vars = NULL, ignore_case = F, check = TRUE) {
if (all(!is.null(tbl), !is.na(vars))) {
nulas <- character()
for (name in names) {
var <- tbl[, name]
variable <- tbl[[name]] # tbl[, name]
tryCatch(
{
var2 <- sjlabelled::as_label(var)
if(check){
unicos <- unique(var)
var2 <- sjlabelled::as_label(variable)
if (check) {
unicos <- unique(variable)
unicos <- unicos[!is.na(unicos)]
if(sum(is.na(var)) != sum(is.na(var2))){
if (sum(is.na(variable)) != sum(is.na(var2))) {
nulas <- c(nulas, name)
var2 <- var
var2 <- variable
}
}
var <- var2
variable <- var2
},
error = function(e) {

}
)
tbl[, name] <- var
tbl[[name]] <- variable # tbl[, name] <- variable
}
if(length(nulas) > 0){

warning(paste0("The following (", length(nulas),") variables contain values
that are not in the dictionary and were not labeled: \n", paste(nulas, collapse = ", "), '.
Please see "https://adatar-do.github.io/labeler/articles/labeler.html" for more details.'))
if (warn) {
if (length(nulas) > 0) {
lnulas <- length(nulas)
names(nulas) <- rep("*", length(nulas))
cli::cli_text("The following ({lnulas}) variable{?s} w{?as/ere} not labeled since {?it/they} contain values not present in the dictionary:")
cli::cli_bullets(nulas)
cli::cli_text("Please visit {.url {'https://adatar-do.github.io/labeler/articles/labeler.html#checking-labels'}} for more details.")
}
}
tbl
}
Expand All @@ -236,4 +295,3 @@ useLabels <- function(tbl, dict = NULL, vars = NULL) {
deprecate_warn("0.1.1", "endomer::useLabels()", "use_labels()")
use_labels(tbl, dict, vars)
}

Loading

0 comments on commit ff9b187

Please sign in to comment.