Skip to content

Commit

Permalink
Issues #156, #155, #154, #153, #152, #150, #149, #148, #104, #136
Browse files Browse the repository at this point in the history
  • Loading branch information
dbosak01 committed Nov 17, 2023
1 parent d317b95 commit 1084662
Show file tree
Hide file tree
Showing 72 changed files with 4,588 additions and 6,767 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: libr
Type: Package
Title: Libraries, Data Dictionaries, and a Data Step for R
Version: 1.2.8
Version: 1.2.9
Author: David J. Bosak
Maintainer: David Bosak <dbosak01@gmail.com>
Description: Contains a set of functions to create data libraries,
Expand Down Expand Up @@ -38,7 +38,7 @@ Imports:
tools,
Rcpp,
data.table
RoxygenNote: 7.2.0
RoxygenNote: 7.2.3
VignetteBuilder: knitr
LinkingTo:
Rcpp
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# libr 1.2.9
* Fixed bug on `datastep()` when there are spaces in the column names
and output() function is used.
* Some improvements to `datastep()` performance.
* Send message on writing "sas7bdat" that functionality is not available.
* Added where clause parameter to `libname()`.
* Added automatic variables "first.X" and "last.X" for each by variable.

# libr 1.2.8
* Fixed bug on datastep that sometimes was causing variables to lose their attributes.
* Added "where" parameter to datastep.
Expand Down
113 changes: 68 additions & 45 deletions R/datastep.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ e$output <- list()
#' of the data step. If you wish to keep the automatic variable values,
#' assign the automatic variable to a new variable and keep that variable.
#'
#' If there are multiple by group variables, the \code{first.} and \code{last.}
#' automatic variables indicates a either/or combination of all by variables.
#' In addition,
#' \code{first.X} and \code{last.X} automatic variables will be created for
#' each variable, where "X" represents the name of the specified variable.
#' As always, these names are case-sensitive.
#'
#' @section Column Attributes:
#' To set attributes for a column on your data, use the \code{attrib}
#' parameter. Example attributes include 'label', 'description',
Expand Down Expand Up @@ -187,6 +194,8 @@ e$output <- list()
#' This parameter will activate the \code{first.} and \code{last.} automatic
#' variables, that indicate the first or last rows in a group. These
#' automatic variables are useful for conditional processing on groups.
#' The function will also create first and last automatic variables for each
#' variable specified in the by group.
#' @param calculate Steps to set up calculated variables.
#' Calculated variables are commonly generated with summary functions such as
#' \code{mean}, \code{median}, \code{min}, \code{max}, etc. It is more
Expand Down Expand Up @@ -239,8 +248,9 @@ e$output <- list()
#' @param merge A dataset or list of datasets to merge with the input
#' data. The merge operation will occur at the beginning of the datastep,
#' prior to the execution of any steps. When the \code{merge} operation is
#' requested, the \code{by} parameter will be used to indicate which variable(s)
#' to merge by.
#' requested, the \code{merge_by} parameter will be used to indicate which variable(s)
#' to merge by. If no \code{merge_by} is specified, the merge dataset columns will
#' simply be appended to the right of the input dataset.
#' @param merge_by If the \code{merge} parameter is set, the \code{merge_by}
#' parameter will be used to identify the variable(s) to merge by. If merge
#' variables are the same on both datasets, the names may be passed as a simple
Expand Down Expand Up @@ -701,55 +711,66 @@ datastep <- function(data, steps, keep = NULL,
rowcount <- nrow(data)
}

# Step through row by row
for (n. in seq_len(rowcount)) {

# Subset by row
rw <- data[n., , drop = FALSE]

# Put back any attributes dropped during row subset
rw <- copy_attributes(data_attributes, rw)
# If there is no code to step through
if (length(as.character(code)) == 1) {

# Just set original dataset
ret <- data

} else {

# Deal with retained variables
if (!is.null(retain)) {
if (length(ret) == 0) {
for (nm in names(retain)) {

# Populate with initial value
rw[[nm]] <- retain[[nm]]

}

} else {
for (nm in names(retain)) {

# Populate with value from previous row
#data[n., nm] <- ret[n. - 1, nm] way backup

rw[[nm]] <- ret[[n. - 1]][[nm]] # current

# Step through row by row
for (n. in seq_len(rowcount)) {

# Subset by row
rw <- data[n., , drop = FALSE]

# Put back any attributes dropped during row subset
rw <- copy_attributes(data_attributes, rw)



# Deal with retained variables
if (!is.null(retain)) {
if (length(ret) == 0) {
for (nm in names(retain)) {

# Populate with initial value
rw[[nm]] <- retain[[nm]]

}

} else {
for (nm in names(retain)) {

# Populate with value from previous row
#data[n., nm] <- ret[n. - 1, nm] way backup

rw[[nm]] <- ret[[n. - 1]][[nm]] # current


}
}
}


# Evaluate the code for the row
ret[[n.]] <- within(rw, eval(code), keepAttrs = TRUE)


}


# Evaluate the code for the row
ret[[n.]] <- within(rw, eval(code), keepAttrs = TRUE)


}
# Bind all rows
if (hout) {
ret <- bind_rows(e$output, .id = "column_label")

} else {
ret <- bind_rows(ret, .id = "column_label")
}
ret["column_label"] <- NULL

# Bind all rows
if (hout) {
ret <- bind_rows(e$output, .id = "column_label")

} else {
ret <- bind_rows(ret, .id = "column_label")
}
ret["column_label"] <- NULL



# Delete
Expand All @@ -770,9 +791,7 @@ datastep <- function(data, steps, keep = NULL,
ret <- ret[ ,c(orgnms, rtnms[!rtnms %in% orgnms])]

# Remove automatic variables
ret["first."] <- NULL
ret["last."] <- NULL
ret["..delete"] <- NULL
ret <- remove_autos(ret, by)

# Perform drop operation
if (!is.null(drop)) {
Expand Down Expand Up @@ -1020,7 +1039,11 @@ output <- function() {
nlst[["..delete"]] <- pf$..delete

# Convert to data frame and append to output list
e$output[[length(e$output) + 1]] <- as.data.frame(nlst)
e$output[[length(e$output) + 1]] <- as.data.frame(nlst,
stringsAsFactors = FALSE,
make.names = FALSE,
optional = FALSE,
check.names = FALSE)


}
Expand Down
36 changes: 32 additions & 4 deletions R/dshelpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,22 @@ add_autos <- function(df, groups = NULL, sort_check = FALSE) {
# Clear out any names on input
#names(res) <- NULL

df["first."] <- byfirst(res)
df["last."] <- bylast(res)
df[["first."]] <- byfirst(res)
df[["last."]] <- bylast(res)

# Add first and last for each by variable
for (nm in groups) {

df[[paste0("first.", nm)]] <- byfirst(df[[nm]])
df[[paste0("last.", nm)]] <- bylast(df[[nm]])

}

} else {

if (nrow(df) > 0) {
df["first."] <- c(TRUE, rep(FALSE, times = nrow(df) - 1))
df["last."] <- c(rep(FALSE, times = nrow(df) - 1), TRUE)
df[["first."]] <- c(TRUE, rep(FALSE, times = nrow(df) - 1))
df[["last."]] <- c(rep(FALSE, times = nrow(df) - 1), TRUE)
}

}
Expand Down Expand Up @@ -69,3 +77,23 @@ add_autos <- function(df, groups = NULL, sort_check = FALSE) {

}


remove_autos <- function(data, groups) {

ret <- data

ret[["first."]] <- NULL
ret[["last."]] <- NULL
ret[["..delete"]] <- NULL

if (!is.null(groups)) {
for (nm in groups) {

ret[[paste0("first.", nm)]] <- NULL
ret[[paste0("last.", nm)]] <- NULL

}
}

return(ret)
}
34 changes: 32 additions & 2 deletions R/libname.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ e$env <- parent.frame()
#' need to define import specifications for SAS® datasets. The sas7bdat engine
#' interprets empty strings, single blanks, and a single dot (".") as missing
#' values. While the import of SAS® datasets is fairly reliable, sas7bdat files
#' exported with the sas7bdat engine sometimes cannot be read by SAS® software.
#' cannot be written or exported with the sas7bdat engine.
#' In these cases, it is recommended to export to another file format, such
#' as csv or dbf, and then import into SAS®.}
#' \item{\strong{xpt}: The SAS® transport file engine. Transport format is
Expand Down Expand Up @@ -174,6 +174,10 @@ e$env <- parent.frame()
#' files. Default is FALSE.
#' @param log Whether to log the libname operation. Default is TRUE. This
#' parameter is used internally.
#' @param where An expression used to subset all datasets in the library.
#' The where clause will be executed when the library is created. Use the
#' Base R \code{\link{expression}} function to define the subset. If a where clause
#' is supplied, the library will be opened read-only.
#' @return The library object, with all data files loaded into the library
#' list. Items in the list will be named according the the file name,
#' minus the file extension.
Expand Down Expand Up @@ -242,7 +246,7 @@ e$env <- parent.frame()
libname <- function(name, directory_path, engine = "rds",
read_only = FALSE, env = parent.frame(),
import_specs = NULL, filter = NULL, standard_eval = FALSE,
quiet = FALSE, log = TRUE) {
quiet = FALSE, log = TRUE, where = NULL) {
if (is.null(engine))
stop("engine parameter cannot be null")

Expand Down Expand Up @@ -283,6 +287,8 @@ libname <- function(name, directory_path, engine = "rds",
attr(l, "loaded") <- FALSE
attr(l, "engine") <- engine
attr(l, "import_specs") <- import_specs
if (!is.null(where))
attr(l, "where") <- paste(as.character(where), collapse = "")


# Get the file list according to the engine type
Expand Down Expand Up @@ -494,6 +500,11 @@ libname <- function(name, directory_path, engine = "rds",
warning(paste("The name", nm, "already exists in the library.",
"Data will be replaced."))

if (!is.null(where)) {
dat <- tryCatch({subset(dat, eval(where))},
error = function(cond){dat})
}

# Set attributes on data frame
attr(dat, "name") <- nm
attr(dat, "extension") <- ext
Expand Down Expand Up @@ -780,6 +791,8 @@ lib_add <- function(x, ..., name = NULL) {
else
typ <- "rds"

if (tolower(typ) == "sas7bdat")
message("Writing to 'sas7bdat' not supported.")

i <- 1
for (nm in nms) {
Expand Down Expand Up @@ -907,6 +920,8 @@ lib_replace <- function(x, ..., name = NULL) {
else
typ <- "rds"

if (tolower(typ) == "sas7bdat")
message("Writing to 'sas7bdat' not supported.")

i <- 1
for (nm in nms) {
Expand Down Expand Up @@ -1064,6 +1079,8 @@ lib_remove <- function(x, name) {
#' behavior, use the \code{force} option to force \code{lib_write} to write
#' every data file to disk.
#'
#' Note that writing sas7bdat files to disk is not supported.
#'
#' @param x The data library to write.
#' @param force Force writing each data file to disk, even if it has not
#' changed.
Expand Down Expand Up @@ -1136,6 +1153,11 @@ lib_write <- function(x, force = FALSE) {
x <- lib_sync(x, lbnm)
}

if (!is.null(attr(x, "engine"))) {
if (attr(x, "engine") == "sas7bdat")
message("Writing to 'sas7bdat' not supported.")
}

# Get data names
nms <- names(x)

Expand Down Expand Up @@ -1377,6 +1399,11 @@ lib_copy <- function(x, nm, directory_path, standard_eval = FALSE) {
attr(cpy, "name") <- newlib
attr(cpy, "path") <- directory_path
attr(cpy, "loaded") <- FALSE

if (!is.null(attr(x, "engine"))) {
if (attr(x, "engine") == "sas7bdat")
message("Writing to 'sas7bdat' not supported.")
}

# Get list of dataset names
nms <- names(cpy)
Expand Down Expand Up @@ -1479,6 +1506,9 @@ lib_export <- function(x, nm, directory_path, engine,
if (length(engine) > 1)
stop("engine parameter does not accept more than one value.")

if (tolower(engine) == "sas7bdat")
message("Export to 'sas7bdat' not supported.")

if (!tolower(engine) %in% c("rds", "rdata", "rda",
"csv", "sas7bdat", "xlsx", "xls", "xpt", "dbf"))
stop(paste0("Invalid engine parameter value: ", engine))
Expand Down
2 changes: 2 additions & 0 deletions R/libr.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,7 @@
#' indiscriminately.
#' @import common
#' @docType package
#' @aliases libr-package
#' @keywords internal
#' @name libr
NULL
17 changes: 11 additions & 6 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ print.lib <- function(x, ..., verbose = FALSE) {
cat(at)
cat(paste0("- path: ", attr(x, "path"), "\n"))

if (!is.null(attr(x, "where"))) {

cat(paste0("- where: ", attr(x, "where"), "\n"))
}

if (length(x) > 0)
cat("- items:\n")

Expand Down Expand Up @@ -220,12 +225,12 @@ writeData <- function(x, ext, file_path, force = FALSE) {

} else if (ext == "sas7bdat") {

if (!cs_comp | force) {
if (file.exists(file_path))
file.remove(file_path)
write_sas(x, file_path)
attr(x, "checksum") <- md5sum(file_path)
}
# if (!cs_comp | force) {
# if (file.exists(file_path))
# file.remove(file_path)
# write_sas(x, file_path)
# attr(x, "checksum") <- md5sum(file_path)
# }

} else if (ext == "dbf") {

Expand Down
Loading

0 comments on commit 1084662

Please sign in to comment.