Issues #156, #155, #154, #153, #152, #150, #149, #148, #104, #136

dbosak01 · Nov 17, 2023 · 1084662 · 1084662
1 parent d317b95
commit 1084662
Show file tree

Hide file tree

Showing 72 changed files with 4,588 additions and 6,767 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: libr
 Type: Package
 Title: Libraries, Data Dictionaries, and a Data Step for R
-Version: 1.2.8
+Version: 1.2.9
 Author: David J. Bosak
 Maintainer: David Bosak <dbosak01@gmail.com>
 Description: Contains a set of functions to create data libraries,
@@ -38,7 +38,7 @@ Imports:
     tools,
     Rcpp,
     data.table
-RoxygenNote: 7.2.0
+RoxygenNote: 7.2.3
 VignetteBuilder: knitr
 LinkingTo: 
     Rcpp
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,11 @@
+# libr 1.2.9
+* Fixed bug on `datastep()` when there are spaces in the column names 
+and output() function is used.
+* Some improvements to `datastep()` performance.
+* Send message on writing "sas7bdat" that functionality is not available.
+* Added where clause parameter to `libname()`.
+* Added automatic variables "first.X" and "last.X" for each by variable.
+
 # libr 1.2.8
 * Fixed bug on datastep that sometimes was causing variables to lose their attributes.
 * Added "where" parameter to datastep.

diff --git a/R/datastep.R b/R/datastep.R
@@ -41,6 +41,13 @@ e$output <- list()
 #' of the data step.  If you wish to keep the automatic variable values,
 #' assign the automatic variable to a new variable and keep that variable.
 #' 
+#' If there are multiple by group variables, the \code{first.} and \code{last.} 
+#' automatic variables indicates a either/or combination of all by variables.  
+#' In addition,
+#' \code{first.X} and \code{last.X} automatic variables will be created for 
+#' each variable, where "X" represents the name of the specified variable.
+#' As always, these names are case-sensitive.
+#' 
 #' @section Column Attributes:
 #' To set attributes for a column on your data, use the \code{attrib}
 #' parameter.  Example attributes include 'label', 'description', 
@@ -187,6 +194,8 @@ e$output <- list()
 #' This parameter will activate the \code{first.} and \code{last.} automatic
 #' variables, that indicate the first or last rows in a group.  These 
 #' automatic variables are useful for conditional processing on groups.
+#' The function will also create first and last automatic variables for each
+#' variable specified in the by group.
 #' @param calculate Steps to set up calculated variables.  
 #' Calculated variables are commonly generated with summary functions such as
 #' \code{mean}, \code{median}, \code{min}, \code{max}, etc.  It is more 
@@ -239,8 +248,9 @@ e$output <- list()
 #' @param merge A dataset or list of datasets to merge with the input
 #' data.  The merge operation will occur at the beginning of the datastep,
 #' prior to the execution of any steps.  When the \code{merge} operation is 
-#' requested, the \code{by} parameter will be used to indicate which variable(s)
-#' to merge by.  
+#' requested, the \code{merge_by} parameter will be used to indicate which variable(s)
+#' to merge by. If no \code{merge_by} is specified, the merge dataset columns will 
+#' simply be appended to the right of the input dataset.
 #' @param merge_by If the \code{merge} parameter is set, the \code{merge_by} 
 #' parameter will be used to identify the variable(s) to merge by. If merge 
 #' variables are the same on both datasets, the names may be passed as a simple 
@@ -701,55 +711,66 @@ datastep <- function(data, steps, keep = NULL,
     rowcount <- nrow(data)
   }
 
-  # Step through row by row
-  for (n. in seq_len(rowcount)) {
-
-    # Subset by row
-    rw <- data[n., , drop = FALSE]
-
-    # Put back any attributes dropped during row subset
-    rw <- copy_attributes(data_attributes, rw)
+  # If there is no code to step through
+  if (length(as.character(code)) == 1) {
 
+    # Just set original dataset
+    ret <- data
 
+  } else {
 
-    # Deal with retained variables
-    if (!is.null(retain)) {
-      if (length(ret) == 0) {
-        for (nm in names(retain)) {
-
-          # Populate with initial value
-          rw[[nm]] <- retain[[nm]]
-
-        }
-
-      } else {
-        for (nm in names(retain)) {
-
-          # Populate with value from previous row   
-          #data[n., nm] <- ret[n. - 1, nm]  way backup
-
-          rw[[nm]] <- ret[[n. - 1]][[nm]] # current
-
+    # Step through row by row
+    for (n. in seq_len(rowcount)) {
+
+      # Subset by row
+      rw <- data[n., , drop = FALSE]
+
+      # Put back any attributes dropped during row subset
+      rw <- copy_attributes(data_attributes, rw)
+
+
+
+      # Deal with retained variables
+      if (!is.null(retain)) {
+        if (length(ret) == 0) {
+          for (nm in names(retain)) {
+
+            # Populate with initial value
+            rw[[nm]] <- retain[[nm]]
+
+          }
 
+        } else {
+          for (nm in names(retain)) {
+
+            # Populate with value from previous row   
+            #data[n., nm] <- ret[n. - 1, nm]  way backup
+
+            rw[[nm]] <- ret[[n. - 1]][[nm]] # current
+
+
+          }
         }
       }
+
+
+      # Evaluate the code for the row
+      ret[[n.]]  <-  within(rw, eval(code), keepAttrs = TRUE)
+
+
     }
 
-
-    # Evaluate the code for the row
-    ret[[n.]]  <-  within(rw, eval(code), keepAttrs = TRUE)
-
-
-  }
+    # Bind all rows
+    if (hout) {
+      ret <- bind_rows(e$output, .id = "column_label")
+
+    } else {
+      ret <- bind_rows(ret, .id = "column_label")
+    }
+    ret["column_label"] <- NULL
 
-  # Bind all rows
-  if (hout) {
-    ret <- bind_rows(e$output, .id = "column_label")
-
-  } else {
-    ret <- bind_rows(ret, .id = "column_label")
   }
-  ret["column_label"] <- NULL
+
 
 
   # Delete
@@ -770,9 +791,7 @@ datastep <- function(data, steps, keep = NULL,
   ret <- ret[ ,c(orgnms, rtnms[!rtnms %in% orgnms])]
 
   # Remove automatic variables
-  ret["first."] <- NULL
-  ret["last."] <- NULL
-  ret["..delete"] <- NULL
+  ret <- remove_autos(ret, by)
 
   # Perform drop operation
   if (!is.null(drop)) {
@@ -1020,7 +1039,11 @@ output <- function() {
   nlst[["..delete"]] <- pf$..delete
 
   # Convert to data frame and append to output list
-  e$output[[length(e$output) + 1]] <- as.data.frame(nlst)
+  e$output[[length(e$output) + 1]] <- as.data.frame(nlst, 
+                                                    stringsAsFactors = FALSE, 
+                                                    make.names = FALSE,
+                                                    optional = FALSE,
+                                                    check.names = FALSE)
 
 
 }

diff --git a/R/dshelpers.R b/R/dshelpers.R
@@ -26,14 +26,22 @@ add_autos <- function(df, groups = NULL, sort_check = FALSE) {
     # Clear out any names on input 
     #names(res) <- NULL
 
-    df["first."] <- byfirst(res)
-    df["last."] <- bylast(res)
+    df[["first."]] <- byfirst(res)
+    df[["last."]] <- bylast(res)
+
+    # Add first and last for each by variable
+    for (nm in groups) {
+
+      df[[paste0("first.", nm)]] <- byfirst(df[[nm]])
+      df[[paste0("last.", nm)]] <- bylast(df[[nm]])
+
+    }
 
   } else {
 
     if (nrow(df) > 0) {
-      df["first."] <- c(TRUE, rep(FALSE, times = nrow(df) - 1))
-      df["last."] <- c(rep(FALSE, times = nrow(df) - 1), TRUE)
+      df[["first."]] <- c(TRUE, rep(FALSE, times = nrow(df) - 1))
+      df[["last."]] <- c(rep(FALSE, times = nrow(df) - 1), TRUE)
     }
 
   }
@@ -69,3 +77,23 @@ add_autos <- function(df, groups = NULL, sort_check = FALSE) {
 
 }
 
+
+remove_autos <- function(data, groups) {
+
+  ret <- data
+
+  ret[["first."]] <- NULL
+  ret[["last."]] <- NULL
+  ret[["..delete"]] <- NULL 
+
+  if (!is.null(groups)) {
+    for (nm in groups) {
+
+      ret[[paste0("first.", nm)]] <- NULL
+      ret[[paste0("last.", nm)]] <- NULL
+
+    }
+  }
+
+  return(ret)
+}
diff --git a/R/libname.R b/R/libname.R
@@ -89,7 +89,7 @@ e$env <- parent.frame()
 #' need to define import specifications for SAS® datasets.  The sas7bdat engine
 #' interprets empty strings, single blanks, and a single dot (".") as missing
 #' values. While the import of SAS® datasets is fairly reliable, sas7bdat files 
-#' exported with the sas7bdat engine sometimes cannot be read by SAS® software.
+#' cannot be written or exported with the sas7bdat engine.
 #' In these cases, it is recommended to export to another file format, such
 #' as csv or dbf, and then import into SAS®.}
 #' \item{\strong{xpt}: The SAS® transport file engine.  Transport format is 
@@ -174,6 +174,10 @@ e$env <- parent.frame()
 #' files.  Default is FALSE.
 #' @param log Whether to log the libname operation.  Default is TRUE. This
 #' parameter is used internally.
+#' @param where An expression used to subset all datasets in the library.  
+#' The where clause will be executed when the library is created.  Use the 
+#' Base R \code{\link{expression}} function to define the subset.  If a where clause
+#' is supplied, the library will be opened read-only.  
 #' @return The library object, with all data files loaded into the library
 #' list.  Items in the list will be named according the the file name,
 #' minus the file extension.
@@ -242,7 +246,7 @@ e$env <- parent.frame()
 libname <- function(name, directory_path, engine = "rds", 
                     read_only = FALSE, env = parent.frame(), 
                     import_specs = NULL, filter = NULL, standard_eval = FALSE,
-                    quiet = FALSE, log = TRUE) {
+                    quiet = FALSE, log = TRUE, where = NULL) {
   if (is.null(engine))
     stop("engine parameter cannot be null")
 
@@ -283,6 +287,8 @@ libname <- function(name, directory_path, engine = "rds",
   attr(l, "loaded") <- FALSE
   attr(l, "engine") <- engine
   attr(l, "import_specs") <- import_specs  
+  if (!is.null(where))
+    attr(l, "where") <- paste(as.character(where), collapse = "")
 
 
   # Get the file list according to the engine type
@@ -494,6 +500,11 @@ libname <- function(name, directory_path, engine = "rds",
           warning(paste("The name", nm, "already exists in the library.",
                         "Data will be replaced."))
 
+        if (!is.null(where)) {
+          dat <- tryCatch({subset(dat, eval(where))},
+                          error = function(cond){dat})
+        }
+
         # Set attributes on data frame
         attr(dat, "name") <- nm
         attr(dat, "extension") <- ext
@@ -780,6 +791,8 @@ lib_add <- function(x, ..., name = NULL) {
     else 
       typ <- "rds"
 
+    if (tolower(typ) == "sas7bdat")
+      message("Writing to 'sas7bdat' not supported.")
 
     i <- 1
     for (nm in nms) {
@@ -907,6 +920,8 @@ lib_replace <- function(x, ...,  name = NULL) {
     else 
       typ <- "rds"
 
+    if (tolower(typ) == "sas7bdat")
+      message("Writing to 'sas7bdat' not supported.")
 
     i <- 1
     for (nm in nms) {
@@ -1064,6 +1079,8 @@ lib_remove <- function(x, name) {
 #' behavior, use the \code{force} option to force \code{lib_write} to write
 #' every data file to disk.
 #' 
+#' Note that writing sas7bdat files to disk is not supported.
+#' 
 #' @param x The data library to write. 
 #' @param force Force writing each data file to disk, even if it has not 
 #' changed.
@@ -1136,6 +1153,11 @@ lib_write <- function(x, force = FALSE) {
       x <- lib_sync(x, lbnm)
     }
 
+    if (!is.null(attr(x, "engine"))) {
+      if (attr(x, "engine") == "sas7bdat")
+        message("Writing to 'sas7bdat' not supported.")
+    }
+
     # Get data names
     nms <- names(x)
 
@@ -1377,6 +1399,11 @@ lib_copy <- function(x, nm, directory_path, standard_eval = FALSE) {
   attr(cpy, "name") <- newlib
   attr(cpy, "path") <- directory_path
   attr(cpy, "loaded") <- FALSE
+
+  if (!is.null(attr(x, "engine"))) {
+    if (attr(x, "engine") == "sas7bdat")
+      message("Writing to 'sas7bdat' not supported.")
+  }
 
   # Get list of dataset names
   nms <- names(cpy)
@@ -1479,6 +1506,9 @@ lib_export <- function(x, nm, directory_path, engine,
   if (length(engine) > 1)
     stop("engine parameter does not accept more than one value.")
 
+  if (tolower(engine) == "sas7bdat")
+    message("Export to 'sas7bdat' not supported.")
+
   if (!tolower(engine) %in% c("rds", "rdata", "rda", 
                               "csv", "sas7bdat", "xlsx", "xls", "xpt", "dbf"))
     stop(paste0("Invalid engine parameter value: ", engine))

diff --git a/R/libr.R b/R/libr.R
@@ -37,5 +37,7 @@
 #' indiscriminately.
 #' @import common
 #' @docType package
+#' @aliases libr-package
+#' @keywords internal
 #' @name libr
 NULL
diff --git a/R/utilities.R b/R/utilities.R
@@ -66,6 +66,11 @@ print.lib <- function(x, ..., verbose = FALSE) {
     cat(at)
     cat(paste0("- path: ", attr(x, "path"), "\n"))
 
+    if (!is.null(attr(x, "where"))) {
+
+       cat(paste0("- where: ",  attr(x, "where"), "\n"))
+    }
+
     if (length(x) > 0)
       cat("- items:\n")
 
@@ -220,12 +225,12 @@ writeData <- function(x, ext, file_path, force = FALSE) {
 
   } else if (ext == "sas7bdat") {
 
-    if (!cs_comp | force) {
-      if (file.exists(file_path))
-        file.remove(file_path)
-      write_sas(x, file_path)
-      attr(x, "checksum") <- md5sum(file_path)
-    }
+    # if (!cs_comp | force) {
+    #   if (file.exists(file_path))
+    #     file.remove(file_path)
+    #   write_sas(x, file_path)
+    #   attr(x, "checksum") <- md5sum(file_path)
+    # }
 
   } else if (ext == "dbf") {