From 1084662abeb84697f1b5b93b20c742978557cfe7 Mon Sep 17 00:00:00 2001 From: "David J. Bosak" Date: Fri, 17 Nov 2023 09:40:59 -0500 Subject: [PATCH] Issues #156, #155, #154, #153, #152, #150, #149, #148, #104, #136 --- DESCRIPTION | 4 +- NEWS.md | 8 + R/datastep.R | 113 ++-- R/dshelpers.R | 36 +- R/libname.R | 34 +- R/libr.R | 2 + R/utilities.R | 17 +- README.md | 4 +- _pkgdown.yml | 2 + docs/404.html | 159 ++--- docs/articles/index.html | 149 ++--- docs/articles/libr-basics.html | 51 +- docs/articles/libr-datastep.html | 369 +++++++----- docs/articles/libr-disclaimer.html | 27 +- docs/articles/libr-example1.html | 129 ++-- docs/articles/libr-example2.html | 157 ++--- docs/articles/libr-faq.html | 202 ++++--- docs/articles/libr-management.html | 47 +- docs/articles/libr.html | 147 +++-- docs/authors.html | 157 ++--- docs/index.html | 164 +++--- docs/news/index.html | 369 +++--------- docs/pkgdown.css | 83 +-- docs/pkgdown.js | 4 +- docs/pkgdown.yml | 6 +- docs/reference/datastep.html | 883 +++++++++++++--------------- docs/reference/delete.html | 203 ++----- docs/reference/dictionary.html | 215 +++---- docs/reference/dsarray.html | 258 +++----- docs/reference/dsattr.html | 327 ++++------ docs/reference/import_spec.html | 201 ++----- docs/reference/index.html | 313 ++-------- docs/reference/is.lib.html | 215 +++---- docs/reference/length.dsarray.html | 197 ++----- docs/reference/lib_add.html | 250 +++----- docs/reference/lib_copy.html | 259 +++----- docs/reference/lib_delete.html | 244 +++----- docs/reference/lib_export.html | 279 ++++----- docs/reference/lib_info.html | 235 +++----- docs/reference/lib_load.html | 253 +++----- docs/reference/lib_path.html | 221 +++---- docs/reference/lib_remove.html | 251 +++----- docs/reference/lib_replace.html | 254 +++----- docs/reference/lib_size.html | 227 +++---- docs/reference/lib_sync.html | 259 +++----- docs/reference/lib_unload.html | 255 +++----- docs/reference/lib_write.html | 279 ++++----- docs/reference/libname.html | 
464 +++++++-------- docs/reference/libr.html | 166 ++---- docs/reference/output.html | 291 ++++----- docs/reference/print.lib.html | 249 +++----- docs/reference/print.specs.html | 193 ++---- docs/reference/read.specs.html | 171 ++---- docs/reference/specs.html | 333 ++++------- docs/reference/sub-.dsarray.html | 263 +++------ docs/reference/write.specs.html | 191 ++---- docs/sitemap.xml | 63 +- man/datastep.Rd | 16 +- man/lib_write.Rd | 2 + man/libname.Rd | 10 +- man/libr.Rd | 2 + tests/testthat/data/demo_studya.dbf | Bin 0 -> 1621 bytes tests/testthat/data/demo_studya.xpt | Bin 0 -> 2720 bytes tests/testthat/data/demo_studyb.dbf | Bin 0 -> 581 bytes tests/testthat/data/demo_studyb.xpt | Bin 0 -> 2160 bytes tests/testthat/data/vect.rds | Bin 0 -> 93 bytes tests/testthat/test-datastep.R | 77 +++ tests/testthat/test-libname.R | 42 +- tests/testthat/test-manipulation.R | 34 +- vignettes/libr-datastep.Rmd | 110 +++- vignettes/libr-faq.Rmd | 100 +++- vignettes/libr.Rmd | 60 +- 72 files changed, 4588 insertions(+), 6767 deletions(-) create mode 100644 tests/testthat/data/demo_studya.dbf create mode 100644 tests/testthat/data/demo_studya.xpt create mode 100644 tests/testthat/data/demo_studyb.dbf create mode 100644 tests/testthat/data/demo_studyb.xpt create mode 100644 tests/testthat/data/vect.rds diff --git a/DESCRIPTION b/DESCRIPTION index 6712dc9..659c5c6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: libr Type: Package Title: Libraries, Data Dictionaries, and a Data Step for R -Version: 1.2.8 +Version: 1.2.9 Author: David J. 
Bosak Maintainer: David Bosak Description: Contains a set of functions to create data libraries, @@ -38,7 +38,7 @@ Imports: tools, Rcpp, data.table -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 VignetteBuilder: knitr LinkingTo: Rcpp diff --git a/NEWS.md b/NEWS.md index fd60d09..9eb8a40 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# libr 1.2.9 +* Fixed bug on `datastep()` when there are spaces in the column names +and output() function is used. +* Some improvements to `datastep()` performance. +* Send message on writing "sas7bdat" that functionality is not available. +* Added where clause parameter to `libname()`. +* Added automatic variables "first.X" and "last.X" for each by variable. + # libr 1.2.8 * Fixed bug on datastep that sometimes was causing variables to lose their attributes. * Added "where" parameter to datastep. diff --git a/R/datastep.R b/R/datastep.R index f0fc8d1..2d8b2a3 100644 --- a/R/datastep.R +++ b/R/datastep.R @@ -41,6 +41,13 @@ e$output <- list() #' of the data step. If you wish to keep the automatic variable values, #' assign the automatic variable to a new variable and keep that variable. #' +#' If there are multiple by group variables, the \code{first.} and \code{last.} +#' automatic variables indicate an either/or combination of all by variables. +#' In addition, +#' \code{first.X} and \code{last.X} automatic variables will be created for +#' each variable, where "X" represents the name of the specified variable. +#' As always, these names are case-sensitive. +#' #' @section Column Attributes: #' To set attributes for a column on your data, use the \code{attrib} #' parameter. Example attributes include 'label', 'description', @@ -187,6 +194,8 @@ e$output <- list() #' This parameter will activate the \code{first.} and \code{last.} automatic #' variables, that indicate the first or last rows in a group. These #' automatic variables are useful for conditional processing on groups. 
+#' The function will also create first and last automatic variables for each +#' variable specified in the by group. #' @param calculate Steps to set up calculated variables. #' Calculated variables are commonly generated with summary functions such as #' \code{mean}, \code{median}, \code{min}, \code{max}, etc. It is more @@ -239,8 +248,9 @@ e$output <- list() #' @param merge A dataset or list of datasets to merge with the input #' data. The merge operation will occur at the beginning of the datastep, #' prior to the execution of any steps. When the \code{merge} operation is -#' requested, the \code{by} parameter will be used to indicate which variable(s) -#' to merge by. +#' requested, the \code{merge_by} parameter will be used to indicate which variable(s) +#' to merge by. If no \code{merge_by} is specified, the merge dataset columns will +#' simply be appended to the right of the input dataset. #' @param merge_by If the \code{merge} parameter is set, the \code{merge_by} #' parameter will be used to identify the variable(s) to merge by. If merge #' variables are the same on both datasets, the names may be passed as a simple @@ -701,55 +711,66 @@ datastep <- function(data, steps, keep = NULL, rowcount <- nrow(data) } - # Step through row by row - for (n. in seq_len(rowcount)) { - - # Subset by row - rw <- data[n., , drop = FALSE] - - # Put back any attributes dropped during row subset - rw <- copy_attributes(data_attributes, rw) + # If there is no code to step through + if (length(as.character(code)) == 1) { + # Just set original dataset + ret <- data + } else { - # Deal with retained variables - if (!is.null(retain)) { - if (length(ret) == 0) { - for (nm in names(retain)) { - - # Populate with initial value - rw[[nm]] <- retain[[nm]] - - } - - } else { - for (nm in names(retain)) { - - # Populate with value from previous row - #data[n., nm] <- ret[n. - 1, nm] way backup - - rw[[nm]] <- ret[[n. - 1]][[nm]] # current - + # Step through row by row + for (n. 
in seq_len(rowcount)) { + + # Subset by row + rw <- data[n., , drop = FALSE] + + # Put back any attributes dropped during row subset + rw <- copy_attributes(data_attributes, rw) + + + + # Deal with retained variables + if (!is.null(retain)) { + if (length(ret) == 0) { + for (nm in names(retain)) { + + # Populate with initial value + rw[[nm]] <- retain[[nm]] + + } + } else { + for (nm in names(retain)) { + + # Populate with value from previous row + #data[n., nm] <- ret[n. - 1, nm] way backup + + rw[[nm]] <- ret[[n. - 1]][[nm]] # current + + + } } } + + + # Evaluate the code for the row + ret[[n.]] <- within(rw, eval(code), keepAttrs = TRUE) + + } - - # Evaluate the code for the row - ret[[n.]] <- within(rw, eval(code), keepAttrs = TRUE) - - - } + # Bind all rows + if (hout) { + ret <- bind_rows(e$output, .id = "column_label") + + } else { + ret <- bind_rows(ret, .id = "column_label") + } + ret["column_label"] <- NULL - # Bind all rows - if (hout) { - ret <- bind_rows(e$output, .id = "column_label") - - } else { - ret <- bind_rows(ret, .id = "column_label") } - ret["column_label"] <- NULL + # Delete @@ -770,9 +791,7 @@ datastep <- function(data, steps, keep = NULL, ret <- ret[ ,c(orgnms, rtnms[!rtnms %in% orgnms])] # Remove automatic variables - ret["first."] <- NULL - ret["last."] <- NULL - ret["..delete"] <- NULL + ret <- remove_autos(ret, by) # Perform drop operation if (!is.null(drop)) { @@ -1020,7 +1039,11 @@ output <- function() { nlst[["..delete"]] <- pf$..delete # Convert to data frame and append to output list - e$output[[length(e$output) + 1]] <- as.data.frame(nlst) + e$output[[length(e$output) + 1]] <- as.data.frame(nlst, + stringsAsFactors = FALSE, + make.names = FALSE, + optional = FALSE, + check.names = FALSE) } diff --git a/R/dshelpers.R b/R/dshelpers.R index ce8f539..873f127 100644 --- a/R/dshelpers.R +++ b/R/dshelpers.R @@ -26,14 +26,22 @@ add_autos <- function(df, groups = NULL, sort_check = FALSE) { # Clear out any names on input #names(res) <- 
NULL - df["first."] <- byfirst(res) - df["last."] <- bylast(res) + df[["first."]] <- byfirst(res) + df[["last."]] <- bylast(res) + + # Add first and last for each by variable + for (nm in groups) { + + df[[paste0("first.", nm)]] <- byfirst(df[[nm]]) + df[[paste0("last.", nm)]] <- bylast(df[[nm]]) + + } } else { if (nrow(df) > 0) { - df["first."] <- c(TRUE, rep(FALSE, times = nrow(df) - 1)) - df["last."] <- c(rep(FALSE, times = nrow(df) - 1), TRUE) + df[["first."]] <- c(TRUE, rep(FALSE, times = nrow(df) - 1)) + df[["last."]] <- c(rep(FALSE, times = nrow(df) - 1), TRUE) } } @@ -69,3 +77,23 @@ add_autos <- function(df, groups = NULL, sort_check = FALSE) { } + +remove_autos <- function(data, groups) { + + ret <- data + + ret[["first."]] <- NULL + ret[["last."]] <- NULL + ret[["..delete"]] <- NULL + + if (!is.null(groups)) { + for (nm in groups) { + + ret[[paste0("first.", nm)]] <- NULL + ret[[paste0("last.", nm)]] <- NULL + + } + } + + return(ret) +} diff --git a/R/libname.R b/R/libname.R index 65f2e25..22a1021 100644 --- a/R/libname.R +++ b/R/libname.R @@ -89,7 +89,7 @@ e$env <- parent.frame() #' need to define import specifications for SAS® datasets. The sas7bdat engine #' interprets empty strings, single blanks, and a single dot (".") as missing #' values. While the import of SAS® datasets is fairly reliable, sas7bdat files -#' exported with the sas7bdat engine sometimes cannot be read by SAS® software. +#' cannot be written or exported with the sas7bdat engine. #' In these cases, it is recommended to export to another file format, such #' as csv or dbf, and then import into SAS®.} #' \item{\strong{xpt}: The SAS® transport file engine. Transport format is @@ -174,6 +174,10 @@ e$env <- parent.frame() #' files. Default is FALSE. #' @param log Whether to log the libname operation. Default is TRUE. This #' parameter is used internally. +#' @param where An expression used to subset all datasets in the library. 
+#' The where clause will be executed when the library is created. Use the +#' Base R \code{\link{expression}} function to define the subset. If a where clause +#' is supplied, the library will be opened read-only. #' @return The library object, with all data files loaded into the library #' list. Items in the list will be named according the the file name, #' minus the file extension. @@ -242,7 +246,7 @@ e$env <- parent.frame() libname <- function(name, directory_path, engine = "rds", read_only = FALSE, env = parent.frame(), import_specs = NULL, filter = NULL, standard_eval = FALSE, - quiet = FALSE, log = TRUE) { + quiet = FALSE, log = TRUE, where = NULL) { if (is.null(engine)) stop("engine parameter cannot be null") @@ -283,6 +287,8 @@ libname <- function(name, directory_path, engine = "rds", attr(l, "loaded") <- FALSE attr(l, "engine") <- engine attr(l, "import_specs") <- import_specs + if (!is.null(where)) + attr(l, "where") <- paste(as.character(where), collapse = "") # Get the file list according to the engine type @@ -494,6 +500,11 @@ libname <- function(name, directory_path, engine = "rds", warning(paste("The name", nm, "already exists in the library.", "Data will be replaced.")) + if (!is.null(where)) { + dat <- tryCatch({subset(dat, eval(where))}, + error = function(cond){dat}) + } + # Set attributes on data frame attr(dat, "name") <- nm attr(dat, "extension") <- ext @@ -780,6 +791,8 @@ lib_add <- function(x, ..., name = NULL) { else typ <- "rds" + if (tolower(typ) == "sas7bdat") + message("Writing to 'sas7bdat' not supported.") i <- 1 for (nm in nms) { @@ -907,6 +920,8 @@ lib_replace <- function(x, ..., name = NULL) { else typ <- "rds" + if (tolower(typ) == "sas7bdat") + message("Writing to 'sas7bdat' not supported.") i <- 1 for (nm in nms) { @@ -1064,6 +1079,8 @@ lib_remove <- function(x, name) { #' behavior, use the \code{force} option to force \code{lib_write} to write #' every data file to disk. 
#' +#' Note that writing sas7bdat files to disk is not supported. +#' #' @param x The data library to write. #' @param force Force writing each data file to disk, even if it has not #' changed. @@ -1136,6 +1153,11 @@ lib_write <- function(x, force = FALSE) { x <- lib_sync(x, lbnm) } + if (!is.null(attr(x, "engine"))) { + if (attr(x, "engine") == "sas7bdat") + message("Writing to 'sas7bdat' not supported.") + } + # Get data names nms <- names(x) @@ -1377,6 +1399,11 @@ lib_copy <- function(x, nm, directory_path, standard_eval = FALSE) { attr(cpy, "name") <- newlib attr(cpy, "path") <- directory_path attr(cpy, "loaded") <- FALSE + + if (!is.null(attr(x, "engine"))) { + if (attr(x, "engine") == "sas7bdat") + message("Writing to 'sas7bdat' not supported.") + } # Get list of dataset names nms <- names(cpy) @@ -1479,6 +1506,9 @@ lib_export <- function(x, nm, directory_path, engine, if (length(engine) > 1) stop("engine parameter does not accept more than one value.") + if (tolower(engine) == "sas7bdat") + message("Export to 'sas7bdat' not supported.") + if (!tolower(engine) %in% c("rds", "rdata", "rda", "csv", "sas7bdat", "xlsx", "xls", "xpt", "dbf")) stop(paste0("Invalid engine parameter value: ", engine)) diff --git a/R/libr.R b/R/libr.R index f80504c..8d8ce76 100644 --- a/R/libr.R +++ b/R/libr.R @@ -37,5 +37,7 @@ #' indiscriminately. 
#' @import common #' @docType package +#' @aliases libr-package +#' @keywords internal #' @name libr NULL diff --git a/R/utilities.R b/R/utilities.R index f259d7b..8df33bb 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -66,6 +66,11 @@ print.lib <- function(x, ..., verbose = FALSE) { cat(at) cat(paste0("- path: ", attr(x, "path"), "\n")) + if (!is.null(attr(x, "where"))) { + + cat(paste0("- where: ", attr(x, "where"), "\n")) + } + if (length(x) > 0) cat("- items:\n") @@ -220,12 +225,12 @@ writeData <- function(x, ext, file_path, force = FALSE) { } else if (ext == "sas7bdat") { - if (!cs_comp | force) { - if (file.exists(file_path)) - file.remove(file_path) - write_sas(x, file_path) - attr(x, "checksum") <- md5sum(file_path) - } + # if (!cs_comp | force) { + # if (file.exists(file_path)) + # file.remove(file_path) + # write_sas(x, file_path) + # attr(x, "checksum") <- md5sum(file_path) + # } } else if (ext == "dbf") { diff --git a/README.md b/README.md index 2c1fa35..9fea455 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,13 @@ [![libr lifecycle](https://img.shields.io/badge/lifecycle-stable-blue.svg)](https://cran.r-project.org/package=libr) [![libr downloads](https://cranlogs.r-pkg.org/badges/libr)](https://cran.r-project.org/package=libr) [![libr total downloads](https://cranlogs.r-pkg.org/badges/grand-total/libr)](https://cran.r-project.org/package=libr) -[![R-CMD-check](https://github.com/dbosak01/libr/workflows/R-CMD-check/badge.svg)](https://github.com/dbosak01/libr/actions) +[![R-CMD-check](https://github.com/dbosak01/libr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/dbosak01/libr/actions/workflows/R-CMD-check.yaml) [![Codecov test coverage](https://codecov.io/gh/dbosak01/libr/branch/master/graph/badge.svg)](https://app.codecov.io/gh/dbosak01/libr?branch=master) # Introduction to **libr** - + R is a very flexible and powerful language. 
But there are some inconveniences when working with data: diff --git a/_pkgdown.yml b/_pkgdown.yml index 33d1404..bfd0429 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -10,6 +10,7 @@ articles: - libr-disclaimer - libr-example1 - libr-example2 + - libr-faq reference: - title: Library Functions @@ -59,6 +60,7 @@ navbar: type: inverse structure: right: [home, intro, articles, reference, faq, news, github] + left: [] components: faq: text: FAQ diff --git a/docs/404.html b/docs/404.html index 46a5ef4..84315d3 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,76 +1,34 @@ - - - - + + + + - Page not found (404) • libr - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + - - - - - + + + - - -
+
+
-
- +
+ + - - diff --git a/docs/articles/index.html b/docs/articles/index.html index 57b9900..ff527b8 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -1,76 +1,12 @@ - - - - - - - -Articles • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Articles • libr - - - - + + -
-
- -
- - -
- +
- - + + diff --git a/docs/articles/libr-basics.html b/docs/articles/libr-basics.html index f032668..18e804b 100644 --- a/docs/articles/libr-basics.html +++ b/docs/articles/libr-basics.html @@ -26,6 +26,8 @@ + +
+
@@ -126,9 +125,9 @@

Basic Library Operations

entire directory of data files. The library can then be manipulated as a whole using the lib_* functions in the libr package.

-
-

-Basic Library Operations

+
+

Basic Library Operations +

There are four main libr functions for creating and using a data library:

    @@ -166,15 +165,15 @@

    from the temp directory, loads it into memory, adds data to it, and then unloads and writes everything to disk:

    -library(libr)
    +library(libr)
     
     # Create temp directory
    -tmp <- tempdir()
    +tmp <- tempdir()
     
     # Save some data to temp directory
     # for illustration purposes
    -saveRDS(trees, file.path(tmp, "trees.rds"))
    -saveRDS(rock, file.path(tmp, "rocks.rds"))
    +saveRDS(trees, file.path(tmp, "trees.rds"))
    +saveRDS(rock, file.path(tmp, "rocks.rds"))
     
     # Create library
     libname(dat, tmp)
    @@ -203,11 +202,11 @@ 

    lib_load(dat) # Examine workspace -ls() +ls() # [1] "dat" "dat.rocks" "dat.trees" "tmp" # Use data from the library -summary(dat.rocks) +summary(dat.rocks) # area peri shape perm # Min. : 1016 Min. : 308.6 Min. :0.09033 Min. : 6.30 # 1st Qu.: 5305 1st Qu.:1414.9 1st Qu.:0.16226 1st Qu.: 76.45 @@ -217,7 +216,7 @@

    # Max. :12212 Max. :4864.2 Max. :0.46413 Max. :1300.00 # Add data to the library -dat.trees_subset <- subset(dat.trees, Girth > 11) +dat.trees_subset <- subset(dat.trees, Girth > 11) # Add more data to the library dat.cars <- mtcars @@ -226,7 +225,7 @@

    lib_unload(dat) # Examine workspace again -ls() +ls() # [1] "dat" "tmp" # Write the library to disk @@ -245,7 +244,7 @@

    lib_delete(dat) # Examine workspace again -ls() +ls() # [1] "tmp"

    Next: Library Management

@@ -260,11 +259,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.7.

@@ -273,5 +274,7 @@

+ + diff --git a/docs/articles/libr-datastep.html b/docs/articles/libr-datastep.html index 20faa8e..5cec620 100644 --- a/docs/articles/libr-datastep.html +++ b/docs/articles/libr-datastep.html @@ -26,6 +26,8 @@ + +
+
@@ -126,12 +125,12 @@

Data Step Operations

style of data processing. It is particularly advantageous when you wish to perform deeply nested conditional logic. It is also very useful for by-group processing.

-
-

-Example 1: Simple Data Step

+
+

Example 1: Simple Data Step +

Here is an example of a simple data step:

-library(libr)
+library(libr)
 
 # Add some columns to mtcars using data step logic
 df <- datastep(mtcars[1:10, 1:3], {
@@ -141,7 +140,7 @@ 

else mpgcat <- "Low" - recdt <- as.Date("1974-06-10") + recdt <- as.Date("1974-06-10") if (cyl == 8) is8cyl <- TRUE @@ -162,31 +161,31 @@

# Merc 230 22.8 4 140.8 1974-06-10 High NA # Merc 280 19.2 6 167.6 1974-06-10 Low NA

-
-

-Keep, Drop, and Rename

+
+

Keep, Drop, and Rename +

The data step has parameters to perform basic shaping of the resulting data frame. These parameters are ‘keep’, ‘drop’, and ‘rename’. For example, the above data step could have been performed by sending all columns into the data step, and keeping only the desired columns. Using the keep parameter also allows you to order the resulting columns.

-
-

-Example 2: Keeping Data Step Variables

+
+

Example 2: Keeping Data Step Variables +

-library(libr)
+library(libr)
 
 # Keep and order output columns 
 df <- datastep(mtcars[1:10,], 
-  keep = c("mpg", "cyl", "disp", "mpgcat", "recdt"), {
+  keep = c("mpg", "cyl", "disp", "mpgcat", "recdt"), {
 
     if (mpg >= 20) 
       mpgcat <- "High"
     else 
       mpgcat <- "Low"
       
-    recdt <- as.Date("1974-06-10")
+    recdt <- as.Date("1974-06-10")
     
     if (cyl == 8)
       is8cyl <- TRUE
@@ -207,24 +206,24 @@ 

# Merc 280 19.2 6 167.6 Low 1974-06-10

-
-

-The Retain Parameter

+
+

The Retain Parameter +

The retain parameter allows you to define variables that will be seeded with the value from the previous step. The retain option is useful for creating cumulative values or for performing conditions based on the value of the previous row.

-
-

-Example 3: Drop, Retain, and Rename Parameters

+
+

Example 3: Drop, Retain, and Rename Parameters +

-library(libr)
+library(libr)
 
 df <- datastep(mtcars[1:10, ],
-               drop = c("disp", "hp", "drat", "qsec",
+               drop = c("disp", "hp", "drat", "qsec",
                         "vs", "am", "gear", "carb"),
-               retain = list(cumwt = 0 ),
-               rename = c(mpg = "MPG", cyl = "Cylinders", wt = "Wgt",
+               retain = list(cumwt = 0 ),
+               rename = c(mpg = "MPG", cyl = "Cylinders", wt = "Wgt",
                           cumwt = "Cumulative Wgt"), {
 
   cumwt <- cumwt + wt
@@ -245,9 +244,9 @@ 

# Merc 280 19.2 6 3.440 31.280

-
-

-By Group Processing

+
+

By Group Processing +

The datastep() function also has the capabilities of performing by-group processing. A by-group is accomplished using the by parameter, and passing a vector of column names that @@ -257,16 +256,16 @@

your data must be sorted properly before sending it into the data step. To turn the sort check off, set the sort_check parameter to FALSE.

-
-

-Example 4: By Groups

+
+

Example 4: By Groups +

-library(libr)
+library(libr)
 
 # Identify start and end of by-groups
 df <- datastep(mtcars[1:10,], 
-  keep = c("mpg", "cyl", "gear", "grp"), 
-  by = c("gear"), sort_check = FALSE, {
+  keep = c("mpg", "cyl", "gear", "grp"), 
+  by = c("gear"), sort_check = FALSE, {
 
     if (first. & last.)
       grp <- "Start - End"
@@ -293,9 +292,68 @@ 

# Merc 280 19.2 6 4 End

-
-

-Using Summary Functions

+
+

By Group Processing of Multiple Variables +

+

If desired, you can pass multiple variables on the by +parameter. When there are multiple by groups, the first. +and last. automatic variables described above will +represent an “or” combination of values for all by-variables. In +addition, automatic variables will be created for each variable in the +by group, similar to SAS®. Observe:

+
+

Example 5: Multiple By Groups +

+
+library(libr)
+
+# Create sample data
+df <- data.frame(HairEyeColor)[seq(2, 32, 2), ]
+
+# Sort by groups
+df <- sort(df, by = c("Sex", "Hair"))
+
+# Identify start and end of by-groups
+df2 <- datastep(df,
+   drop = c("Eye", "Freq"),
+   by = c("Sex", "Hair"), {
+
+    fSex <- first.Sex
+    lSex <- last.Sex
+    fHair <- first.Hair
+    lHair <- last.Hair
+
+  })
+  
+df2
+#     Hair    Sex  fSex  lSex fHair lHair
+# 1  Brown   Male  TRUE FALSE  TRUE FALSE
+# 2  Brown   Male FALSE FALSE FALSE FALSE
+# 3  Brown   Male FALSE FALSE FALSE FALSE
+# 4  Brown   Male FALSE FALSE FALSE  TRUE
+# 5  Blond   Male FALSE FALSE  TRUE FALSE
+# 6  Blond   Male FALSE FALSE FALSE FALSE
+# 7  Blond   Male FALSE FALSE FALSE FALSE
+# 8  Blond   Male FALSE  TRUE FALSE  TRUE
+# 9  Brown Female  TRUE FALSE  TRUE FALSE
+# 10 Brown Female FALSE FALSE FALSE FALSE
+# 11 Brown Female FALSE FALSE FALSE FALSE
+# 12 Brown Female FALSE FALSE FALSE  TRUE
+# 13 Blond Female FALSE FALSE  TRUE FALSE
+# 14 Blond Female FALSE FALSE FALSE FALSE
+# 15 Blond Female FALSE FALSE FALSE FALSE
+# 16 Blond Female FALSE  TRUE FALSE  TRUE
+

The above first.Sex, last.Sex, +first.Hair, and last.Hair variables may also +be used in conditions, functions, or any other expression inside your +datastep. Note that, like first. and last., they +are dropped automatically at the end of the datastep. If you want to +retain their values, assign them to new variables as shown above.

+
+
+
+

Using Summary Functions +

There may be times when you want to combine row-by-row conditional processing with column-by-column vector operations. For example, let’s say you want to calculate a mean and then perform conditional processing @@ -304,16 +362,16 @@

function. The function will execute the calculate block first, add any assigned variables to the data frame, and then execute the data step. Below is an example of such a scenario:

-
-

-Example 5: Calculate Block

-
-library(libr)
+
+

Example 6: Calculate Block +

+
+library(libr)
 
 # Categorize mpg as above or below the mean
 df <- datastep(mtcars, 
-  keep = c("mpg", "cyl", "mean_mpg", "mpgcat"), 
-  calculate = { mean_mpg = mean(mpg) },
+  keep = c("mpg", "cyl", "mean_mpg", "mpgcat"), 
+  calculate = { mean_mpg = mean(mpg) },
   {
 
     if (mpg >= mean_mpg)
@@ -337,30 +395,29 @@ 

# Merc 280 19.2 6 20.09062 Low

-
-

-Data Steps with dplyr +
+

Data Steps with dplyr

Note that the datastep() function is pipe-friendly, and can be combined with dplyr functions in a data pipeline. Also note that the datastep() function will -recognize any group attributes added by the group_by() +recognize any group attributes added by the group_by() function. Therefore, within a dplyr pipeline, it is not necessary to use any datastep parameters. The following example recreates the above data frame from Example 6, but with a dplyr pipeline.

-
-

-Example 6: Data Pipeline

-
-library(libr)
-library(dplyr)
-library(magrittr)
+
+

Example 7: Data Pipeline +

+
+library(libr)
+library(dplyr)
+library(magrittr)
 
 # Add datastep to dplyr pipeline
-df <- mtcars %>% 
-  select(mpg, cyl, gear) %>% 
-  mutate(mean_mpg = mean(mpg)) %>% 
+df <- mtcars %>% 
+  select(mpg, cyl, gear) %>% 
+  mutate(mean_mpg = mean(mpg)) %>% 
   datastep({
 
     if (mpg >= mean_mpg)
@@ -368,8 +425,8 @@ 

else mpgcat <- "Low" - }) %>% - filter(row_number() <= 10) + }) %>% + filter(row_number() <= 10) df # mpg cyl gear mean_mpg mpgcat @@ -385,9 +442,9 @@

# 10 19.2 6 4 20.09062 Low

-
-

-Data Attributes

+
+

Data Attributes +

The libr package recognizes several useful data attributes that are not normally recognized by other R functions. For example, it is very convenient to assign label and @@ -397,17 +454,17 @@

attrib parameter that allows you to supply such attributes as part of a data step. Attributes are assigned with a named list and the dsattr() object.

-
-

-Example 7: Attributes

-
-library(libr)
+
+

Example 8: Attributes +

+
+library(libr)
 
 # Assign label attributes to all columns
 df <- datastep(mtcars[1:10, ], 
-               keep = c("mpg", "cyl", "mpgcat"),
-               calculate = { mean_mpg = mean(mpg) },
-               attrib = list(mpg = dsattr(label = "Miles Per Gallon"),
+               keep = c("mpg", "cyl", "mpgcat"),
+               calculate = { mean_mpg = mean(mpg) },
+               attrib = list(mpg = dsattr(label = "Miles Per Gallon"),
                              cyl = dsattr(label = "Cylinders"),
                              mpgcat = dsattr(label = "Mileage Category")), {
 
@@ -428,9 +485,9 @@ 

# 3 df mpgcat character Mileage Category NA NA 4 NA 10 0

-
-

-Data Step Array

+
+

Data Step Array +

As mentioned previously, R typically operates in a column-wise manner. That is, R processes data column-by-column. But what if you need to get a sum or mean across a row?

@@ -443,7 +500,7 @@

To see the array in action, we’ll use the AirPassengers sample data. This data shows international airline passengers by month between 1949 and 1960. The data looks like this:

-
+
 AirPassengers
 #      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
 # 1949 112 118 132 129 121 135 148 148 136 119 104 118
@@ -463,29 +520,29 @@ 

extract values. You can use the indexer to extract a single value or a subset of values. An empty indexer will return all the values in the array.

-
-

-Example 8: Using a Data Step Array

-
-library(libr)
+
+

Example 9: Using a Data Step Array +

+
+library(libr)
 
 # Create AirPassengers Data Frame
-df <- as.data.frame(t(matrix(AirPassengers, 12,
-                    dimnames = list(month.abb, seq(1949, 1960)))), 
+df <- as.data.frame(t(matrix(AirPassengers, 12,
+                    dimnames = list(month.abb, seq(1949, 1960)))), 
                     stringsAsFactors = FALSE)
 
 # Use datastep array to get year tot, mean, and top month
 dat <- datastep(df,
-                arrays = list(months = dsarray(names(df))),
-                attrib = list(Tot = 0, Mean = 0, Top = ""),
+                arrays = list(months = dsarray(names(df))),
+                attrib = list(Tot = 0, Mean = 0, Top = ""),
                 drop = "mth",
                 {
 
-                  Tot <- sum(months[])
-                  Mean <- mean(months[])
+                  Tot <- sum(months[])
+                  Mean <- mean(months[])
 
                   for (mth in months) {
-                    if (months[mth] == max(months[])) {
+                    if (months[mth] == max(months[])) {
                       Top <- mth
                     }
                   }
@@ -511,22 +568,22 @@ 

are hard to calculate otherwise.

-
-

-Filtering and Duplicating Rows

+
+

Filtering and Duplicating Rows +

The datastep provides different ways to control which rows are output.

First, the function has a where parameter to pass a filter expression to the datastep. The where clause will be executed at the end of datastep processing. Pass in the where clause using the -expression() function. Like so:

-
+expression() function. Like so:

+
 # Prepare sample data
-dat <- as.data.frame(HairEyeColor)
+dat <- as.data.frame(HairEyeColor)
 
 # Filter for black hair and blue eyes
 res <- datastep(dat, 
-                where = expression(Hair == "Black" & Eye == "Blue"), 
+                where = expression(Hair == "Black" & Eye == "Blue"), 
                 {})
 
 res
@@ -537,7 +594,7 @@ 

output() functions to remove or duplicate rows from inside the datastep. These functions give you conditional control over which rows are output.

-
+
 # Delete rows with frequencies less than 25
 res1 <- datastep(dat, {
   
@@ -581,24 +638,24 @@ 

datasets from scratch, just like in SAS®. To create a dataset from scratch, simply pass in an empty data frame and output the desired values.

-
+
 # Create metadata 
-res3 <- datastep(data.frame(), {
+res3 <- datastep(data.frame(), {
   
   
           name <- "mtcars"
-          rows <- nrow(mtcars)
-          cols <- ncol(mtcars)
+          rows <- nrow(mtcars)
+          cols <- ncol(mtcars)
           output()
           
           name <- "iris"
-          rows <- nrow(iris)
-          cols <- ncol(iris)
+          rows <- nrow(iris)
+          cols <- ncol(iris)
           output()
 
           name <- "beaver1"
-          rows <- nrow(beaver1)
-          cols <- ncol(beaver1)
+          rows <- nrow(beaver1)
+          cols <- ncol(beaver1)
           output()
           
   
@@ -610,9 +667,9 @@ 

# 2 iris 150 5 # 3 beaver1 114 4

-
-

-Set and Merge Operations

+
+

Set and Merge Operations +

When working with data, joining datasets is an essential activity. While there are many different functions in R to perform joins, the datastep() “set” and “merge” parameters offer unusual @@ -624,9 +681,9 @@

datasets we will create include one “region” dataset, and two “stores” datasets. Note that the columns on the stores datasets are not identical.

-
+
 # Create sample data
-region <- read.table(header = TRUE, text = '
+region <- read.table(header = TRUE, text = '
   REGION   NAME
   R01      East
   R02      West
@@ -635,7 +692,7 @@ 

', stringsAsFactors = FALSE) # First stores dataset -stores1 <- read.table(header = TRUE, text = ' +stores1 <- read.table(header = TRUE, text = ' ID NAME SIZE REGION FRANCHISE A01 "Eastern Lumber" L R01 T A02 "Tri-City Hardwood" M R02 F @@ -643,7 +700,7 @@

', stringsAsFactors = FALSE) # Extra column on this one -stores2 <- read.table(header = TRUE, text = ' +stores2 <- read.table(header = TRUE, text = ' ID NAME SIZE REGION A03 "AAA Mills" S R05 A04 "Home and Yard" L R03 @@ -651,7 +708,7 @@

Despite not having the same columns, the two stores datasets can be set using the datastep() function. The function will fill in the missing values automatically. Like so:

-
+
 # Set operation
 allstores <- datastep(stores1, set = stores2, {})
 
@@ -666,16 +723,16 @@ 

Let’s pretend we noticed the missing data, and decide to fill it in. We can do that by merging the missing FRANCHISE values to the second stores dataset. First let’s create the missing data:

-
+
 # Create small dataset of missing FRANCHISE values
-franchises <- data.frame(FRANCHISE = c(F, F), stringsAsFactors = FALSE) 
+franchises <- data.frame(FRANCHISE = c(F, F), stringsAsFactors = FALSE) 
 franchises
 #   FRANCHISE
 # 1     FALSE
 # 2     FALSE

Next we can merge in the missing data on “stores2”, and set the two store datasets again:

-
+
 # Merge in missing FRANCHISE column
 stores2mod <- datastep(stores2, merge = franchises, {})
 stores2mod
@@ -702,12 +759,12 @@ 

condition. We will join in the store regions by the region ID. We will also set up merge flags so we can see which rows were in which input dataset.

-
+
 
 # Merge operation - Outer Join
 res <- datastep(allstores, merge = region,
                 merge_by = "REGION",
-                merge_in = c("inA", "inB"), {})
+                merge_in = c("inA", "inB"), {})
 
 # View results
 res
@@ -731,15 +788,15 @@ 

“where” expression. Also, we can recode “R05” to “R04” inside the datastep. Finally, we’ll drop the merge flags to clean up the columns.

-
+
 
 # Merge operation - Left join and clean up
 res <- datastep(allstores, merge = region,
                 merge_by = "REGION",
-                merge_in = c("inA", "inB"), 
-                rename = c(NAME.1 = "STORE_NAME", NAME.2 = "REGION_NAME"),
-                where = expression(inA == TRUE),
-                drop = c("inA", "inB"),
+                merge_in = c("inA", "inB"), 
+                rename = c(NAME.1 = "STORE_NAME", NAME.2 = "REGION_NAME"),
+                where = expression(inA == TRUE),
+                drop = c("inA", "inB"),
                 {
                   if (REGION == "R05") {
                     REGION <- "R04"
@@ -757,6 +814,50 @@ 

# 3 A02 Tri-City Hardwood M R02 FALSE West # 4 A04 Home and Yard L R03 FALSE North # 5 A03 AAA Mills S R04 FALSE South

+
+
+

Datastep Performance +

+

One weakness of the libr datastep() +function is performance. The function is far slower than the equivalent +SAS® datastep. The performance profile may limit the number of records +you are able to reasonably process with the datastep().

+

One thing you can do to increase performance is to reduce the number +of rows and columns on the input data. You can perform this +pre-filtering with Base R or Tidyverse functions. This +strategy is particularly recommended if you were planning to subset the +data anyway using the “where” or “keep” options.

+

The Base R subset() function is convenient to use +because it is always available. Here is an example showing how to reduce +the size of the iris sample dataframe using Base R +subset() before sending it to a datastep.

+Example 10: Increasing Performance

+
# Subset the input dataset first for only needed rows and columns
+dat <- subset(iris, Species == 'versicolor', c('Petal.Length', 'Petal.Width')) |> 
+       datastep({
+       
+         if (Petal.Length < 3.5)
+            Petal.Size <- "Short"
+         else if (Petal.Length > 4.5)
+            Petal.Size <- "Long"
+         else
+            Petal.Size <- "Medium"
+       
+       })
+
+# View Some Results
+dat[1:10, ]
+#    Petal.Length Petal.Width Petal.Size
+# 1           4.7         1.4       Long
+# 2           4.5         1.5     Medium
+# 3           4.9         1.5       Long
+# 4           4.0         1.3     Medium
+# 5           4.6         1.5       Long
+# 6           4.5         1.3     Medium
+# 7           4.7         1.6       Long
+# 8           3.3         1.0      Short
+# 9           4.6         1.3       Long
+# 10          3.9         1.4     Medium

Next: Disclaimer

@@ -770,11 +871,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.7.

@@ -783,5 +886,7 @@

+ + diff --git a/docs/articles/libr-disclaimer.html b/docs/articles/libr-disclaimer.html index d9c5c43..2ee0203 100644 --- a/docs/articles/libr-disclaimer.html +++ b/docs/articles/libr-disclaimer.html @@ -26,6 +26,8 @@ + +
+
@@ -136,11 +135,13 @@

Disclaimer

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.7.

@@ -149,5 +150,7 @@

Disclaimer

+ + diff --git a/docs/articles/libr-example1.html b/docs/articles/libr-example1.html index db112f2..a9f7eaa 100644 --- a/docs/articles/libr-example1.html +++ b/docs/articles/libr-example1.html @@ -26,6 +26,8 @@ + +
+
-
-

-Program

+
+

Program +

The previous examples in the libr documentation were intentionally simplified to focus on the workings of a particular function. It is helpful, however, to also view libr @@ -132,24 +131,24 @@

create a report.

The data for this example has been included in the libr package as an external data file. It may be -accessed using the system.file() function as shown below, -or downloaded directly from the libr GitHub site here

+accessed using the system.file() function as shown below, +or downloaded directly from the libr GitHub site here

-library(tidyverse)
-library(sassy)
+library(tidyverse)
+library(sassy)
 
 
 # Prepare Log -------------------------------------------------------------
 
 
-options("logr.autolog" = TRUE,
+options("logr.autolog" = TRUE,
         "logr.notes" = FALSE)
 
 # Get temp location for log and report output
-tmp <- tempdir()
+tmp <- tempdir()
 
 # Open log
-lf <- log_open(file.path(tmp, "example1.log"))
+lf <- log_open(file.path(tmp, "example1.log"))
 
 
 # Load and Prepare Data ---------------------------------------------------
@@ -157,7 +156,7 @@ 

sep("Prepare Data") # Get path to sample data -pkg <- system.file("extdata", package = "libr") +pkg <- system.file("extdata", package = "libr") # Define data library libname(sdtm, pkg, "csv", quiet = TRUE) @@ -166,9 +165,9 @@

lib_load(sdtm) # Prepare data -dm_mod <- sdtm.DM %>% - select(USUBJID, SEX, AGE, ARM) %>% - filter(ARM != "SCREEN FAILURE") %>% +dm_mod <- sdtm.DM %>% + select(USUBJID, SEX, AGE, ARM) %>% + filter(ARM != "SCREEN FAILURE") %>% datastep({ if (AGE >= 18 & AGE <= 24) @@ -180,39 +179,39 @@

else if (AGE >= 65) AGECAT <- ">= 65" - }) %>% put() + }) %>% put() put("Get population counts") -arm_pop <- count(dm_mod, ARM) %>% put() -sex_pop <- count(dm_mod, SEX) %>% put() -agecat_pop <- count(dm_mod, AGECAT) %>% put() +arm_pop <- count(dm_mod, ARM) %>% put() +sex_pop <- count(dm_mod, SEX) %>% put() +agecat_pop <- count(dm_mod, AGECAT) %>% put() # Convert agecat to factor so rows will sort correctly -agecat_pop$AGECAT <- factor(agecat_pop$AGECAT, levels = c("18 to 24", +agecat_pop$AGECAT <- factor(agecat_pop$AGECAT, levels = c("18 to 24", "25 to 44", "45 to 64", ">= 65")) # Sort agecat -agecat_pop <- agecat_pop %>% arrange(AGECAT) +agecat_pop <- agecat_pop %>% arrange(AGECAT) # Create Plots ------------------------------------------------------------ -plt1 <- ggplot(data = arm_pop, aes(x = ARM, y = n)) + - geom_col(fill = "#0000A0") + - geom_text(aes(label = n), vjust = 1.5, colour = "white") + - labs(x = "Treatment Group", y = "Number of Subjects (n)") +plt1 <- ggplot(data = arm_pop, aes(x = ARM, y = n)) + + geom_col(fill = "#0000A0") + + geom_text(aes(label = n), vjust = 1.5, colour = "white") + + labs(x = "Treatment Group", y = "Number of Subjects (n)") -plt2 <- ggplot(data = sex_pop, aes(x = SEX, y = n)) + - geom_col(fill = "#00A000") + - geom_text(aes(label = n), vjust = 1.5, colour = "white") + - labs(x = "Biological Sex", y = "Number of Subjects (n)") +plt2 <- ggplot(data = sex_pop, aes(x = SEX, y = n)) + + geom_col(fill = "#00A000") + + geom_text(aes(label = n), vjust = 1.5, colour = "white") + + labs(x = "Biological Sex", y = "Number of Subjects (n)") -plt3 <- ggplot(data = agecat_pop, aes(x = AGECAT, y = n)) + - geom_col(fill = "#A00000") + - geom_text(aes(label = n), vjust = 1.5, colour = "white") + - labs(x = "Age Categories", y = "Number of Subjects (n)") +plt3 <- ggplot(data = agecat_pop, aes(x = AGECAT, y = n)) + + geom_col(fill = "#A00000") + + geom_text(aes(label = n), vjust = 1.5, colour = "white") + + labs(x = "Age Categories", y = "Number 
of Subjects (n)") # Report ------------------------------------------------------------------ @@ -221,24 +220,24 @@

sep("Create and print report") -page1 <- create_plot(plt1, 4.5, 7) %>% +page1 <- create_plot(plt1, 4.5, 7) %>% titles("Figure 1.1", "Distribution of Subjects by Treatment Group") -page2 <- create_plot(plt2, 4.5, 7) %>% +page2 <- create_plot(plt2, 4.5, 7) %>% titles("Figure 1.2", "Distribution of Subjects by Biological Sex") -page3 <- create_plot(plt3, 4.5, 7) %>% +page3 <- create_plot(plt3, 4.5, 7) %>% titles("Figure 1.2", "Distribution of Subjects by Age Category") -rpt <- create_report(file.path(tmp, "./output/example1.rtf"), output_type = "RTF", - font = "Arial") %>% - set_margins(top = 1, bottom = 1) %>% - page_header("Sponsor: Company", "Study: ABC") %>% - add_content(page1) %>% - add_content(page2) %>% - add_content(page3) %>% - footnotes("Program: DM_Figure.R") %>% - page_footer(paste0("Date Produced: ", fapply(Sys.time(), "%d%b%y %H:%M")), +rpt <- create_report(file.path(tmp, "./output/example1.rtf"), output_type = "RTF", + font = "Arial") %>% + set_margins(top = 1, bottom = 1) %>% + page_header("Sponsor: Company", "Study: ABC") %>% + add_content(page1) %>% + add_content(page2) %>% + add_content(page3) %>% + footnotes("Program: DM_Figure.R") %>% + page_footer(paste0("Date Produced: ", fapply(Sys.time(), "%d%b%y %H:%M")), right = "Page [pg] of [tpg]") res <- write_report(rpt) @@ -255,14 +254,14 @@

# View log -writeLines(readLines(lf, encoding = "UTF-8")) +writeLines(readLines(lf, encoding = "UTF-8")) # View report # file.show(res$file_path)

-
-

-Log

+
+

Log +

Here is the log from the above program:

=========================================================================
 Log Path: C:/Users/dbosa/AppData/Local/Temp/RtmpwLpEIV/log/example1.log
@@ -400,9 +399,9 @@ 

Log Elapsed Time: 0 00:00:07 =========================================================================

-
-

-Output

+
+

Output +

And here is the output:

Next: Complete Example 2

@@ -420,11 +419,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.7.

@@ -433,5 +434,7 @@

+ + diff --git a/docs/articles/libr-example2.html b/docs/articles/libr-example2.html index d706f16..306fad5 100644 --- a/docs/articles/libr-example2.html +++ b/docs/articles/libr-example2.html @@ -26,6 +26,8 @@ + +
+
-
-

-Program

+
+

Program +

Here is another example of a complete program that demonstrates several of the key functions in the libr package.

-library(tidyverse)
-library(sassy)
-library(common)
+library(tidyverse)
+library(sassy)
+library(common)
 
-options("logr.autolog" = TRUE,
+options("logr.autolog" = TRUE,
         "logr.notes" = FALSE)
 
 # Get temp location for log and report output
-tmp <- tempdir()
+tmp <- tempdir()
 
 # Open log
-lf <- log_open(file.path(tmp, "example1.log"))
+lf <- log_open(file.path(tmp, "example1.log"))
 
 
 # Prepare Data ------------------------------------------------------------
@@ -144,7 +143,7 @@ 

sep("Prepare Data") # Get path to sample data -pkg <- system.file("extdata", package = "libr") +pkg <- system.file("extdata", package = "libr") # Create libname for csv data libname(sdtm, pkg, "csv", quiet = TRUE) @@ -153,25 +152,25 @@

lib_load(sdtm) put("Join and prepare data") -prep <- sdtm.DM %>% - left_join(sdtm.VS, by = c("USUBJID" = "USUBJID")) %>% - select(USUBJID, VSTESTCD, VISIT, VISITNUM, VSSTRESN, ARM, VSBLFL) %>% - filter(VSTESTCD %in% c("PULSE", "RESP", "TEMP", "DIABP", "SYSBP"), - !(VISIT == "SCREENING" & VSBLFL != "Y")) %>% - arrange(USUBJID, VSTESTCD, VISITNUM) %>% - group_by(USUBJID, VSTESTCD) %>% - datastep(retain = list(BSTRESN = 0), { +prep <- sdtm.DM %>% + left_join(sdtm.VS, by = c("USUBJID" = "USUBJID")) %>% + select(USUBJID, VSTESTCD, VISIT, VISITNUM, VSSTRESN, ARM, VSBLFL) %>% + filter(VSTESTCD %in% c("PULSE", "RESP", "TEMP", "DIABP", "SYSBP"), + !(VISIT == "SCREENING" & VSBLFL != "Y")) %>% + arrange(USUBJID, VSTESTCD, VISITNUM) %>% + group_by(USUBJID, VSTESTCD) %>% + datastep(retain = list(BSTRESN = 0), { # Combine treatment groups # And distingish baseline time points if (ARM == "ARM A") { - if (VSBLFL %eq% "Y") { + if (VSBLFL %eq% "Y") { GRP <- "A_BASE" } else { GRP <- "A_TRT" } } else { - if (VSBLFL %eq% "Y") { + if (VSBLFL %eq% "Y") { GRP <- "O_BASE" } else { GRP <- "O_TRT" @@ -182,30 +181,30 @@

if (first.) BSTRESN = VSSTRESN - }) %>% - ungroup() %>% + }) %>% + ungroup() %>% put() put("Get population counts") -pop_A <- prep %>% select(USUBJID, GRP) %>% filter(GRP == "A_BASE") %>% - distinct() %>% count() %>% deframe() %>% put() -pop_O <- prep %>% select(USUBJID, GRP) %>% filter(GRP == "O_BASE") %>% - distinct() %>% count() %>% deframe() %>% put() +pop_A <- prep %>% select(USUBJID, GRP) %>% filter(GRP == "A_BASE") %>% + distinct() %>% count() %>% deframe() %>% put() +pop_O <- prep %>% select(USUBJID, GRP) %>% filter(GRP == "O_BASE") %>% + distinct() %>% count() %>% deframe() %>% put() put("Prepare final data frame") -final <- prep %>% - select(VSTESTCD, GRP, VSSTRESN, BSTRESN) %>% - group_by(VSTESTCD, GRP) %>% - summarize(Mean = fmt_mean_sd(VSSTRESN), +final <- prep %>% + select(VSTESTCD, GRP, VSSTRESN, BSTRESN) %>% + group_by(VSTESTCD, GRP) %>% + summarize(Mean = fmt_mean_sd(VSSTRESN), Median = fmt_median(VSSTRESN), Quantiles = fmt_quantile_range(VSSTRESN), - Range = fmt_range(VSSTRESN)) %>% - ungroup() %>% - pivot_longer(cols = c(Mean, Median, Quantiles, Range), + Range = fmt_range(VSSTRESN)) %>% + ungroup() %>% + pivot_longer(cols = c(Mean, Median, Quantiles, Range), names_to = "stats", - values_to = "values") %>% - pivot_wider(names_from = GRP, - values_from = values) %>% + values_to = "values") %>% + pivot_wider(names_from = GRP, + values_from = values) %>% put() @@ -214,48 +213,48 @@

sep("Create formats") # Vital sign lookup format -vs_fmt <- c(PULSE = "Pulse", +vs_fmt <- c(PULSE = "Pulse", TEMP = "Temperature °C", RESP = "Respirations/min", SYSBP = "Systolic Blood Pressure", - DIABP = "Diastolic Blood Pressure") %>% + DIABP = "Diastolic Blood Pressure") %>% put() # Statistics user-defined format stat_fmt <- value(condition(x == "Mean", "Mean (SD)"), - condition(x == "Quantiles", "Q1 - Q3")) %>% + condition(x == "Quantiles", "Q1 - Q3")) %>% put() # Create Report ----------------------------------------------------------- sep("Create Report") # Apply sort -final <- final %>% - mutate(VSTESTCD = factor(VSTESTCD, levels = names(vs_fmt))) %>% - arrange(VSTESTCD) +final <- final %>% + mutate(VSTESTCD = factor(VSTESTCD, levels = names(vs_fmt))) %>% + arrange(VSTESTCD) # Define table object -tbl <- create_table(final) %>% - spanning_header(A_BASE, A_TRT, "Placebo", n = pop_A) %>% - spanning_header(O_BASE, O_TRT, "Treated", n = pop_O) %>% - column_defaults(width = 1.25, align = "center") %>% - stub(c(VSTESTCD, stats), width = 2.5) %>% +tbl <- create_table(final) %>% + spanning_header(A_BASE, A_TRT, "Placebo", n = pop_A) %>% + spanning_header(O_BASE, O_TRT, "Treated", n = pop_O) %>% + column_defaults(width = 1.25, align = "center") %>% + stub(c(VSTESTCD, stats), width = 2.5) %>% define(VSTESTCD, "Vital Sign", format = vs_fmt, - blank_after = TRUE, dedupe = TRUE, label_row = TRUE) %>% - define(stats, indent = .25, format = stat_fmt) %>% - define(A_BASE, "Baseline") %>% - define(A_TRT, "After Treatment") %>% - define(O_BASE, "Baseline") %>% + blank_after = TRUE, dedupe = TRUE, label_row = TRUE) %>% + define(stats, indent = .25, format = stat_fmt) %>% + define(A_BASE, "Baseline") %>% + define(A_TRT, "After Treatment") %>% + define(O_BASE, "Baseline") %>% define(O_TRT, "After Treatment") # Define report object -rpt <- create_report(file.path(tmp, "./output/example1.rtf"), output_type = "RTF", - font = "Times", font_size = 12) %>% - page_header("Sponsor: 
Company", "Study: ABC") %>% - titles("Table 4.0", "Selected Vital Signs", bold = TRUE) %>% - add_content(tbl, align = "center") %>% - page_footer(Sys.time(), "CONFIDENTIAL", "Page [pg] of [tpg]") +rpt <- create_report(file.path(tmp, "./output/example1.rtf"), output_type = "RTF", + font = "Times", font_size = 12) %>% + page_header("Sponsor: Company", "Study: ABC") %>% + titles("Table 4.0", "Selected Vital Signs", bold = TRUE) %>% + add_content(tbl, align = "center") %>% + page_footer(Sys.time(), "CONFIDENTIAL", "Page [pg] of [tpg]") # Write report to file system res <- write_report(rpt) @@ -271,14 +270,14 @@

log_close() # View log -writeLines(readLines(lf, encoding = "UTF-8")) +writeLines(readLines(lf, encoding = "UTF-8")) # View report # file.show(res$file_path)

-
-

-Log

+
+

Log +

Here is the log from the above program:

=========================================================================
 Log Path: C:/Users/dbosa/AppData/Local/Temp/RtmpwLpEIV/log/example2.log
@@ -474,9 +473,9 @@ 

Log Elapsed Time: 0 00:00:09 =========================================================================

-
-

-Output

+
+

Output +

And here is the output:

@@ -493,11 +492,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.7.

@@ -506,5 +507,7 @@

+ + diff --git a/docs/articles/libr-faq.html b/docs/articles/libr-faq.html index 816c48a..cb36748 100644 --- a/docs/articles/libr-faq.html +++ b/docs/articles/libr-faq.html @@ -26,6 +26,8 @@ + +
+
@@ -122,9 +121,9 @@

Frequently Asked Questions

Below are some frequently asked questions about the libr package. Click on the links below to navigate to the full question and answer content.

-
-

-Index

+ -
-

-Content

-
-

-How do I create a libname in R?

+
+

Content +

+
+

How do I create a libname in R? +

Q: I have a directory full of datasets. I need to use several of them in my analysis. In SAS®, I would create a libname so I could access all of them. Is there a way to do something similar in @@ -160,38 +162,28 @@

into the library. To work directly with the datasets, you can then do:

-lib_load(mylib)
-

This statement will load the datasets into your workspace, where you -can begin using them. For instance, you could get summary statistics for -a variable like this:

-
-summary(mylib.dat1$var1)
-

If you made any changes to the data, and want to keep those changes, -remember to save them with:

-
-lib_write(mylib)
-

When you are done, unload the datasets with:

-
-lib_unload(mylib)
+mylib$mydataset

+

To access your datasets.

top


-
-

-Which data formats does the libname function support?

+
+

Which data formats does the libname function support? +

Q: I can see from the examples that the libr package supports CSV and SAS dataset file formats. What other data formats does the package support?

A: The package supports the following data formats: csv, sas7bdat, rds, Rdata, rda, xls, xlsx, xpt, and dbf. The libname() help page has a full list, and a short discussion -of some details on each format.

+of some details on each format. Note that the sas7bdat file format is +read-only at this time.

top


-
-

-Is there a way to filter the datasets in my libname?

+
+

Is there a way to filter the datasets in my libname? +

Q: I have a directory with over 100 datasets. I want to use the libname() function, but worry about loading all those datasets into memory. Is there a way I can filter the libname, to @@ -200,19 +192,19 @@

libname() function allows you to pass a wildcard filter string. For example, the following call will load only those datasets that start with ‘a’:

-
+
 libname(mylib, "c:/mypath/mydata", "csv", filter = "a*")

If you have more complicated filter criteria, you can also pass a vector of filter strings. The example below will load only those datasets that start with ‘a’ or ‘b’.

-
-libname(mylib, "c:/mypath/mydata", "csv", filter = c("a*", "b*"))
+
+libname(mylib, "c:/mypath/mydata", "csv", filter = c("a*", "b*"))

top


-
-

-How do I view the variables in my datasets?

+
+

How do I view the variables in my datasets? +

Q: I’m doing some analysis with my data, and can’t remember all the variable names. Is there an easy way to view or print out the variables in my datasets?

@@ -222,7 +214,7 @@

variable. The dictionary() function works on a single data frame, or an entire library. You can save this dictionary as metadata, print it, or even create a report from it. Here is an example:

-
+
 # Create libname
 libname(mylib, "c:/mypath/mydata", "csv")
 
@@ -234,9 +226,9 @@ 

top


-
-

-How do I export data to another file format?

+
+

How do I export data to another file format? +

Q: Let’s say I have some data in one format (sas7bdat), and want to export this data to another format (csv or Excel). How can I do that with the libr package?

@@ -244,7 +236,7 @@

designed for this purpose. You can take an existing library and export the entire thing to another library with a different file format. Like this:

-
+
 libname(libA, "c:/mypath/mydata1", "sas7bdat")
 
 lib_export(libA, libB, "c:/mypath/mydata2", "csv")
@@ -256,14 +248,14 @@

top


-
-

-How do I copy a library?

+
+

How do I copy a library? +

Q: I have a directory full of datasets. I want to back up the entire thing to another directory. How can I do that?

A: You can use the lib_copy() function, like this:

-
+
 # Create libname
 libname(lib1, "c:/mypath/mydata1", "csv")
 
@@ -274,9 +266,9 @@ 

top


-
-

-Can I really do a datastep in R?

+
+

Can I really do a datastep in R? +

Q: When I first started learning R I searched all over for a way to do a datastep. I was shocked to learn there was nothing similar. Does the libr package really allow me @@ -288,20 +280,17 @@

for each row. It has basic data shaping, grouping, retain, assigning of attributes, and a datastep array. Here is a simple example showing categorization of an age variable into age groups:

-
-library(dplyr)
-library(libr)
+
+library(dplyr)
+library(libr)
 
 # Define data library
 libname(dat, "./data", "csv") 
 
-# Loads data into workspace
-lib_load(dat)
-
 # Prepare data
-dm_mod <- dat.DM %>% 
-  select(USUBJID, SEX, AGE, ARM) %>% 
-  filter(ARM != "SCREEN FAILURE") %>% 
+dm_mod <- dat$DM %>% 
+  select(USUBJID, SEX, AGE, ARM) %>% 
+  filter(ARM != "SCREEN FAILURE") %>% 
   datastep({
     
     if (AGE >= 18 & AGE <= 24)
@@ -313,9 +302,7 @@ 

else if (AGE >= 65) AGECAT <- ">= 65" - }) - -lib_unload(dat)

+ })

The datastep example above is part of a dplyr pipeline, but it can also function independently. Notice that, just like a SAS® datastep, you don’t have to declare new variables. You can just @@ -325,6 +312,75 @@

vignette for additional examples and complete documentation.

top


+

+
+

Why is the datastep so slow? +

+

Q: I like the datastep() function very +much. But it seems quite slow.
+Is there anything I can do to speed it up?

+

A: Yes. Performance of the datastep() +is directly related to the size of the input data. The best thing you +can do to increase performance is to reduce the input data to only those +rows and columns that you need. The Base R subset() +function and Tidyverse select() and +filter() functions are useful for this purpose. Or you can +use the Base R subset brackets ([]) if you are familiar with that +syntax. If the datastep performance is still not satisfactory, it is +recommended that you explore other R functions to perform your intended +operation.

+

top

+
+
+
+

Can I do “set” and “merge” operations with the datastep? +

+

Q: In SAS®, I used the datastep frequently to +combine two or more datasets.
+Does the libr datastep support “set” and “merge”?

+

A: Yes. The datastep() function +supports both “set” and “merge” operations. The “set” parameter accepts +a list of one or more datasets to stack together, and the “merge” +parameters are used in almost the same way as SAS®. Here is an +example:

+
# Subset mtcars dataset
+dat1 <- subset(mtcars, cyl == 4, c('mpg', 'cyl', 'disp'))[1:5, ]
+dat2 <- subset(mtcars, cyl == 6, c('mpg', 'cyl', 'disp'))[1:5, ]
+dat3 <- mtcars[1:10, c('hp', 'drat', 'wt')]
+
+# Stack datasets using set operation
+res1 <- datastep(dat1, set = dat2, {})
+#     mpg cyl  disp
+# 1  22.8   4 108.0
+# 2  24.4   4 146.7
+# 3  22.8   4 140.8
+# 4  32.4   4  78.7
+# 5  30.4   4  75.7
+# 6  21.0   6 160.0
+# 7  21.0   6 160.0
+# 8  21.4   6 258.0
+# 9  18.1   6 225.0
+# 10 19.2   6 167.6
+
+# Merge row by row
+res2 <- datastep(res1, merge = dat3, {})
+#     mpg cyl  disp  hp drat    wt
+# 1  22.8   4 108.0 110 3.90 2.620
+# 2  24.4   4 146.7 110 3.90 2.875
+# 3  22.8   4 140.8  93 3.85 2.320
+# 4  32.4   4  78.7 110 3.08 3.215
+# 5  30.4   4  75.7 175 3.15 3.440
+# 6  21.0   6 160.0 105 2.76 3.460
+# 7  21.0   6 160.0 245 3.21 3.570
+# 8  21.4   6 258.0  62 3.69 3.190
+# 9  18.1   6 225.0  95 3.92 3.150
+# 10 19.2   6 167.6 123 3.92 3.440
+

The above merge shows how you can append columns even without a key +column. If you want to merge by a key, use the “merge_by” and “merge_in” +parameters. See the datastep() documentation for more +information and examples.

+

top

+
@@ -341,11 +397,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.7.

@@ -354,5 +412,7 @@

+ + diff --git a/docs/articles/libr-management.html b/docs/articles/libr-management.html index 20badc0..bfbee88 100644 --- a/docs/articles/libr-management.html +++ b/docs/articles/libr-management.html @@ -26,6 +26,8 @@ + +
+
@@ -128,17 +127,17 @@

Library Management

manipulation of the data in the libraries. The example ends by looking at some of the metadata available for libraries:

-library(libr)
+library(libr)
 
 # Create temp directory
-tmp <- tempdir()
+tmp <- tempdir()
 
 # Create libraries
 libname(s1, tmp)
 
 # Add data to library and adjust names
 lib_add(s1, state.name, state.area, state.region, state.abb,
-        name = c("name", "area", "region", "abb"))
+        name = c("name", "area", "region", "abb"))
 # # library 's1': 4 items
 # - attributes: rds not loaded
 # - path: C:\Users\User\AppData\Local\Temp\RtmpqAMV6L
@@ -150,7 +149,7 @@ 

Library Management

# 4 abb rds 50 1 4.1 Kb 2020-11-29 17:00:28 # Copy library to backup location -lib_copy(s1, s2, file.path(tmp, "orig")) +lib_copy(s1, s2, file.path(tmp, "orig")) # # library 's2': 4 items # - attributes: rds not loaded # - path: C:\Users\User\AppData\Local\Temp\RtmpqAMV6L/orig @@ -162,7 +161,7 @@

Library Management

# 4 abb rds 50 1 4.1 Kb 2020-11-29 17:01:17 # Remove data from library 1 -lib_remove(s1, name = c("name", "area", "region", "abb")) +lib_remove(s1, name = c("name", "area", "region", "abb")) # # library 's1': 0 items # - attributes: rds not loaded # - path: C:\Users\User\AppData\Local\Temp\RtmpqAMV6L @@ -171,14 +170,14 @@

Library Management

# Load library 1 into memory lib_load(s1) -s1.combined <- data.frame(name = s2.name, abb = s2.abb, +s1.combined <- data.frame(name = s2.name, abb = s2.abb, area = s2.area, region = s2.region, stringsAsFactors = FALSE) -s1.east <- subset(s1.combined, region == "Northeast") -s1.west <- subset(s1.combined, region == "West") -s1.north <- subset(s1.combined, region == "North Central") -s1.south <- subset(s1.combined, region == "South") +s1.east <- subset(s1.combined, region == "Northeast") +s1.west <- subset(s1.combined, region == "West") +s1.north <- subset(s1.combined, region == "North Central") +s1.south <- subset(s1.combined, region == "South") # Sync workspace with library list lib_sync(s1) @@ -267,11 +266,13 @@

Library Management

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.7.

@@ -280,5 +281,7 @@

Library Management

+ + diff --git a/docs/articles/libr.html b/docs/articles/libr.html index a5e62ca..c4891e0 100644 --- a/docs/articles/libr.html +++ b/docs/articles/libr.html @@ -26,6 +26,8 @@ + +
+
@@ -134,9 +133,9 @@

libr

they have not been available in R … until now!

The libr package also includes an enhanced equality operator to make data comparisons more intuitive.

-
-

-Key Functions

+
+

Key Functions +

The above concepts are implemented in the libr package with four key functions. They are:

    @@ -148,26 +147,26 @@

    datastep(): Performs row-by-row processing of data

-
-

-How to Use

+
+

How to Use +

Let’s look at some simple examples of each of the four functions above. These examples will be using some sample data. The sample data is included in the libr package, and also available for -download here.

-
-

-The libname() Function

+download here.

+
+

The libname() Function +

The libr libname() function is quite similar to the SAS® libname statement. The first parameter is the name of the library. The second parameter is a path to a directory the library will point to. The third parameter is the engine with which to read and write the data.

-library(libr)
+library(libr)
 
 # Get path to sample data
-pkg <- system.file("extdata", package = "libr")
+pkg <- system.file("extdata", package = "libr")
 
 # Define data library
 libname(sdtm, pkg, "csv") 
@@ -223,19 +222,19 @@

The column specification shows how the data was imported. Since ‘csv’ files do not contain well-defined data type information on each of the -columns, the libname function has to guess at the data +columns, the libname() function has to guess at the data types. The column specification shows you what the guesses were. This is useful information. You should review these column specifications to see -if the libname function guessed correctly. If it did not +if the libname() function guessed correctly. If it did not guess correctly, you can control the import data types by sending a specs() collection of import_spec() objects to the import_specs parameter on the libname() function. See the specs() documentation for an example and additional details.

-
-

-The lib_load() Function

+
+

Accessing Data +

Observe that there is difference between the SAS® libname statement and the libr libname() function. The difference is that after the SAS® @@ -244,9 +243,55 @@

syntax.

With the libr function, on the other hand, the data is immediately available using list syntax on the library variable name. -To get the two-level syntax, you first have to call the -lib_load() function.

+That means you can get to your data using the dollar sign ($), like +this:

+# View a dataset
+sdtm$DM
+# # A tibble: 87 × 24
+#    STUDYID DOMAIN USUBJID SUBJID RFSTDTC    RFENDTC RFXSTDTC RFXENDTC RFICDTC    RFPENDTC
+#    <chr>   <chr>  <chr>   <chr>  <date>     <date>  <lgl>    <lgl>    <date>     <date>  
+#  1 ABC     DM     ABC-01… 049    2006-11-07 NA      NA       NA       2006-10-25 NA      
+#  2 ABC     DM     ABC-01… 050    2006-11-02 NA      NA       NA       2006-10-25 NA      
+#  3 ABC     DM     ABC-01… 051    2006-11-02 NA      NA       NA       2006-10-25 NA      
+#  4 ABC     DM     ABC-01… 052    2006-11-06 NA      NA       NA       2006-10-31 NA      
+#  5 ABC     DM     ABC-01… 053    2006-11-08 NA      NA       NA       2006-11-01 NA      
+#  6 ABC     DM     ABC-01… 054    2006-11-16 NA      NA       NA       2006-11-07 NA      
+#  7 ABC     DM     ABC-01… 055    2006-12-06 NA      NA       NA       2006-10-31 NA      
+#  8 ABC     DM     ABC-01… 056    2006-11-28 NA      NA       NA       2006-11-21 NA      
+#  9 ABC     DM     ABC-01… 113    2006-12-05 NA      NA       NA       2006-11-28 NA      
+# 10 ABC     DM     ABC-01… 114    2006-12-14 NA      NA       NA       2006-12-01 NA      
+# #  77 more rows
+# #  14 more variables: DTHDTC <lgl>, DTHFL <lgl>, SITEID <chr>, BRTHDTC <date>, AGE <dbl>,
+# #   AGEU <chr>, SEX <chr>, RACE <chr>, ETHNIC <chr>, ARMCD <chr>, ARM <chr>, ACTARMCD <lgl>,
+# #   ACTARM <lgl>, COUNTRY <lgl>
+# #  Use `print(n = ...)` to see more rows
+

Using this syntax, your dataset can be passed into any R function. +For example, here we can subset the dataset for a particular +subject:

+
# Subset the data
+dat <- subset(sdtm$DM, SUBJID == '050')
+
+# View results
+dat
+# # A tibble: 1 × 24
+#   STUDYID DOMAIN USUBJID  SUBJID RFSTDTC    RFENDTC RFXSTDTC RFXENDTC RFICDTC    RFPENDTC
+#   <chr>   <chr>  <chr>    <chr>  <date>     <date>  <lgl>    <lgl>    <date>     <date>  
+# 1 ABC     DM     ABC-01-… 050    2006-11-02 NA      NA       NA       2006-10-25 NA      
+# #   14 more variables: DTHDTC <lgl>, DTHFL <lgl>, SITEID <chr>, BRTHDTC <date>, AGE <dbl>,
+# #   AGEU <chr>, SEX <chr>, RACE <chr>, ETHNIC <chr>, ARMCD <chr>, ARM <chr>, ACTARMCD <lgl>,
+# #   ACTARM <lgl>, COUNTRY <lgl>
+

The dollar sign syntax shown above is recommended for the most +memory-efficient programming. If you are writing production code to be +run in batch, use the dollar sign syntax.

+

+
+

The lib_load() Function +

+

For convenience, the package also provides a way to get two-level dot +syntax, similar to SAS®. To get the dot syntax, you first have to call +the lib_load() function.

+
 
 lib_load(sdtm)
 # # library 'sdtm': 8 items
@@ -264,22 +309,22 @@ 

# 8 VS csv 3358 17 467 Kb 2020-09-18 14:30:24

Notice on the console printout that the library is now “loaded”. That means the data has been loaded into the workspace, and is available -using two-level syntax. If you are working in RStudio, -the environment pane will now show all the datasets available in the -library.

+using two-level dot syntax. If you are working in +RStudio, the environment pane will now show all the +datasets available in the library.

At this point, you can work with your data very much the same way as you would in SAS®. You can pass these datasets into statistical functions, or manipulate them with dplyr functions. Note that you can also work with individual variables on the datasets using dollar sign (“$”) syntax.

-
+
 # Get total number of records
-nrow(sdtm.DM)
+nrow(sdtm.DM)
 # [1] 87
 
 # Get frequency counts for each arm
-table(sdtm.DM$ARM)
+table(sdtm.DM$ARM)
 # ARM A          ARM B          ARM C          ARM D SCREEN FAILURE 
 # 20             21             21             23              2 

The datasets will be available in the workspace for the length of @@ -292,9 +337,9 @@

Library Operations and Library Management.

-
-

-The dictionary() Function

+
+

The dictionary() Function +

Once you have a library defined, you may want to examine the column attributes for the datasets in that library. Examining those column attributes can be accomplished with the dictionary() @@ -302,7 +347,7 @@

information about the data in the library.

Continuing from the example above, let’s look at the dictionary for the ‘sdtm’ library created previously.

-
+
 dictionary(sdtm)
 # # A tibble: 130 x 10
 #    Name  Column  Class     Label Description Format Width Justify  Rows   NAs
@@ -325,9 +370,9 @@ 

function documentation for more information.

-
-

-The datastep() Function

+
+

The datastep() Function +

People with experience in SAS® software know that it is sometimes advantageous to process row-by-row. In SAS®, row-by-row processing done with a data step. The data step is one of the most fundamental @@ -338,9 +383,9 @@

SAS® datastep: keep, drop, rename, retain, and by. Here is a simple example, again using the data from the library already defined above:

-
+
 age_groups <- datastep(sdtm.DM, 
-                       keep = c("USUBJID", "AGE", "AGEG"), { 
+                       keep = c("USUBJID", "AGE", "AGEG"), { 
                          
                          if (AGE >= 18 & AGE <= 29)
                            AGEG <- "18 to 29"
@@ -380,9 +425,9 @@ 

additional examples on the datastep() help page and in the data step article.

-
-

-Next Steps

+
+

Next Steps +

For next steps, please review the examples provided in the vignette articles. Those articles include:

    @@ -404,11 +449,13 @@

    -

    Site built with pkgdown 1.6.1.

    +

    +

    Site built with pkgdown 2.0.7.

    @@ -417,5 +464,7 @@

    + + diff --git a/docs/authors.html b/docs/authors.html index 4e823ae..8664744 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -1,76 +1,12 @@ - - - - - - - -Authors • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Authors and Citation • libr - - + + - - - -
    -
    -

- - -
+
- @@ -170,22 +113,20 @@

Authors

-
- +
- - + + diff --git a/docs/index.html b/docs/index.html index 95d559e..89514b8 100644 --- a/docs/index.html +++ b/docs/index.html @@ -32,6 +32,8 @@ + +
-
-

-libr 1.2.8 2022-09-08 -

-
    -
  • Fixed bug on datastep that sometimes was causing variables to lose -their attributes.
  • +
    + +
    • Fixed bug on datastep() when there are spaces in the column names and output() function is used.
    • +
    • Some improvements to datastep() performance.
    • +
    • Send message on writing “sas7bdat” that functionality is not available.
    • +
    • Added where clause parameter to libname().
    • +
    • Added automatic variables “first.X” and “last.X” for each by variable.
    • +
    +
    + +
    • Fixed bug on datastep that sometimes was causing variables to lose their attributes.
    • Added “where” parameter to datastep.
    • -
    • Fix bug on datastep that was causing a single variable dataframe to -be returned as a vector.
    • +
    • Fix bug on datastep that was causing a single variable dataframe to be returned as a vector.
    • Cleared rownames on datastep exit.
    • -
    • Added “MaxChar” column to dictionary to hold the maximum number of -characters for a data value in the column. This is different from -“width” which is the user-defined, proscribed width. MaxChar is the -actual width with no padding.
    • +
    • Added “MaxChar” column to dictionary to hold the maximum number of characters for a data value in the column. This is different from “width” which is the user-defined, prescribed width. MaxChar is the actual width with no padding.
    • Added “rda” engine to libname.
    • -
    • Allowed single value NSE on drop, keep, and by parameter of -datastep(). For multiple values, use the v() -function.
    • +
    • Allowed single value NSE on drop, keep, and by parameter of datastep(). For multiple values, use the v() function.
    • Added output() function to datastep.
    • -
    • Improved datastep output column ordering. New variables were coming -out in the opposite order they were added. Now they are better.
    • +
    • Improved datastep output column ordering. New variables were coming out in the opposite order they were added. Now they are better.
    • Added “set” parameter to datastep.
    • -
    • Added “merge”, “merge_by”, and “merge_in” parameter to -datastep().
    • +
    • Added “merge”, “merge_by”, and “merge_in” parameter to datastep().
    • Fixed bug on datastep() rename.
    • Added/fixed documentation.
    • Updated logo.
    • -
    -
    -
    -

    -libr 1.2.5 2022-08-08 -

    -
      -
    • BREAKING CHANGE: Removed the %eq% operator from this -package, as it has been moved to the common package. A -dependency on common has been added to the -libr package so that the %eq% operator -will be loaded and available. However, it is still possible some changes -may be required to make the new location for the %eq% -operator work properly.
    • -
    • Apply “format” parameter on datastep() to both input -and output dataset.
    • -
    -
    -
    -

    -libr 1.2.3 2022-06-23 -

    -
      -
    • Added “Rdata” option to libname() function -engines.
    • +
    +
    + +
    • BREAKING CHANGE: Removed the %eq% operator from this package, as it has been moved to the common package. A dependency on common has been added to the libr package so that the %eq% operator will be loaded and available. However, it is still possible some changes may be required to make the new location for the %eq% operator work properly.
    • +
    • Apply “format” parameter on datastep() to both input and output dataset.
    • +
    +
    + +
    • Added “Rdata” option to libname() function engines.
    • Added “label” parameter to datastep() function.
    • Added “format” parameter to datastep() function.
    • -
    • Added lib_export() function to export a library to a -different directory and file format.
    • -
    • Fixed bug on dictionary() that was causing a warning -when there were no rows on the input data frame.
    • -
    -
    -
    -

    -libr 1.2.2 2021-11-22 -

    -
      -
    • Added FAQ and Complete Examples
    • -
    • Fixed bug on dictionary() when encountering datetime -variable with multiple POSIX classes.
    • -
    -
    -
    -

    -libr 1.2.1 2021-10-09 -

    -
      -
    • Added covr and codecov
    • -
    • Fixed bug on dictionary() function that wasn’t showing -width attribute.
    • -
    • Fixed bug on datastep() when applying attributes to a -calculated variable.
    • +
    • Added lib_export() function to export a library to a different directory and file format.
    • +
    • Fixed bug on dictionary() that was causing a warning when there were no rows on the input data frame.
    • +
    +
    + +
    • Added FAQ and Complete Examples
    • +
    • Fixed bug on dictionary() when encountering datetime variable with multiple POSIX classes.
    • +
    +
    + +
    • Added covr and codecov
    • +
    • Fixed bug on dictionary() function that wasn’t showing width attribute.
    • +
    • Fixed bug on datastep() when applying attributes to a calculated variable.
    • Small documentation fixes.
    • -
    -
    -
    -

    -libr 1.2.0 2021-06-29 -

    -
      -
    • Made package compatible to R version 3.6.
    • +
    +
    + +
    • Made package compatible to R version 3.6.
    • Added GitHub actions to test previous versions of R.
    • -
    • Increased performance of datastep() function generally. Benchmarks -show about 40% improvement on average.
    • -
    • Added standard_eval parameter to libname() and -lib_copy() functions to allow user to pass library names as -a variable.
    • -
    • Added quiet parameter on libname() function to -minimize console output if desired.
    • -
    • Added arrays parameter and dsarray() class to -handle data step arrays. This functionality allows iteration across a -list of variables inside a datastep().
    • -
    • Added attributes parameter and dsattr() class -to handle data step attributes. This functionality allows the user to -add attributes to datastep variables.
    • -
    • Fixed bug in datastep() where it was stripping column -attributes on Base R data frames.
    • -
    -
    -
    -

    -libr 1.1.3 2021-02-08 -

    -
      -
    • Fixed bug on datastep when data frame/tibble had a single -column.
    • -
    -
    -
    -

    -libr 1.1.1 2021-01-06 -

    -
      -
    • Integrated libr with logr. All library functions will automatically -provide logging entries if the autolog feature of the logr package is -enabled.
    • +
    • Increased performance of datastep() function generally. Benchmarks show about 40% improvement on average.
    • +
    • Added standard_eval parameter to libname() and lib_copy() functions to allow user to pass library names as a variable.
    • +
    • Added quiet parameter on libname() function to minimize console output if desired.
    • +
    • Added arrays parameter and dsarray() class to handle data step arrays. This functionality allows iteration across a list of variables inside a datastep().
    • +
    • Added attributes parameter and dsattr() class to handle data step attributes. This functionality allows the user to add attributes to datastep variables.
    • +
    • Fixed bug in datastep() where it was stripping column attributes on Base R data frames.
    • +
    +
    + +
    • Fixed bug on datastep when data frame/tibble had a single column.
    • +
    +
    + +
    • Integrated libr with logr. All library functions will automatically provide logging entries if the autolog feature of the logr package is enabled.
    • Added pkgdown site.
    • -
    • Added filter parameters to libname() and -lib_load() functions. The filter parameter allows the user -to specify which data from the library they want loaded into -memory.
    • -
    • Changed default “na” parameter on csv export to empty string instead -of NA to accommodate import into SAS. SAS couldn’t deal with the NA -strings.
    • +
    • Added filter parameters to libname() and lib_load() functions. The filter parameter allows the user to specify which data from the library they want loaded into memory.
    • +
    • Changed default “na” parameter on csv export to empty string instead of NA to accommodate import into SAS. SAS couldn’t deal with the NA strings.
    • Fixed bug in dbf engine when outputting tibbles.
    • -
    • Fixed bug in %eq% operator when comparing objects with different -numbers of classes.
    • -
    • Greatly improved performance of datastep, especially on grouped -tibbles.
    • -
    -
    -
    -

    -libr 1.0.1 2020-12-09 -

    -

    A package to create data libraries, data dictionaries, and the -ability to perform a data step. The major functions are:

    -
      -
    • +
    • Fixed bug in %eq% operator when comparing objects with different numbers of classes.
    • +
    • Greatly improved performance of datastep, especially on grouped tibbles.
    • +
    +
    + +

    A package to create data libraries, data dictionaries, and the ability to perform a data step. The major functions are:

    +
    • libname() function creates a data library
    • dictionary() function creates a data dictionary
    • datastep() function steps through data row-by-row
    • -%eq% allows comparison of any two R objects without -error
    • -
    -

    The packages also contains a variety of functions to manipulate data -libraries:

    - +%eq% allows comparison of any two R objects without error +

The package also contains a variety of functions to manipulate data libraries: * lib_load(): Loads a library into the workspace * lib_unload(): Unloads a library from the workspace * lib_sync(): Synchronizes the workspace with the library list * lib_write(): Writes library data to the file system * lib_add(): Adds data to a library * lib_replace(): Replaces data in a library * lib_remove(): Removes data from a library * lib_copy(): Copies a data library * lib_delete(): Deletes a data library * lib_info(): Returns a data frame of information about the library * lib_path(): Returns the path of a data library * lib_size(): Returns the size of the data library in bytes * specs(): Create a collection of import specifications * import_spec(): Define an import specification for a file

+
-
- +
- - + + diff --git a/docs/pkgdown.css b/docs/pkgdown.css index 1273238..80ea5b8 100644 --- a/docs/pkgdown.css +++ b/docs/pkgdown.css @@ -56,8 +56,10 @@ img.icon { float: right; } -img { +/* Ensure in-page images don't run outside their container */ +.contents img { max-width: 100%; + height: auto; } /* Fix bug in bootstrap (only seen in firefox) */ @@ -78,11 +80,10 @@ dd { /* Section anchors ---------------------------------*/ a.anchor { - margin-left: -30px; - display:inline-block; - width: 30px; - height: 30px; - visibility: hidden; + display: none; + margin-left: 5px; + width: 20px; + height: 20px; background-image: url(./link.svg); background-repeat: no-repeat; @@ -90,17 +91,15 @@ a.anchor { background-position: center center; } -.hasAnchor:hover a.anchor { - visibility: visible; -} - -@media (max-width: 767px) { - .hasAnchor:hover a.anchor { - visibility: hidden; - } +h1:hover .anchor, +h2:hover .anchor, +h3:hover .anchor, +h4:hover .anchor, +h5:hover .anchor, +h6:hover .anchor { + display: inline-block; } - /* Fixes for fixed navbar --------------------------*/ .contents h1, .contents h2, .contents h3, .contents h4 { @@ -264,31 +263,26 @@ table { /* Syntax highlighting ---------------------------------------------------- */ -pre { - word-wrap: normal; - word-break: normal; - border: 1px solid #eee; -} - -pre, code { +pre, code, pre code { background-color: #f8f8f8; color: #333; } +pre, pre code { + white-space: pre-wrap; + word-break: break-all; + overflow-wrap: break-word; +} -pre code { - overflow: auto; - word-wrap: normal; - white-space: pre; +pre { + border: 1px solid #eee; } -pre .img { +pre .img, pre .r-plt { margin: 5px 0; } -pre .img img { +pre .img img, pre .r-plt img { background-color: #fff; - display: block; - height: auto; } code a, pre a { @@ -305,9 +299,8 @@ a.sourceLine:hover { .kw {color: #264D66;} /* keyword */ .co {color: #888888;} /* comment */ -.message { color: black; font-weight: bolder;} -.error { color: orange; font-weight: bolder;} 
-.warning { color: #6A0366; font-weight: bolder;} +.error {font-weight: bolder;} +.warning {font-weight: bolder;} /* Clipboard --------------------------*/ @@ -365,3 +358,27 @@ mark { content: ""; } } + +/* Section anchors --------------------------------- + Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 +*/ + +div.csl-bib-body { } +div.csl-entry { + clear: both; +} +.hanging-indent div.csl-entry { + margin-left:2em; + text-indent:-2em; +} +div.csl-left-margin { + min-width:2em; + float:left; +} +div.csl-right-inline { + margin-left:2em; + padding-left:1em; +} +div.csl-indent { + margin-left: 2em; +} diff --git a/docs/pkgdown.js b/docs/pkgdown.js index 7e7048f..6f0eee4 100644 --- a/docs/pkgdown.js +++ b/docs/pkgdown.js @@ -80,7 +80,7 @@ $(document).ready(function() { var copyButton = ""; - $(".examples, div.sourceCode").addClass("hasCopyButton"); + $("div.sourceCode").addClass("hasCopyButton"); // Insert copy buttons: $(copyButton).prependTo(".hasCopyButton"); @@ -91,7 +91,7 @@ // Initialize clipboard: var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { text: function(trigger) { - return trigger.parentNode.textContent; + return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); } }); diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 6a59a0c..950c25e 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,5 +1,5 @@ -pandoc: '2.18' -pkgdown: 1.6.1 +pandoc: 3.1.1 +pkgdown: 2.0.7 pkgdown_sha: ~ articles: libr-basics: libr-basics.html @@ -10,7 +10,7 @@ articles: libr-faq: libr-faq.html libr-management: libr-management.html libr: libr.html -last_built: 2022-09-08T11:18Z +last_built: 2023-11-16T18:41Z urls: reference: https://libr.r-sassy.org/reference article: https://libr.r-sassy.org/articles diff --git a/docs/reference/datastep.html b/docs/reference/datastep.html index 0a40acb..66884cb 100644 --- a/docs/reference/datastep.html +++ b/docs/reference/datastep.html @@ -1,81 +1,16 @@ - - - - - - - -Step through 
data row-by-row — datastep • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Step through data row-by-row — datastep • libr - - - - - - - - - - - - - - + + -
-
- -
- -
+
@@ -172,80 +94,82 @@

Step through data row-by-row

by-group processing, and perform row-wise or column-wise calculations.

-
datastep(
-  data,
-  steps,
-  keep = NULL,
-  drop = NULL,
-  rename = NULL,
-  by = NULL,
-  calculate = NULL,
-  retain = NULL,
-  attrib = NULL,
-  arrays = NULL,
-  sort_check = TRUE,
-  format = NULL,
-  label = NULL,
-  where = NULL,
-  set = NULL,
-  merge = NULL,
-  merge_by = NULL,
-  merge_in = NULL,
-  log = TRUE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
data

The data to step through.

steps

The operations to perform on the data. This parameter is +

+
datastep(
+  data,
+  steps,
+  keep = NULL,
+  drop = NULL,
+  rename = NULL,
+  by = NULL,
+  calculate = NULL,
+  retain = NULL,
+  attrib = NULL,
+  arrays = NULL,
+  sort_check = TRUE,
+  format = NULL,
+  label = NULL,
+  where = NULL,
+  set = NULL,
+  merge = NULL,
+  merge_by = NULL,
+  merge_in = NULL,
+  log = TRUE
+)
+
+ +
+

Arguments

+
data
+

The data to step through.

+ + +
steps
+

The operations to perform on the data. This parameter is specified as a set of R statements contained within -curly braces. If no steps are desired, pass empty curly braces.

keep

A vector of quoted variable names to keep in the output -data set. By default, all variables are kept.

drop

A vector of quoted variable names to drop from the output -data set. By default, no variables are dropped.

rename

A named vector of quoted variables to rename. The current +curly braces. If no steps are desired, pass empty curly braces.

+ + +
keep
+

A vector of quoted variable names to keep in the output +data set. By default, all variables are kept.

+ + +
drop
+

A vector of quoted variable names to drop from the output +data set. By default, no variables are dropped.

+ + +
rename
+

A named vector of quoted variables to rename. The current variable name should be on the left hand side of the name/value pair, and the new variable name should be on the right. The rename operation is performed after the data step, the keep, and the drop. Therefore, the data steps should use the input variable name. By default, all variables -retain their original names.

by

A vector of quoted variable names to use for by-group processing. +retain their original names.

+ + +
by
+

A vector of quoted variable names to use for by-group processing. This parameter will activate the first. and last. automatic variables, that indicate the first or last rows in a group. These -automatic variables are useful for conditional processing on groups.

calculate

Steps to set up calculated variables. +automatic variables are useful for conditional processing on groups. +The function will also create first and last automatic variables for each +variable specified in the by group.

+ + +
calculate
+

Steps to set up calculated variables. Calculated variables are commonly generated with summary functions such as mean, median, min, max, etc. It is more efficient to set up calculated variables with the calculate parameter and then use those variables in the data step, rather than perform the summary function inside the data step. The calculate block will be executed -immediately before the data step.

retain

A list of variable names and initial values +immediately before the data step.

+ + +
retain
+

A list of variable names and initial values to retain. Retained variables will begin the data step with the initial value. Then for each iteration of the data step, the variable will be populated with the ending value from the previous step. The retain @@ -256,99 +180,102 @@

Arg character column, set the initial value to an empty string, i.e. retain = list(col1 = 0, col2 = ""). There is no default initial value for a variable. You must supply an initial value for each retained -variable.

attrib

A named list of attributes. The list can be either -dsattr objects or single default values. The dsattr +variable.

+ + +
attrib
+

A named list of attributes. The list can be either +dsattr objects or single default values. The dsattr object allows you to set more attributes on each column. The single default value is convenient if you simply want to create a variable. -By default, variables will be created on the fly with no attributes.

arrays

A named list of dsarray objects. The +By default, variables will be created on the fly with no attributes.

+ + +
arrays
+

A named list of dsarray objects. The dsarray is a list of columns which you can iterate over inside the data step. You can iterate over a dsarray either with a for loop, or with a vectorized function. The default value of the arrays parameter is NULL, meaning -no arrays are defined.

sort_check

Checks to see if the input data is sorted according to +no arrays are defined.

+ + +
sort_check
+

Checks to see if the input data is sorted according to the by variable parameter. The sort check will give an error if the input data is not sorted according to the by variable. The check is turned on if the value of sort_check is TRUE, and turned off if FALSE. The default value is TRUE. Turn the sort check off if you want to perform by-group processing on unsorted data, or data that is not sorted according -to the by-group.

format

A named list of formats to assign to the data -frame. Formats will be assigned both before and after the datastep.

label

A named list of labels to assign to the output data frame.

where

An expression to filter the output dataset. The where +to the by-group.

+ + +
format
+

A named list of formats to assign to the data +frame. Formats will be assigned both before and after the datastep.

+ + +
label
+

A named list of labels to assign to the output data frame.

+ + +
where
+

An expression to filter the output dataset. The where clause will be applied prior to any drop, keep, or rename statement. -Use the expression function to assign the where clause.

set

A dataset or list of datasets to append to the input +Use the expression function to assign the where clause.

+ + +
set
+

A dataset or list of datasets to append to the input data frame. The set operation will occur at the beginning of the datastep, prior to the execution of any steps. The columns in the set datasets do not have to match. Where there are no matching columns, the missing -values will be filled with NA.

merge

A dataset or list of datasets to merge with the input +values will be filled with NA.

+ + +
merge
+

A dataset or list of datasets to merge with the input data. The merge operation will occur at the beginning of the datastep, prior to the execution of any steps. When the merge operation is -requested, the by parameter will be used to indicate which variable(s) -to merge by.

merge_by

If the merge parameter is set, the merge_by +requested, the merge_by parameter will be used to indicate which variable(s) +to merge by. If no merge_by is specified, the merge dataset columns will +simply be appended to the right of the input dataset.

+ + +
merge_by
+

If the merge parameter is set, the merge_by parameter will be used to identify the variable(s) to merge by. If merge variables are the same on both datasets, the names may be passed as a simple quoted vector. If the variable names are different, pass the variables to merge on as a named vector. For example, c("ITEMID" = "ITEMCODE") would specify that the join should occur on the "ITEMID" from the dataset specified in the data parameter, and the "ITEMCODE" -variable from the dataset specified on the merge parameter.

merge_in

A vector of column names to be used to hold the merge flags. +variable from the dataset specified on the merge parameter.

+ + +
merge_in
+

A vector of column names to be used to hold the merge flags. The number of names should correspond to the number of datasets being merged. The merge flags will be populated with 0 or 1 values to indicate whether the record came from the corresponding table. Use the where parameter, -delete function, or output function to filter desired results.

log

Whether or not to log the datastep. Default is TRUE. This -parameter is used internally.

+delete function, or output function to filter desired results.

-

Value

-

The processed data frame, tibble, or data table.

-

Details

+
log
+

Whether or not to log the datastep. Default is TRUE. This +parameter is used internally.

+
+
+

Value

+ + +

The processed data frame, tibble, or data table.

+
+
+

Details

Two parameters on the datastep function are required: data and steps. The data parameter is the input data to the data step. The steps parameter contains @@ -361,14 +288,14 @@

Details a dplyr pipeline. The data step allows you to perform deeply nested and complex conditionals within the pipeline. The data step is also very readable compared to other pipeline conditionals.

-

Automatic Variables

- +
+
+

Automatic Variables

The datastep function provides five automatic variables. These variables are generated for every data step, and can -be accessed at any point within the data step:

    -
  • data: Represents the entire input data frame.

  • +be accessed at any point within the data step:

    • data: Represents the entire input data frame.

    • rw: Represents the current row.

    • n.: Contains the row number.

    • first.: Indicates the beginning of a by-group.

    • @@ -376,26 +303,34 @@

      Column Attributes

      - +

      If there are multiple by group variables, the first. and last. +automatic variables indicate an either/or combination of all by variables. +In addition, +first.X and last.X automatic variables will be created for +each variable, where "X" represents the name of the specified variable. +As always, these names are case-sensitive.

      +
+
+

Column Attributes

To set attributes for a column on your data, use the attrib parameter. Example attributes include 'label', 'description', and 'format'. These types of attributes are set using a named list and a -dsattr object. The name of the list item +dsattr object. The name of the list item is the column name you want to set attributes on. The value of the list item is the dsattr object. For a complete list of available attributes, -see the dsattr documentation.

+see the dsattr documentation.

It should be mentioned that the dsattr object is not required. You can also set attributes with a name and a default value. The default value can be any valid data value, such as a number or string.

The label and format attributes may also be set with the 'label' and 'format' parameters. These parameters accept a named list with the labels or formats, and will be assigned to the output data frame.

-

Optional Parameters

- +
+
+

Optional Parameters

Optional parameters on the datastep allow you to shape @@ -425,8 +360,9 @@

Set and Merge Operations

- +
+
+

Set and Merge Operations

The datastep function allows you to join one or more input datasets @@ -447,8 +383,9 @@

Data Step Arrays

- +
+
+

Data Step Arrays

There are times you may want to iterate over columns in your data step. Such @@ -458,14 +395,15 @@

dsarray objects. The dsarray is essentially a list of columns. You can use a for loop to iterate over the dsarray, and also send it into a vectorized function. Data step arrays allow you to perform row-wise calculations. For instance, you can calculate a sum or mean by row for the variables in your array.

-

Output Column Order

- +
+
+

Output Column Order

By default, the data step will retain the column order of any variables that @@ -492,8 +430,9 @@

Datastep Performance

- +
+
+

Datastep Performance

The datastep is intended to be used on small and medium-sized @@ -502,293 +441,293 @@

See also

- -

libname function to create a data library, and -the dictionary function to create a data dictionary.

+
+
+

See also

+

libname function to create a data library, and +the dictionary function to create a data dictionary.

Other datastep: -[.dsarray(), -delete(), -dsarray(), -dsattr(), -length.dsarray(), -output()

- -

Examples

-
# Example #1: Simple Data Step
-df <- datastep(mtcars[1:10,], 
-               keep = c("mpg", "cyl", "disp", "mpgcat", "recdt", "is8cyl"), {
+[.dsarray(),
+delete(),
+dsarray(),
+dsattr(),
+length.dsarray(),
+output()

+
+ +
+

Examples

+
# Example #1: Simple Data Step
+df <- datastep(mtcars[1:10,], 
+               keep = c("mpg", "cyl", "disp", "mpgcat", "recdt", "is8cyl"), {
                  
-  if (mpg >= 20) 
-    mpgcat <- "High"
-  else 
-    mpgcat <- "Low"
+  if (mpg >= 20) 
+    mpgcat <- "High"
+  else 
+    mpgcat <- "Low"
                  
-  recdt <- as.Date("1974-06-10")
+  recdt <- as.Date("1974-06-10")
                  
-  if (cyl == 8)
-    is8cyl <- TRUE
-  else 
-    is8cyl <- FALSE
+  if (cyl == 8)
+    is8cyl <- TRUE
+  else 
+    is8cyl <- FALSE
                  
-})
+})
 
-df
-#                    mpg cyl  disp mpgcat      recdt
-# Mazda RX4         21.0   6 160.0   High 1974-06-10
-# Mazda RX4 Wag     21.0   6 160.0   High 1974-06-10
-# Datsun 710        22.8   4 108.0   High 1974-06-10
-# Hornet 4 Drive    21.4   6 258.0   High 1974-06-10
-# Hornet Sportabout 18.7   8 360.0    Low 1974-06-10
-# Valiant           18.1   6 225.0    Low 1974-06-10
-# Duster 360        14.3   8 360.0    Low 1974-06-10
-# Merc 240D         24.4   4 146.7   High 1974-06-10
-# Merc 230          22.8   4 140.8   High 1974-06-10
-# Merc 280          19.2   6 167.6    Low 1974-06-10
+df
+#                    mpg cyl  disp mpgcat      recdt
+# Mazda RX4         21.0   6 160.0   High 1974-06-10
+# Mazda RX4 Wag     21.0   6 160.0   High 1974-06-10
+# Datsun 710        22.8   4 108.0   High 1974-06-10
+# Hornet 4 Drive    21.4   6 258.0   High 1974-06-10
+# Hornet Sportabout 18.7   8 360.0    Low 1974-06-10
+# Valiant           18.1   6 225.0    Low 1974-06-10
+# Duster 360        14.3   8 360.0    Low 1974-06-10
+# Merc 240D         24.4   4 146.7   High 1974-06-10
+# Merc 230          22.8   4 140.8   High 1974-06-10
+# Merc 280          19.2   6 167.6    Low 1974-06-10
 
-# Example #2: By-group Processing
-df <- datastep(mtcars[1:10,], 
-               keep = c("mpg", "cyl", "gear", "grp"), 
-               by = c("gear"), sort_check = FALSE, {
+# Example #2: By-group Processing
+df <- datastep(mtcars[1:10,], 
+               keep = c("mpg", "cyl", "gear", "grp"), 
+               by = c("gear"), sort_check = FALSE, {
                  
-  if (first.)
-    grp <- "Start"
-  else if (last.)
-    grp <- "End"
-  else 
-    grp <- "-"
+  if (first.)
+    grp <- "Start"
+  else if (last.)
+    grp <- "End"
+  else 
+    grp <- "-"
                  
-})
+})
 
-df
-#                    mpg cyl gear   grp
-# Mazda RX4         21.0   6    4 Start
-# Mazda RX4 Wag     21.0   6    4     -
-# Datsun 710        22.8   4    4   End
-# Hornet 4 Drive    21.4   6    3 Start
-# Hornet Sportabout 18.7   8    3     -
-# Valiant           18.1   6    3     -
-# Duster 360        14.3   8    3   End
-# Merc 240D         24.4   4    4 Start
-# Merc 230          22.8   4    4     -
-# Merc 280          19.2   6    4   End 
+df
+#                    mpg cyl gear   grp
+# Mazda RX4         21.0   6    4 Start
+# Mazda RX4 Wag     21.0   6    4     -
+# Datsun 710        22.8   4    4   End
+# Hornet 4 Drive    21.4   6    3 Start
+# Hornet Sportabout 18.7   8    3     -
+# Valiant           18.1   6    3     -
+# Duster 360        14.3   8    3   End
+# Merc 240D         24.4   4    4 Start
+# Merc 230          22.8   4    4     -
+# Merc 280          19.2   6    4   End 
 
-# Example #3: Calculate Block
-df <- datastep(mtcars, 
-               keep = c("mpg", "cyl", "mean_mpg", "mpgcat"), 
-               calculate = { mean_mpg = mean(mpg) }, {
+# Example #3: Calculate Block
+df <- datastep(mtcars, 
+               keep = c("mpg", "cyl", "mean_mpg", "mpgcat"), 
+               calculate = { mean_mpg = mean(mpg) }, {
                  
-  if (mpg >= mean_mpg)
-    mpgcat <- "High"
-  else 
-    mpgcat <- "Low"
+  if (mpg >= mean_mpg)
+    mpgcat <- "High"
+  else 
+    mpgcat <- "Low"
                  
-})
+})
 
-df[1:10,]
-#                    mpg cyl mean_mpg mpgcat
-# Mazda RX4         21.0   6 20.09062   High
-# Mazda RX4 Wag     21.0   6 20.09062   High
-# Datsun 710        22.8   4 20.09062   High
-# Hornet 4 Drive    21.4   6 20.09062   High
-# Hornet Sportabout 18.7   8 20.09062    Low
-# Valiant           18.1   6 20.09062    Low
-# Duster 360        14.3   8 20.09062    Low
-# Merc 240D         24.4   4 20.09062   High
-# Merc 230          22.8   4 20.09062   High
-# Merc 280          19.2   6 20.09062    Low
+df[1:10,]
+#                    mpg cyl mean_mpg mpgcat
+# Mazda RX4         21.0   6 20.09062   High
+# Mazda RX4 Wag     21.0   6 20.09062   High
+# Datsun 710        22.8   4 20.09062   High
+# Hornet 4 Drive    21.4   6 20.09062   High
+# Hornet Sportabout 18.7   8 20.09062    Low
+# Valiant           18.1   6 20.09062    Low
+# Duster 360        14.3   8 20.09062    Low
+# Merc 240D         24.4   4 20.09062   High
+# Merc 230          22.8   4 20.09062   High
+# Merc 280          19.2   6 20.09062    Low
 
-# Example #4: Data pipeline
-library(dplyr)
-library(magrittr)
+# Example #4: Data pipeline
+library(dplyr)
+library(magrittr)
 
-# Add datastep to dplyr pipeline
-df <- mtcars %>% 
-  select(mpg, cyl, gear) %>% 
-  mutate(mean_mpg = mean(mpg)) %>% 
-  datastep({
+# Add datastep to dplyr pipeline
+df <- mtcars %>% 
+  select(mpg, cyl, gear) %>% 
+  mutate(mean_mpg = mean(mpg)) %>% 
+  datastep({
     
-    if (mpg >= mean_mpg)
-      mpgcat <- "High"
-    else 
-      mpgcat <- "Low"
+    if (mpg >= mean_mpg)
+      mpgcat <- "High"
+    else 
+      mpgcat <- "Low"
     
-  }) %>% 
-  filter(row_number() <= 10)
+  }) %>% 
+  filter(row_number() <= 10)
 
-df
-#     mpg cyl gear mean_mpg mpgcat
-# 1  21.0   6    4 20.09062   High
-# 2  21.0   6    4 20.09062   High
-# 3  22.8   4    4 20.09062   High
-# 4  21.4   6    3 20.09062   High
-# 5  18.7   8    3 20.09062    Low
-# 6  18.1   6    3 20.09062    Low
-# 7  14.3   8    3 20.09062    Low
-# 8  24.4   4    4 20.09062   High
-# 9  22.8   4    4 20.09062   High
-# 10 19.2   6    4 20.09062    Low
+df
+#     mpg cyl gear mean_mpg mpgcat
+# 1  21.0   6    4 20.09062   High
+# 2  21.0   6    4 20.09062   High
+# 3  22.8   4    4 20.09062   High
+# 4  21.4   6    3 20.09062   High
+# 5  18.7   8    3 20.09062    Low
+# 6  18.1   6    3 20.09062    Low
+# 7  14.3   8    3 20.09062    Low
+# 8  24.4   4    4 20.09062   High
+# 9  22.8   4    4 20.09062   High
+# 10 19.2   6    4 20.09062    Low
 
-# Example #5: Drop, Retain and Rename
-df <- datastep(mtcars[1:10, ], 
-               drop = c("disp", "hp", "drat", "qsec", 
-                        "vs", "am", "gear", "carb"), 
-               retain = list(cumwt = 0 ),
-               rename = c(mpg = "MPG", cyl = "Cylinders", wt = "Wgt", 
-                          cumwt = "Cumulative Wgt"), {
+# Example #5: Drop, Retain and Rename
+df <- datastep(mtcars[1:10, ], 
+               drop = c("disp", "hp", "drat", "qsec", 
+                        "vs", "am", "gear", "carb"), 
+               retain = list(cumwt = 0 ),
+               rename = c(mpg = "MPG", cyl = "Cylinders", wt = "Wgt", 
+                          cumwt = "Cumulative Wgt"), {
                  
-  cumwt <- cumwt + wt
+  cumwt <- cumwt + wt
                  
-})
+})
 
-df
-#                    MPG Cylinders   Wgt Cumulative Wgt
-# Mazda RX4         21.0         6 2.620          2.620
-# Mazda RX4 Wag     21.0         6 2.875          5.495
-# Datsun 710        22.8         4 2.320          7.815
-# Hornet 4 Drive    21.4         6 3.215         11.030
-# Hornet Sportabout 18.7         8 3.440         14.470
-# Valiant           18.1         6 3.460         17.930
-# Duster 360        14.3         8 3.570         21.500
-# Merc 240D         24.4         4 3.190         24.690
-# Merc 230          22.8         4 3.150         27.840
-# Merc 280          19.2         6 3.440         31.280
+df
+#                    MPG Cylinders   Wgt Cumulative Wgt
+# Mazda RX4         21.0         6 2.620          2.620
+# Mazda RX4 Wag     21.0         6 2.875          5.495
+# Datsun 710        22.8         4 2.320          7.815
+# Hornet 4 Drive    21.4         6 3.215         11.030
+# Hornet Sportabout 18.7         8 3.440         14.470
+# Valiant           18.1         6 3.460         17.930
+# Duster 360        14.3         8 3.570         21.500
+# Merc 240D         24.4         4 3.190         24.690
+# Merc 230          22.8         4 3.150         27.840
+# Merc 280          19.2         6 3.440         31.280
 
-# Example #6: Attributes and Arrays
+# Example #6: Attributes and Arrays
 
-# Create sample data
-dat <- read.table(header = TRUE, text = '
-   Year  Q1   Q2  Q3  Q4
-   2000 125  137 152 140
-   2001 132  145 138  87
-   2002 101  104 115 121')
+# Create sample data
+dat <- read.table(header = TRUE, text = '
+   Year  Q1   Q2  Q3  Q4
+   2000 125  137 152 140
+   2001 132  145 138  87
+   2002 101  104 115 121')
  
-# Use attrib list to control column order and add labels
-# Use array to calculate row sums and means, and get best quarter
-df <- datastep(dat,
-               attrib = list(Tot = dsattr(0, label = "Year Total"),
-                             Avg = dsattr(0, label = "Year Average"),
-                             Best = dsattr(0, label = "Best Quarter")),
-               arrays = list(qtrs = dsarray("Q1", "Q2", "Q3", "Q4")),
-               drop = "q",
-               steps = {
+# Use attrib list to control column order and add labels
+# Use array to calculate row sums and means, and get best quarter
+df <- datastep(dat,
+               attrib = list(Tot = dsattr(0, label = "Year Total"),
+                             Avg = dsattr(0, label = "Year Average"),
+                             Best = dsattr(0, label = "Best Quarter")),
+               arrays = list(qtrs = dsarray("Q1", "Q2", "Q3", "Q4")),
+               drop = "q",
+               steps = {
                
-                 # Empty brackets return all array values
-                 Tot <- sum(qtrs[])
-                 Avg <- mean(qtrs[])
+                 # Empty brackets return all array values
+                 Tot <- sum(qtrs[])
+                 Avg <- mean(qtrs[])
                  
-                 # Iterate to find best quarter
-                 for (q in qtrs) {
-                   if (qtrs[q] == max(qtrs[]))
-                     Best <- q
-                 }
-               })
+                 # Iterate to find best quarter
+                 for (q in qtrs) {
+                   if (qtrs[q] == max(qtrs[]))
+                     Best <- q
+                 }
+               })
                
-df
-#   Year  Q1  Q2  Q3  Q4 Tot    Avg Best
-# 1 2000 125 137 152 140 554 138.50   Q3
-# 2 2001 132 145 138  87 502 125.50   Q2
-# 3 2002 101 104 115 121 441 110.25   Q4
+df
+#   Year  Q1  Q2  Q3  Q4 Tot    Avg Best
+# 1 2000 125 137 152 140 554 138.50   Q3
+# 2 2001 132 145 138  87 502 125.50   Q2
+# 3 2002 101 104 115 121 441 110.25   Q4
 
-dictionary(df)
-#   A tibble: 8 x 10
-#   Name  Column Class     Label        Description Format Width Justify  Rows   NAs
-#   <chr> <chr>  <chr>     <chr>        <chr>       <lgl>  <int> <chr>   <int> <int>
-# 1 df    Year   integer   NA           NA          NA        NA NA          3     0
-# 2 df    Q1     integer   NA           NA          NA        NA NA          3     0
-# 3 df    Q2     integer   NA           NA          NA        NA NA          3     0
-# 4 df    Q3     integer   NA           NA          NA        NA NA          3     0
-# 5 df    Q4     integer   NA           NA          NA        NA NA          3     0
-# 6 df    Tot    integer   Year Total   NA          NA        NA NA          3     0
-# 7 df    Avg    numeric   Year Average NA          NA        NA NA          3     0
-# 8 df    Best   character Best Quarter NA          NA         2 NA          3     0
+dictionary(df)
+#   A tibble: 8 x 10
+#   Name  Column Class     Label        Description Format Width Justify  Rows   NAs
+#   <chr> <chr>  <chr>     <chr>        <chr>       <lgl>  <int> <chr>   <int> <int>
+# 1 df    Year   integer   NA           NA          NA        NA NA          3     0
+# 2 df    Q1     integer   NA           NA          NA        NA NA          3     0
+# 3 df    Q2     integer   NA           NA          NA        NA NA          3     0
+# 4 df    Q3     integer   NA           NA          NA        NA NA          3     0
+# 5 df    Q4     integer   NA           NA          NA        NA NA          3     0
+# 6 df    Tot    integer   Year Total   NA          NA        NA NA          3     0
+# 7 df    Avg    numeric   Year Average NA          NA        NA NA          3     0
+# 8 df    Best   character Best Quarter NA          NA         2 NA          3     0
 
-# Example #7: Set and Merge Operations
+# Example #7: Set and Merge Operations
 
-# Create sample data
-grp1 <- read.table(header = TRUE, text = '
-  GROUP  NAME
-  G01  Group1
-  G02  Group2
-', stringsAsFactors = FALSE)
+# Create sample data
+grp1 <- read.table(header = TRUE, text = '
+  GROUP  NAME
+  G01  Group1
+  G02  Group2
+', stringsAsFactors = FALSE)
 
-grp2 <- read.table(header = TRUE, text = '
-  GROUP  NAME
-  G03  Group3
-  G04  Group4
-', stringsAsFactors = FALSE)
+grp2 <- read.table(header = TRUE, text = '
+  GROUP  NAME
+  G03  Group3
+  G04  Group4
+', stringsAsFactors = FALSE)
   
-dat <- read.table(header = TRUE, text = '
-  ID AGE SEX GROUP
-  A01 58 F    G01
-  A02 20 M    G02
-  A03 47 F    G05
-  A04 11 M    G03
-  A05 23 F    G01
-', stringsAsFactors = FALSE)
+dat <- read.table(header = TRUE, text = '
+  ID AGE SEX GROUP
+  A01 58 F    G01
+  A02 20 M    G02
+  A03 47 F    G05
+  A04 11 M    G03
+  A05 23 F    G01
+', stringsAsFactors = FALSE)
 
-# Set operation
-grps <- datastep(grp1, set = grp2, {})
-grps
-#   GROUP   NAME
-# 1   G01 Group1
-# 2   G02 Group2
-# 3   G03 Group3
-# 4   G04 Group4
+# Set operation
+grps <- datastep(grp1, set = grp2, {})
+grps
+#   GROUP   NAME
+# 1   G01 Group1
+# 2   G02 Group2
+# 3   G03 Group3
+# 4   G04 Group4
 
-# Merge operation - Outer Join
-res <- datastep(dat, merge = grps, 
-                merge_by = "GROUP", 
-                merge_in = c("inA", "inB"), {})
+# Merge operation - Outer Join
+res <- datastep(dat, merge = grps, 
+                merge_by = "GROUP", 
+                merge_in = c("inA", "inB"), {})
                 
-# View results
-res
-#     ID AGE  SEX GROUP   NAME inA inB
-# 1  A01  58    F   G01 Group1   1   1
-# 2  A05  23    F   G01 Group1   1   1
-# 3  A02  20    M   G02 Group2   1   1
-# 4  A04  11    M   G03 Group3   1   1
-# 5  A03  47    F   G05   <NA>   1   0
-# 6 <NA>  NA <NA>   G04 Group4   0   1
+# View results
+res
+#     ID AGE  SEX GROUP   NAME inA inB
+# 1  A01  58    F   G01 Group1   1   1
+# 2  A05  23    F   G01 Group1   1   1
+# 3  A02  20    M   G02 Group2   1   1
+# 4  A04  11    M   G03 Group3   1   1
+# 5  A03  47    F   G05   <NA>   1   0
+# 6 <NA>  NA <NA>   G04 Group4   0   1
 
-# Merge operation - Inner Join
-res <- datastep(dat, merge = grps, 
-                merge_by = "GROUP", 
-                merge_in = c("inA", "inB"), 
-                where = expression(inA & inB), {})
+# Merge operation - Inner Join
+res <- datastep(dat, merge = grps, 
+                merge_by = "GROUP", 
+                merge_in = c("inA", "inB"), 
+                where = expression(inA & inB), {})
                 
-# View results
-res
-#     ID AGE  SEX GROUP   NAME inA inB
-# 1  A01  58    F   G01 Group1   1   1
-# 2  A05  23    F   G01 Group1   1   1
-# 3  A02  20    M   G02 Group2   1   1
-# 4  A04  11    M   G03 Group3   1   1
+# View results +res +# ID AGE SEX GROUP NAME inA inB +# 1 A01 58 F G01 Group1 1 1 +# 2 A05 23 F G01 Group1 1 1 +# 3 A02 20 M G02 Group2 1 1 +# 4 A04 11 M G03 Group3 1 1

+
+

-
- +
- - + + diff --git a/docs/reference/delete.html b/docs/reference/delete.html index 6ffc7f7..d195891 100644 --- a/docs/reference/delete.html +++ b/docs/reference/delete.html @@ -1,80 +1,15 @@ - - - - - - - -Removes an observation from a datastep — delete • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Removes an observation from a datastep — delete • libr - - - - - - - - - - - + + - - - -
-
- -
- -
+
@@ -170,70 +92,73 @@

Removes an observation from a datastep

it is called within a conditional.

-
delete()
- - -

Value

+
+
delete()
+
-

Observation is marked with a delete flag. No return value.

-

See also

+
+

Value

+ -

Other datastep: -[.dsarray(), -datastep(), -dsarray(), -dsattr(), -length.dsarray(), -output()

+

Observation is marked with a delete flag. No return value.

+
+
+

See also

+

Other datastep: +[.dsarray(), +datastep(), +dsarray(), +dsattr(), +length.dsarray(), +output()

+
-

Examples

-
#' # Remove all cars that are not 4 cylinder
-df <- datastep(mtcars, 
-               keep = c("mpg", "cyl", "disp"), {
+    
+

Examples

+
# Remove all cars that are not 4-cylinder
+df <- datastep(mtcars, 
+               keep = c("mpg", "cyl", "disp"), {
                  
-  if (cyl != 4)
-    delete()
+  if (cyl != 4)
+    delete()
                  
-})
+})
 
-df
-#     mpg cyl  disp
-# 1  22.8   4 108.0
-# 2  24.4   4 146.7
-# 3  22.8   4 140.8
-# 4  32.4   4  78.7
-# 5  30.4   4  75.7
-# 6  33.9   4  71.1
-# 7  21.5   4 120.1
-# 8  27.3   4  79.0
-# 9  26.0   4 120.3
-# 10 30.4   4  95.1
-# 11 21.4   4 121.0
+df +# mpg cyl disp +# 1 22.8 4 108.0 +# 2 24.4 4 146.7 +# 3 22.8 4 140.8 +# 4 32.4 4 78.7 +# 5 30.4 4 75.7 +# 6 33.9 4 71.1 +# 7 21.5 4 120.1 +# 8 27.3 4 79.0 +# 9 26.0 4 120.3 +# 10 30.4 4 95.1 +# 11 21.4 4 121.0
+
+

-
- +

- - + + diff --git a/docs/reference/dictionary.html b/docs/reference/dictionary.html index 6b9027b..7df24b9 100644 --- a/docs/reference/dictionary.html +++ b/docs/reference/dictionary.html @@ -1,55 +1,5 @@ - - - - - - - -Create a Data Dictionary — dictionary • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a Data Dictionary — dictionary • libr - - - - - - - - - - + + - - - - -
-
- -
- -
+

A function to create a data dictionary for a data frame, a tibble, or a data library. The function will generate a tibble of -information about the data. The tibble will contain the following columns:

    -
  • Name: The name of the data object.

  • +information about the data. The tibble will contain the following columns:

    • Name: The name of the data object.

    • Column: The name of the column.

    • Class: The class of the column.

    • Label: The value of the label attribute.

    • @@ -194,81 +115,77 @@

      Create a Data Dictionary

    • NAs: The number of NA values in this column.

    • MaxChar: The maximum character length of the values in this column with no padding.

    • -
    +
+
+
dictionary(x)
-
dictionary(x)
- -

Arguments

- - - - - - -
x

The input library, data frame, or tibble.

- -

See also

- -

libname to create a data library. Also -see the dsattr function to set attributes for your -dataset from within a datastep. To render attributes, +

+

Arguments

+
x
+

The input library, data frame, or tibble.

+ +
+
+

See also

+

libname to create a data library. Also +see the dsattr function to set attributes for your +dataset from within a datastep. To render attributes, see the fmtr package.

+
-

Examples

-
# Create temp directory
-tmp <- tempdir()
+    
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create library
-libname(dat, tmp)
+# Create library
+libname(dat, tmp)
 
-# Add data to the library
-lib_add(dat, beaver1)
-lib_add(dat, iris)
+# Add data to the library
+lib_add(dat, beaver1)
+lib_add(dat, iris)
 
-# Examine the dictionary for the library
-dictionary(dat)
-# A tibble: 9 x 10
-#   Name    Column       Class   Label Description Format Width Justify  Rows   NAs MaxChar
-#   <chr>   <chr>        <chr>   <lgl> <lgl>       <lgl>  <lgl> <lgl>   <int> <int>   <int>
-# 1 beaver1 day          numeric NA    NA          NA     NA    NA        114     0       3
-# 2 beaver1 time         numeric NA    NA          NA     NA    NA        114     0       4
-# 3 beaver1 temp         numeric NA    NA          NA     NA    NA        114     0       5
-# 4 beaver1 activ        numeric NA    NA          NA     NA    NA        114     0       1
-# 5 iris    Sepal.Length numeric NA    NA          NA     NA    NA        150     0       3
-# 6 iris    Sepal.Width  numeric NA    NA          NA     NA    NA        150     0       3
-# 7 iris    Petal.Length numeric NA    NA          NA     NA    NA        150     0       3
-# 8 iris    Petal.Width  numeric NA    NA          NA     NA    NA        150     0       3
-# 9 iris    Species      factor  NA    NA          NA     NA    NA        150     0      10
+# Examine the dictionary for the library
+dictionary(dat)
+# A tibble: 9 x 11
+#   Name    Column       Class   Label Description Format Width Justify  Rows   NAs MaxChar
+#   <chr>   <chr>        <chr>   <lgl> <lgl>       <lgl>  <lgl> <lgl>   <int> <int>   <int>
+# 1 beaver1 day          numeric NA    NA          NA     NA    NA        114     0       3
+# 2 beaver1 time         numeric NA    NA          NA     NA    NA        114     0       4
+# 3 beaver1 temp         numeric NA    NA          NA     NA    NA        114     0       5
+# 4 beaver1 activ        numeric NA    NA          NA     NA    NA        114     0       1
+# 5 iris    Sepal.Length numeric NA    NA          NA     NA    NA        150     0       3
+# 6 iris    Sepal.Width  numeric NA    NA          NA     NA    NA        150     0       3
+# 7 iris    Petal.Length numeric NA    NA          NA     NA    NA        150     0       3
+# 8 iris    Petal.Width  numeric NA    NA          NA     NA    NA        150     0       3
+# 9 iris    Species      factor  NA    NA          NA     NA    NA        150     0      10
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)
+
+
-
- +
- - + + diff --git a/docs/reference/dsarray.html b/docs/reference/dsarray.html index 32afda1..663b061 100644 --- a/docs/reference/dsarray.html +++ b/docs/reference/dsarray.html @@ -1,79 +1,14 @@ - - - - - - - -Create a Data Step Array — dsarray • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a Data Step Array — dsarray • libr - - - - - - - - - - - + + - - - -
-
- -
- -
+

A data step array is an object that allows you to iterate -across a set of columns inside a datastep. This structure is +across a set of columns inside a datastep. This structure is useful when you need to perform the same or similar operations on many columns.

-
dsarray(...)
+
+
dsarray(...)
+
-

Arguments

- - - - - - -
...

Column names to include as part of the datastep array. The +

+

Arguments

+
...
+

Column names to include as part of the datastep array. The names can be provided as quoted strings or a vector of strings. If names are provided as quoted strings, separate the strings with commas -(i.e. dsarray("col1", "col2", "col3")).

- -

Value

+(i.e. dsarray("col1", "col2", "col3")).

-

The datastep array object.

-

Details

+
+
+

Value

+ +

The datastep array object.

+
+
+

Details

The datastep array has an indexer that allows you to access a particular column value. The indexer can be used within a for loop to iterate over the array. In this manner, you can place a set of conditions inside the for loop and run the same conditional logic on all the columns in the array.

You can also use the datastep array with an empty indexer in vectorized -functions like sum, mean, -and max. The empty indexer will return all the +functions like sum, mean, +and max. The empty indexer will return all the values in the array for the current row.

-

See also

- -

libname to create a data library, and -dictionary for generating a data dictionary

+
+
+

See also

+

libname to create a data library, and +dictionary for generating a data dictionary

Other datastep: -[.dsarray(), -datastep(), -delete(), -dsattr(), -length.dsarray(), -output()

+[.dsarray(), +datastep(), +delete(), +dsattr(), +length.dsarray(), +output()

+
-

Examples

-
library(libr)
+    
+

Examples

+
library(libr)
 
-# Create AirPassengers Data Frame
-df <- as.data.frame(t(matrix(AirPassengers, 12, 
-                    dimnames = list(month.abb, seq(1949, 1960)))),
-                    stringsAsFactors = FALSE)
+# Create AirPassengers Data Frame
+df <- as.data.frame(t(matrix(AirPassengers, 12, 
+                    dimnames = list(month.abb, seq(1949, 1960)))),
+                    stringsAsFactors = FALSE)
 
-# Use datastep array to get year tot, mean, and top month
-dat <- datastep(df,
-                arrays = list(months = dsarray(names(df))),
-                attrib = list(Tot = 0, Mean = 0, Top = ""),
-                drop = "mth",
-                {
+# Use datastep array to get year tot, mean, and top month
+dat <- datastep(df,
+                arrays = list(months = dsarray(names(df))),
+                attrib = list(Tot = 0, Mean = 0, Top = ""),
+                drop = "mth",
+                {
                 
-                  Tot <- sum(months[])
-                  Mean <- mean(months[])
+                  Tot <- sum(months[])
+                  Mean <- mean(months[])
                 
-                  for (mth in months) {
-                    if (months[mth] == max(months[])) {
-                      Top <- mth
-                    }
-                  }
+                  for (mth in months) {
+                    if (months[mth] == max(months[])) {
+                      Top <- mth
+                    }
+                  }
                   
-                })
+                })
 
-dat
-#      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec  Tot     Mean Top
-# 1949 112 118 132 129 121 135 148 148 136 119 104 118 1520 126.6667 Aug
-# 1950 115 126 141 135 125 149 170 170 158 133 114 140 1676 139.6667 Aug
-# 1951 145 150 178 163 172 178 199 199 184 162 146 166 2042 170.1667 Aug
-# 1952 171 180 193 181 183 218 230 242 209 191 172 194 2364 197.0000 Aug
-# 1953 196 196 236 235 229 243 264 272 237 211 180 201 2700 225.0000 Aug
-# 1954 204 188 235 227 234 264 302 293 259 229 203 229 2867 238.9167 Jul
-# 1955 242 233 267 269 270 315 364 347 312 274 237 278 3408 284.0000 Jul
-# 1956 284 277 317 313 318 374 413 405 355 306 271 306 3939 328.2500 Jul
-# 1957 315 301 356 348 355 422 465 467 404 347 305 336 4421 368.4167 Aug
-# 1958 340 318 362 348 363 435 491 505 404 359 310 337 4572 381.0000 Aug
-# 1959 360 342 406 396 420 472 548 559 463 407 362 405 5140 428.3333 Aug
-# 1960 417 391 419 461 472 535 622 606 508 461 390 432 5714 476.1667 Jul
+dat +# Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec Tot Mean Top +# 1949 112 118 132 129 121 135 148 148 136 119 104 118 1520 126.6667 Aug +# 1950 115 126 141 135 125 149 170 170 158 133 114 140 1676 139.6667 Aug +# 1951 145 150 178 163 172 178 199 199 184 162 146 166 2042 170.1667 Aug +# 1952 171 180 193 181 183 218 230 242 209 191 172 194 2364 197.0000 Aug +# 1953 196 196 236 235 229 243 264 272 237 211 180 201 2700 225.0000 Aug +# 1954 204 188 235 227 234 264 302 293 259 229 203 229 2867 238.9167 Jul +# 1955 242 233 267 269 270 315 364 347 312 274 237 278 3408 284.0000 Jul +# 1956 284 277 317 313 318 374 413 405 355 306 271 306 3939 328.2500 Jul +# 1957 315 301 356 348 355 422 465 467 404 347 305 336 4421 368.4167 Aug +# 1958 340 318 362 348 363 435 491 505 404 359 310 337 4572 381.0000 Aug +# 1959 360 342 406 396 420 472 548 559 463 407 362 405 5140 428.3333 Aug +# 1960 417 391 419 461 472 535 622 606 508 461 390 432 5714 476.1667 Jul
+
+
-
- +
- - + + diff --git a/docs/reference/dsattr.html b/docs/reference/dsattr.html index 2e65ed0..4920422 100644 --- a/docs/reference/dsattr.html +++ b/docs/reference/dsattr.html @@ -1,83 +1,18 @@ - - - - - - - -Assign Datastep Variable Attributes — dsattr • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Assign Datastep Variable Attributes — dsattr • libr - - - - - - - - - - - - + + - - -
-
- -
- -
+

An object to assign attributes -to a column in a datastep. The parameters allow you +to a column in a datastep. The parameters allow you to set the following attributes: 'class', 'label', 'description', 'width', 'justify', and 'format'. Any other desired attributes can be set with ....

The attributes available in the dsattr class are closely aligned -with those available on the dictionary object.

+with those available on the dictionary object.

-
dsattr(
-  default = NA,
-  label = NULL,
-  description = NULL,
-  width = NULL,
-  format = NULL,
-  justify = NULL,
-  ...
-)
+
+
dsattr(
+  default = NA,
+  label = NULL,
+  description = NULL,
+  width = NULL,
+  format = NULL,
+  justify = NULL,
+  ...
+)
+
-

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
default

The default value of the column. The default value can +

+

Arguments

+
default
+

The default value of the column. The default value can be any valid data value. Typical default values might be an empty string ("") or a zero (0). If no -default value is specified, the column will be defaulted to NA.

label

The label to associate with this column. Accepts +default value is specified, the column will be defaulted to NA.

+ + +
label
+

The label to associate with this column. Accepts any string value. The label will appear as a column header on some -data viewers and reporting packages.

description

A description for this column. Accepts any string +data viewers and reporting packages.

+ + +
description
+

A description for this column. Accepts any string value. The description is intended to be a longer explanation of the -purpose or source of the variable.

width

The desired width for the column in number of characters.

format

The format associated with this column. See the -fmtr package for more information about formatting.

justify

The desired justification for the column. This parameter +purpose or source of the variable.

+ + +
width
+

The desired width for the column in number of characters.

+ + +
format
+

The format associated with this column. See the +fmtr package for more information about formatting.

+ + +
justify
+

The desired justification for the column. This parameter is normally used only for fixed-width, character columns. Valid values -are 'left', 'right', 'center', and 'centre'.

...

Any other attributes you wish to assign to this column. Pass -these additional attributes as a name/value pair.

+are 'left', 'right', 'center', and 'centre'.

-

Value

-

The data step attributes object.

-

See also

+
...
+

Any other attributes you wish to assign to this column. Pass +these additional attributes as a name/value pair.

-

dictionary function to observe the attributes +

+
+

Value

+ + +

The data step attributes object.

+
+
+

See also

+

dictionary function to observe the attributes associated with a dataset. Also see the fdata function in the fmtr package for more information on formatting and rendering data frames.

Other datastep: -[.dsarray(), -datastep(), -delete(), -dsarray(), -length.dsarray(), -output()

+[.dsarray(), +datastep(), +delete(), +dsarray(), +length.dsarray(), +output()

+
-

Examples

-
library(libr)
+    
+

Examples

+
library(libr)
 
-# Create small sample dataframe
-dat <- mtcars[1:10, c("mpg", "cyl")]
+# Create small sample dataframe
+dat <- mtcars[1:10, c("mpg", "cyl")]
 
-# Perform datastep and assign attributes
-dat1 <- datastep(dat, 
-                 attrib = list(mpg = dsattr(label = "Miles Per Gallon"),
-                               cyl = dsattr(label = "Cylinders"),
-                               mpgcat = dsattr(label = "Fuel Efficiency")),
-                {
+# Perform datastep and assign attributes
+dat1 <- datastep(dat, 
+                 attrib = list(mpg = dsattr(label = "Miles Per Gallon"),
+                               cyl = dsattr(label = "Cylinders"),
+                               mpgcat = dsattr(label = "Fuel Efficiency")),
+                {
                 
-                  if (mpg >= 20) 
-                    mpgcat = "High"
-                  else 
-                    mpgcat = "Low"
+                  if (mpg >= 20) 
+                    mpgcat = "High"
+                  else 
+                    mpgcat = "Low"
                 
-                })
+                })
 
-# Print results                 
-dat1
-#                    mpg cyl mpgcat
-# Mazda RX4         21.0   6   High
-# Mazda RX4 Wag     21.0   6   High
-# Datsun 710        22.8   4   High
-# Hornet 4 Drive    21.4   6   High
-# Hornet Sportabout 18.7   8    Low
-# Valiant           18.1   6    Low
-# Duster 360        14.3   8    Low
-# Merc 240D         24.4   4   High
-# Merc 230          22.8   4   High
-# Merc 280          19.2   6    Low
+# Print results                 
+dat1
+#                    mpg cyl mpgcat
+# Mazda RX4         21.0   6   High
+# Mazda RX4 Wag     21.0   6   High
+# Datsun 710        22.8   4   High
+# Hornet 4 Drive    21.4   6   High
+# Hornet Sportabout 18.7   8    Low
+# Valiant           18.1   6    Low
+# Duster 360        14.3   8    Low
+# Merc 240D         24.4   4   High
+# Merc 230          22.8   4   High
+# Merc 280          19.2   6    Low
                 
-# Examine label attributes
-attr(dat1$mpg, "label") 
-# [1] "Miles Per Gallon"
+# Examine label attributes
+attr(dat1$mpg, "label") 
+# [1] "Miles Per Gallon"
 
-attr(dat1$cyl, "label") 
-# [1] "Cylinders"
+attr(dat1$cyl, "label") 
+# [1] "Cylinders"
 
-attr(dat1$mpgcat, "label")
-# [1] "Fuel Efficiency"
+attr(dat1$mpgcat, "label")
+# [1] "Fuel Efficiency"
 
-# See labels in viewer 
-# View(dat1)
+# See labels in viewer +# View(dat1)
+
+

-
- +

- - + + diff --git a/docs/reference/import_spec.html b/docs/reference/import_spec.html index 8721514..a525362 100644 --- a/docs/reference/import_spec.html +++ b/docs/reference/import_spec.html @@ -1,55 +1,5 @@ - - - - - - - -Create an Import Specification — import_spec • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create an Import Specification — import_spec • libr - - - - - - - - - - - - + + - - -
-
- -
- -
+

A function to create the import specifications for a particular data file. This information can be used on the -libname function to correctly assign the data types for +libname function to correctly assign the data types for columns on imported data. The import specifications are defined as name/value pairs, where the name is the column name and the value is the data type indicator. Available data type indicators are 'guess', 'logical', 'character', 'integer', 'numeric', -'date', 'datetime', and 'time'. See the specs function +'date', 'datetime', and 'time'. See the specs function for an example of using import specs.

-
import_spec(..., na = NULL, trim_ws = NULL)
+
+
import_spec(..., na = NULL, trim_ws = NULL)
+
-

Arguments

- - - - - - - - - - - - - - -
...

Named pairs of column names and column data types. +

+

Arguments

+
...
+

Named pairs of column names and column data types. Available types are: 'guess', 'logical', 'character', 'integer', 'numeric', 'date', 'datetime', and 'time'. The date/time data types accept an optional input format. To supply the input format, append it after the data type following an equals sign, e.g.: 'date=%d%B%Y' or 'datetime=%d%m%Y %H:%M:%S'. Default is NULL, meaning no column types are specified, and the function should make its best -guess for each column.

na

A vector of values to be treated as NA. For example, the +guess for each column.

+ + +
na
+

A vector of values to be treated as NA. For example, the vector c('', ' ') will cause empty strings and single blanks to be converted to NA values. Default is NULL, meaning the value of the -na parameter will be taken from the specs function. +na parameter will be taken from the specs function. Any value supplied on the import_spec function will override the -value from the specs function.

trim_ws

Whether or not to trim white space from the input data values. +value from the specs function.

+ + +
trim_ws
+

Whether or not to trim white space from the input data values. The default is NULL, meaning the value of the trim_ws parameter -will be taken from the specs function. Any value supplied +will be taken from the specs function. Any value supplied on the import_spec function will override the value from the -specs function.

+specs function.

-

Value

+
+
+

Value

+ -

The import specification object.

-

See also

- -

libname to create a data library, and -specs for an example using import specs.

+

The import specification object.

+
+
+

See also

+

libname to create a data library, and +specs for an example using import specs.

Other specs: -print.specs(), -read.specs(), -specs(), -write.specs()

+print.specs(), +read.specs(), +specs(), +write.specs()

+
+
-
- +
- - + + diff --git a/docs/reference/index.html b/docs/reference/index.html index 05e5b97..2903d6e 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,76 +1,12 @@ - - - - - - - -Function reference • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Function reference • libr - + + - - - -
-
- -
- -
+
- - - - - - - - - - -
-

Library Functions

-

Functions to create and manage data libraries.

+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+

Library Functions

+

Functions to create and manage data libraries.

+

libname()

Create a data library

+

lib_load()

Load a Library into the Workspace

+

lib_unload()

Unload a Library from the Workspace

+

lib_sync()

Synchronize Loaded Library

+

lib_write()

Write a Data Library to the File System

+

lib_add()

Add Data to a Data Library

+

lib_replace()

Replace Data in a Data Library

+

lib_remove()

Remove Data from a Data Library

+

lib_copy()

Copy a Data Library

+

lib_export()

Export a Data Library

+

lib_delete()

Delete a Data Library

+

lib_info()

Get Information about a Data Library

+

lib_path()

Get the Path for a Data Library

+

lib_size()

Get the Size of a Data Library

+

is.lib()

Class test for a data library

+

print(<lib>)

Print a data library

-

Library Specs Functions

-

Functions to create, read, and write import specs.

+
+

Library Specs Functions

+

Functions to create, read, and write import specs.

+

specs()

Create an Import Spec Collection

+

import_spec()

Create an Import Specification

+

write.specs()

Write import specs to the file system

+

read.specs()

Read import specs from the file system

+

print(<specs>)

Print import specifications

-

Datastep Functions

-

Functions to perform and enhance a datastep.

+
+

Datastep Functions

+

Functions to perform and enhance a datastep.

+

datastep()

Step through data row-by-row

+

dsattr()

Assign Datastep Variable Attributes

+

dsarray()

Create a Data Step Array

+

`[`(<dsarray>)

Indexer for Data Step Array

+

length(<dsarray>)

Length function for dsarray class

+

delete()

Removes an observation from a datastep

+

output()

Outputs an observation from a datastep

-

Other Functions

-

Other useful functions in the libr package.

+
+

Other Functions

+

Other useful functions in the libr package.

+

dictionary()

Create a Data Dictionary

- +
+
-
- +
- - + + diff --git a/docs/reference/is.lib.html b/docs/reference/is.lib.html index 25d9957..b611e5f 100644 --- a/docs/reference/is.lib.html +++ b/docs/reference/is.lib.html @@ -1,78 +1,13 @@ - - - - - - - -Class test for a data library — is.lib • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Class test for a data library — is.lib • libr + + - - - - -
-
- -
- -
+
@@ -166,78 +88,79 @@

Class test for a data library

data library has a class of "lib".

-
is.lib(x)
+
+
is.lib(x)
+
-

Arguments

- - - - - - -
x

The object to test.

+
+

Arguments

+
x
+

The object to test.

-

Value

+
+
+

Value

+ -

TRUE or FALSE, depending on whether or not the object is a +

TRUE or FALSE, depending on whether or not the object is a data library.

-

See also

- - - -

Examples

-
# Create format catalog
-libname(dat, tempdir()) 
+    
+
+

See also

+ +
+ +
+

Examples

+
# Create data library
+libname(dat, tempdir()) 
            
-# Test for "lib" class
-is.lib(dat) 
-# [1] TRUE
+# Test for "lib" class
+is.lib(dat) 
+# [1] TRUE
 
-is.lib(list())
-# [1] FALSE
+is.lib(list())
+# [1] FALSE
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)

+

+
-
- +
- - + + diff --git a/docs/reference/length.dsarray.html b/docs/reference/length.dsarray.html index 320cb8f..26514d4 100644 --- a/docs/reference/length.dsarray.html +++ b/docs/reference/length.dsarray.html @@ -1,78 +1,13 @@ - - - - - - - -Length function for dsarray class — length.dsarray • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Length function for dsarray class — length.dsarray • libr - - + + - - -
-
- -
- -
+
-

A length function for the data step array dsarray. +

A length function for the data step array dsarray. The length function can be used either inside or outside the data step.

-
# S3 method for dsarray
-length(x)
- -

Arguments

- - - - - - -
x

The dsarray object.

- -

Value

- -

The number of items in the specified dsarray.

-

See also

- -

Other datastep: -[.dsarray(), -datastep(), -delete(), -dsarray(), -dsattr(), -output()

- -

Examples

-
# Define datastep array
-carr <- dsarray(names(mtcars))
+    
+
# S3 method for dsarray
+length(x)
+
+ +
+

Arguments

+
x
+

The dsarray object.

+ +
+
+

Value

+ + +

The number of items in the specified dsarray.

+
+
+

See also

+

Other datastep: +[.dsarray(), +datastep(), +delete(), +dsarray(), +dsattr(), +output()

+
+ +
+

Examples

+
# Define datastep array
+carr <- dsarray(names(mtcars))
 
-length(carr)
-# 11
+length(carr) +# 11
+
+
-
- +
- - + + diff --git a/docs/reference/lib_add.html b/docs/reference/lib_add.html index 16e963b..664d2ae 100644 --- a/docs/reference/lib_add.html +++ b/docs/reference/lib_add.html @@ -1,83 +1,18 @@ - - - - - - - -Add Data to a Data Library — lib_add • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Add Data to a Data Library — lib_add • libr - - - - - - - - - - - - + + - - -
-
- -
- -
+
@@ -176,97 +98,95 @@

Add Data to a Data Library

add the data to the workspace environment.

-
lib_add(x, ..., name = NULL)
+
+
lib_add(x, ..., name = NULL)
+
-

Arguments

- - - - - - - - - - - - - - -
x

The library to add data to.

...

The data frame(s) to add to the library. If more than one, -separate with commas.

name

The reference name to use for the data. By default, -the name will be the variable name. To assign a name different -from the variable name, assign a quoted name to this parameter. If more -than one data set is being appended, assign a vector of quoted names.

+
+

Arguments

+
x
+

The library to add data to.

-

See also

- +
...
+

The data frame(s) to add to the library. If more than one, +separate with commas.

-

Examples

-
#' # Create temp directory
-tmp <- tempdir()
+
+
name
+

The reference name to use for the data. By default, +the name will be the variable name. To assign a name different +from the variable name, assign a quoted name to this parameter. If more +than one data set is being appended, assign a vector of quoted names.

+ +
+
+

See also

+ +
+ +
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create library
-libname(dat, tmp)
-# # library 'dat': 0 items
-# - attributes: rds not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
-# NULL
+# Create library
+libname(dat, tmp)
+# # library 'dat': 0 items
+# - attributes: rds not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
+# NULL
 
-# Add data to the library
-lib_add(dat, mtcars, beaver1, iris)
-# library 'dat': 3 items
-# - attributes: not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
-# - items:
-#      Name Extension Rows Cols   Size        LastModified
-# 1  mtcars       rds   32   11 7.5 Kb 2020-11-05 19:32:00
-# 2 beaver1       rds  114    4 5.1 Kb 2020-11-05 19:32:04
-# 3    iris       rds  150    5 7.5 Kb 2020-11-05 19:32:08
+# Add data to the library
+lib_add(dat, mtcars, beaver1, iris)
+# library 'dat': 3 items
+# - attributes: not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
+# - items:
+#      Name Extension Rows Cols   Size        LastModified
+# 1  mtcars       rds   32   11 7.5 Kb 2020-11-05 19:32:00
+# 2 beaver1       rds  114    4 5.1 Kb 2020-11-05 19:32:04
+# 3    iris       rds  150    5 7.5 Kb 2020-11-05 19:32:08
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)

+

+
-
- +

- - + + diff --git a/docs/reference/lib_copy.html b/docs/reference/lib_copy.html index 60a372e..ef95e69 100644 --- a/docs/reference/lib_copy.html +++ b/docs/reference/lib_copy.html @@ -1,84 +1,19 @@ - - - - - - - -Copy a Data Library — lib_copy • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Copy a Data Library — lib_copy • libr - - - - - - - - - - - - - - + + -
-
- -
- -
+
@@ -178,105 +100,106 @@

Copy a Data Library

that is the version that will be copied.

-
lib_copy(x, nm, directory_path, standard_eval = FALSE)
+
+
lib_copy(x, nm, directory_path, standard_eval = FALSE)
+
+ +
+

Arguments

+
x
+

The library to copy.

-

Arguments

- - - - - - - - - - - - - - - - - - -
x

The library to copy.

nm

The variable name to hold the new library. + +

nm
+

The variable name to hold the new library. The parameter will assume non-standard (unquoted) evaluation unless the standard_eval parameter is set -to TRUE.

directory_path

The path to copy the library to.

standard_eval

A TRUE or FALSE value which indicates whether to +to TRUE.

+ + +
directory_path
+

The path to copy the library to.

+ + +
standard_eval
+

A TRUE or FALSE value which indicates whether to use standard (quoted) or non-standard (unquoted) evaluation on the nm parameter. Default is FALSE. Use this parameter if you want to -pass the target library name in a variable.

- -

Value

+pass the target library name in a variable.

-

The new library.

-

See also

+
+
+

Value

+ - +

The new library.

+
+
+

See also

+ +
-

Examples

-
# Create temp directory
-tmp <- tempdir()
+    
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create library
-libname(dat1, tmp)
+# Create library
+libname(dat1, tmp)
 
-# Add dat to library
-lib_add(dat1, mtcars, iris)
+# Add data to library
+lib_add(dat1, mtcars, iris)
 
-# Copy dat1 to dat2
-lib_copy(dat1, dat2, file.path(tmp, "copy"))
-# library 'dat2': 2 items
-# - attributes: not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc/copy
-# - items:
-#     Name Extension Rows Cols   Size        LastModified
-# 1 mtcars       rds   32   11 7.5 Kb 2020-11-05 21:14:54
-# 2   iris       rds  150    5 7.5 Kb 2020-11-05 21:14:54
+# Copy dat1 to dat2
+lib_copy(dat1, dat2, file.path(tmp, "copy"))
+# library 'dat2': 2 items
+# - attributes: not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc/copy
+# - items:
+#     Name Extension Rows Cols   Size        LastModified
+# 1 mtcars       rds   32   11 7.5 Kb 2020-11-05 21:14:54
+# 2   iris       rds  150    5 7.5 Kb 2020-11-05 21:14:54
 
-# Clean up
-lib_delete(dat1)
-lib_delete(dat2)
+# Clean up +lib_delete(dat1) +lib_delete(dat2)
+
+
-
- +
- - + + diff --git a/docs/reference/lib_delete.html b/docs/reference/lib_delete.html index d4aa5b2..98f87e0 100644 --- a/docs/reference/lib_delete.html +++ b/docs/reference/lib_delete.html @@ -1,84 +1,19 @@ - - - - - - - -Delete a Data Library — lib_delete • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Delete a Data Library — lib_delete • libr - - - - - - - - - - + + - - - - -
-
- -
- -
+
@@ -175,98 +97,96 @@

Delete a Data Library

by the delete operation.

The directory that contains the data will also not be affected by the delete operation. To delete the data directory, -use the unlink function or other packaged functions.

+use the unlink function or other packaged functions.

-
lib_delete(x)
- -

Arguments

- - - - - - -
x

The data library to delete.

- -

See also

+
+
lib_delete(x)
+
- +
+

Arguments

+
x
+

The data library to delete.

+ +
+
+

See also

+ +
-

Examples

-
# Create temp directory
-tmp <- tempdir()
+    
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create library
-libname(dat, tmp)
+# Create library
+libname(dat, tmp)
 
-# Add data to library
-lib_add(dat, mtcars)
-lib_add(dat, iris)
+# Add data to library
+lib_add(dat, mtcars)
+lib_add(dat, iris)
 
-# Load library
-lib_load(dat)
+# Load library
+lib_load(dat)
 
-# Examine workspace
-ls()
-# [1] "dat" "dat.iris" "dat.mtcars" "tmp"
+# Examine workspace
+ls()
+# [1] "dat" "dat.iris" "dat.mtcars" "tmp"
 
-# Examine library
-dat
-# library 'dat': 2 items
-# - attributes: not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
-# - items:
-#     Name Extension Rows Cols   Size        LastModified
-# 1 mtcars       rds   32   11 7.5 Kb 2020-11-05 21:18:17
-# 2   iris       rds  150    5 7.5 Kb 2020-11-05 21:18:17
+# Examine library
+dat
+# library 'dat': 2 items
+# - attributes: not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
+# - items:
+#     Name Extension Rows Cols   Size        LastModified
+# 1 mtcars       rds   32   11 7.5 Kb 2020-11-05 21:18:17
+# 2   iris       rds  150    5 7.5 Kb 2020-11-05 21:18:17
 
-# Delete library
-lib_delete(dat)
+# Delete library
+lib_delete(dat)
 
-#' # Examine workspace again
-ls()
-# [1] "tmp"
+# Examine workspace again +ls() +# [1] "tmp"
+
+

-
- +

- - + + diff --git a/docs/reference/lib_export.html b/docs/reference/lib_export.html index 97b346a..8a17c60 100644 --- a/docs/reference/lib_export.html +++ b/docs/reference/lib_export.html @@ -1,55 +1,5 @@ - - - - - - - -Export a Data Library — lib_export • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Export a Data Library — lib_export • libr - - - - - - - - - - - - + + - - -
-
- -
- -
+
@@ -184,117 +106,118 @@

Export a Data Library

that is the version that will be exported.

-
lib_export(x, nm, directory_path, engine, filter = NULL, standard_eval = FALSE)
+
+
lib_export(x, nm, directory_path, engine, filter = NULL, standard_eval = FALSE)
+
+ +
+

Arguments

+
x
+

The library to export.

-

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - -
x

The library to export.

nm

The variable name to hold the new library. + +

nm
+

The variable name to hold the new library. The parameter will assume non-standard (unquoted) evaluation unless the standard_eval parameter is set -to TRUE.

directory_path

The path to export the library to.

engine

The name of the engine to use for the exported data. +to TRUE.

+ + +
directory_path
+

The path to export the library to.

+ + +
engine
+

The name of the engine to use for the exported data. The engine name corresponds to the standard file extension of the data file type. Valid values are 'rds', 'Rdata', 'rda', 'sas7bdat', -'xpt', 'xls', 'xlsx', 'dbf', and 'csv'.

filter

A filter string to limit which datasets are exported. -The filter parameter accepts wildcards.

standard_eval

A TRUE or FALSE value which indicates whether to +'xpt', 'xls', 'xlsx', 'dbf', and 'csv'.

+ + +
filter
+

A filter string to limit which datasets are exported. +The filter parameter accepts wildcards.

+ + +
standard_eval
+

A TRUE or FALSE value which indicates whether to use standard (quoted) or non-standard (unquoted) evaluation on the nm parameter. Default is FALSE. Use this parameter if you want to -pass the target library name in a variable.

+pass the target library name in a variable.

-

Value

+
+
+

Value

+ -

The newly exported library.

-

See also

- - +

The newly exported library.

+
+
+

See also

+ +
-

Examples

-
# Create temp directory
-tmp <- tempdir()
+    
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create library
-libname(dat1, tmp)
+# Create library
+libname(dat1, tmp)
 
-# Add dat to library
-lib_add(dat1, mtcars, iris)
+# Add data to library
+lib_add(dat1, mtcars, iris)
 
-# Export dat1 to dat2
-lib_export(dat1, dat2, file.path(tmp, "export"), "rdata")
-# library 'dat2': 2 items
-# - attributes: rdata not loaded
-# - path: C:\Users\User\AppData\Local\Temp\Rtmp0Sq3kt/export
-# - items:
-#     Name Extension Rows Cols   Size        LastModified
-# 1 mtcars     rdata   32   11 8.1 Kb 2022-06-23 00:10:52
-# 2   iris     rdata  150    5 8.1 Kb 2022-06-23 00:10:52
+# Export dat1 to dat2
+lib_export(dat1, dat2, file.path(tmp, "export"), "rdata")
+# library 'dat2': 2 items
+# - attributes: rdata not loaded
+# - path: C:\Users\User\AppData\Local\Temp\Rtmp0Sq3kt/export
+# - items:
+#     Name Extension Rows Cols   Size        LastModified
+# 1 mtcars     rdata   32   11 8.1 Kb 2022-06-23 00:10:52
+# 2   iris     rdata  150    5 8.1 Kb 2022-06-23 00:10:52
 
-# Clean up
-lib_delete(dat1)
-lib_delete(dat2)
+# Clean up +lib_delete(dat1) +lib_delete(dat2)
+
+

-
- +
- - + + diff --git a/docs/reference/lib_info.html b/docs/reference/lib_info.html index f14cb63..0c00516 100644 --- a/docs/reference/lib_info.html +++ b/docs/reference/lib_info.html @@ -1,80 +1,15 @@ - - - - - - - -Get Information about a Data Library — lib_info • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get Information about a Data Library — lib_info • libr - - - - - - - - - - - - + + - - -
-
- -
- -
+
@@ -170,86 +92,87 @@

Get Information about a Data Library

and the last modified date.

-
lib_info(x)
- -

Arguments

- - - - - - -
x

The data library.

- -

Value

- -

A data frame of information about the library.

-

See also

- - - -

Examples

-
# Create temp directory
-tmp <- tempdir()
+    
+
lib_info(x)
+
+ +
+

Arguments

+
x
+

The data library.

+ +
+
+

Value

+ + +

A data frame of information about the library.

+
+
+

See also

+ +
+ +
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create data library
-libname(dat, tmp)
+# Create data library
+libname(dat, tmp)
 
-# Add data to library
-lib_add(dat, trees, rock, beaver1)
+# Add data to library
+lib_add(dat, trees, rock, beaver1)
 
-# Get library information
-info <- lib_info(dat)
+# Get library information
+info <- lib_info(dat)
 
-# Examine info
-info
-#      Name Extension Rows Cols   Size        LastModified
-# 1 beaver1       rds  114    4 5.3 Kb 2020-11-05 21:27:57
-# 2   rocks       rds   48    4 3.1 Kb 2020-11-05 21:27:56
-# 3   trees       rds   31    3 2.4 Kb 2020-11-05 21:27:56
+# Examine info
+info
+#      Name Extension Rows Cols   Size        LastModified
+# 1 beaver1       rds  114    4 5.3 Kb 2020-11-05 21:27:57
+# 2    rock       rds   48    4 3.1 Kb 2020-11-05 21:27:56
+# 3   trees       rds   31    3 2.4 Kb 2020-11-05 21:27:56
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)
+
+
-
- +
- - + + diff --git a/docs/reference/lib_load.html b/docs/reference/lib_load.html index a77d1e9..1250477 100644 --- a/docs/reference/lib_load.html +++ b/docs/reference/lib_load.html @@ -1,82 +1,17 @@ - - - - - - - -Load a Library into the Workspace — lib_load • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Load a Library into the Workspace — lib_load • libr - - - - - - - - - - - + + - - - -
-
- -
- -
+

The lib_load function loads a data library into an environment. The environment used is associated with the library at -the time it is created with the libname function. +the time it is created with the libname function. When the lib_load function is called, the data frames/tibbles will be loaded with <library>.<data set> syntax. Loading the data frames into the environment makes them easy to access and use in your program.

-
lib_load(x, filter = NULL)
+
+
lib_load(x, filter = NULL)
+
+ +
+

Arguments

+
x
+

The data library to load.

-

Arguments

- - - - - - - - - - -
x

The data library to load.

filter

One or more quoted strings to use as filters for the + +

filter
+

One or more quoted strings to use as filters for the data names to load into the workspace. For more than one filter string, pass them as a vector of strings. The filter string can be a full or partial name. If using a partial name, use a wild-card character (*) -to identify the missing portion. The match will be case-insensitive.

- -

Value

+to identify the missing portion. The match will be case-insensitive.

-

The loaded data library.

-

See also

+
+
+

Value

+ -

lib_unload to unload the library.

+

The loaded data library.

+
+
+

See also

+

lib_unload to unload the library.

Other lib: -is.lib(), -lib_add(), -lib_copy(), -lib_delete(), -lib_export(), -lib_info(), -lib_path(), -lib_remove(), -lib_replace(), -lib_size(), -lib_sync(), -lib_unload(), -lib_write(), -libname(), -print.lib()

+is.lib(), +lib_add(), +lib_copy(), +lib_delete(), +lib_export(), +lib_info(), +lib_path(), +lib_remove(), +lib_replace(), +lib_size(), +lib_sync(), +lib_unload(), +lib_write(), +libname(), +print.lib()

+
-

Examples

-
# Create temp directory
-tmp <- tempdir()
+    
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Save some data to temp directory for illustration purposes
-saveRDS(iris, file.path(tmp, "iris.rds"))
-saveRDS(ToothGrowth, file.path(tmp, "ToothGrowth.rds"))
-saveRDS(PlantGrowth, file.path(tmp, "PlantGrowth.rds"))
+# Save some data to temp directory for illustration purposes
+saveRDS(iris, file.path(tmp, "iris.rds"))
+saveRDS(ToothGrowth, file.path(tmp, "ToothGrowth.rds"))
+saveRDS(PlantGrowth, file.path(tmp, "PlantGrowth.rds"))
 
-# Create library
-libname(dat, tmp)
+# Create library
+libname(dat, tmp)
 
-# Load library into workspace
-lib_load(dat)
+# Load library into workspace
+lib_load(dat)
 
-# Examine workspace
-ls()
-# [1] "dat" "dat.iris" "dat.PlantGrowth" "dat.ToothGrowth" "tmp"
+# Examine workspace
+ls()
+# [1] "dat" "dat.iris" "dat.PlantGrowth" "dat.ToothGrowth" "tmp"
 
-# Use some data
-summary(dat.PlantGrowth)
-summary(dat.ToothGrowth)
+# Use some data
+summary(dat.PlantGrowth)
+summary(dat.ToothGrowth)
 
-# Unload library
-lib_unload(dat)
+# Unload library
+lib_unload(dat)
 
-# Examine workspace again
-ls()
-# [1] "dat" "tmp"
+# Examine workspace again
+ls()
+# [1] "dat" "tmp"
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)
+
+
-
- +
- - + + diff --git a/docs/reference/lib_path.html b/docs/reference/lib_path.html index d79a509..d322cd0 100644 --- a/docs/reference/lib_path.html +++ b/docs/reference/lib_path.html @@ -1,78 +1,13 @@ - - - - - - - -Get the Path for a Data Library — lib_path • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get the Path for a Data Library — lib_path • libr - - + + - - -
-
- -
- -
+
@@ -166,77 +88,78 @@

Get the Path for a Data Library

the library as a string.

-
lib_path(x)
- -

Arguments

- - - - - - -
x

The data library.

- -

Value

- -

The path of the data library as a single string.

-

See also

- - - -

Examples

-
# Create temp directory
-tmp <- tempdir()
+    
+
lib_path(x)
+
+ +
+

Arguments

+
x
+

The data library.

+ +
+
+

Value

+ + +

The path of the data library as a single string.

+
+
+

See also

+ +
+ +
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create library
-libname(dat, tmp)
+# Create library
+libname(dat, tmp)
 
-# Examine library path
-lib_path(dat)
-# [1] "C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc"
+# Examine library path
+lib_path(dat)
+# [1] "C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc"
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)
+
+
-
- +
- - + + diff --git a/docs/reference/lib_remove.html b/docs/reference/lib_remove.html index af3a3f6..769d206 100644 --- a/docs/reference/lib_remove.html +++ b/docs/reference/lib_remove.html @@ -1,79 +1,14 @@ - - - - - - - -Remove Data from a Data Library — lib_remove • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Remove Data from a Data Library — lib_remove • libr - - - - - - - - - - + + - - - - -
-
- -
- -
+
@@ -168,98 +90,99 @@

Remove Data from a Data Library

is loaded, it will also remove that item from the workspace environment.

-
lib_remove(x, name)
+
+
lib_remove(x, name)
+
+ +
+

Arguments

+
x
+

The data library.

-

Arguments

- - - - - - - - - - -
x

The data library.

name

The quoted name of the item to remove from the data library. -For more than one name, pass a vector of quoted names.

-

Value

+
name
+

The quoted name of the item to remove from the data library. +For more than one name, pass a vector of quoted names.

-

The library with the requested item removed.

-

See also

+
+
+

Value

+ - +

The library with the requested item removed.

+
+
+

See also

+ +
-

Examples

-
# Create temp directory
-tmp <- tempdir()
+    
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create library
-libname(dat, tmp)
+# Create library
+libname(dat, tmp)
 
-# Add data to the library
-lib_add(dat, mtcars, beaver1, iris)
-# library 'dat': 3 items
-# - attributes: not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
-# - items:
-#      Name Extension Rows Cols   Size        LastModified
-# 1  mtcars       rds   32   11 7.5 Kb 2020-11-05 19:32:00
-# 2 beaver1       rds  114    4 5.1 Kb 2020-11-05 19:32:04
-# 3    iris       rds  150    5 7.5 Kb 2020-11-05 19:32:08
+# Add data to the library
+lib_add(dat, mtcars, beaver1, iris)
+# library 'dat': 3 items
+# - attributes: not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
+# - items:
+#      Name Extension Rows Cols   Size        LastModified
+# 1  mtcars       rds   32   11 7.5 Kb 2020-11-05 19:32:00
+# 2 beaver1       rds  114    4 5.1 Kb 2020-11-05 19:32:04
+# 3    iris       rds  150    5 7.5 Kb 2020-11-05 19:32:08
 
-# Remove items from the library
-lib_remove(dat, c("beaver1", "iris"))
-# library 'dat': 1 items
-# - attributes: not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
-# - items:
-#     Name Extension Rows Cols   Size        LastModified
-# 1 mtcars       rds   32   11 7.5 Kb 2020-11-05 19:32:40
+# Remove items from the library
+lib_remove(dat, c("beaver1", "iris"))
+# library 'dat': 1 items
+# - attributes: not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
+# - items:
+#     Name Extension Rows Cols   Size        LastModified
+# 1 mtcars       rds   32   11 7.5 Kb 2020-11-05 19:32:40
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)
+
+

-
- +
- - + + diff --git a/docs/reference/lib_replace.html b/docs/reference/lib_replace.html index 40de686..03d6978 100644 --- a/docs/reference/lib_replace.html +++ b/docs/reference/lib_replace.html @@ -1,82 +1,17 @@ - - - - - - - -Replace Data in a Data Library — lib_replace • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Replace Data in a Data Library — lib_replace • libr - - - - - - - - - - - - + + - - -
-
- -
- -
+
@@ -174,100 +96,98 @@

Replace Data in a Data Library

associated with the library engine.

-
lib_replace(x, ..., name = NULL)
+
+
lib_replace(x, ..., name = NULL)
+
-

Arguments

- - - - - - - - - - - - - - -
x

The library to replace data in.

...

The data frame(s) to replace. If you wish to replace more than -one data set, separate with commas.

name

The reference name to use for the data. By default, -the name will be the variable name. To assign a name different -from the variable name, assign a quoted name to this parameter. If more -than one data set is being replaced, assign a vector of quoted names.

+
+

Arguments

+
x
+

The library to replace data in.

-

See also

-

Other lib: -is.lib(), -lib_add(), -lib_copy(), -lib_delete(), -lib_export(), -lib_info(), -lib_load(), -lib_path(), -lib_remove(), -lib_size(), -lib_sync(), -lib_unload(), -lib_write(), -libname(), -print.lib()

+
...
+

The data frame(s) to replace. If you wish to replace more than +one data set, separate with commas.

-

Examples

-
#' # Create temp directory
-tmp <- tempdir()
+
+
name
+

The reference name to use for the data. By default, +the name will be the variable name. To assign a name different +from the variable name, assign a quoted name to this parameter. If more +than one data set is being replaced, assign a vector of quoted names.

+ +
+
+

See also

+

Other lib: +is.lib(), +lib_add(), +lib_copy(), +lib_delete(), +lib_export(), +lib_info(), +lib_load(), +lib_path(), +lib_remove(), +lib_size(), +lib_sync(), +lib_unload(), +lib_write(), +libname(), +print.lib()

+
+ +
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create library
-libname(dat, tmp)
+# Create library
+libname(dat, tmp)
 
-# Add data to the library
-lib_add(dat, mtcars)
-# library 'dat': 3 items
-# - attributes: not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
-# - items:
-#      Name Extension Rows Cols   Size        LastModified
-# 1  mtcars       rds   32   11 7.5 Kb 2020-11-05 19:32:00
+# Add data to the library
+lib_add(dat, mtcars)
+# library 'dat': 1 items
+# - attributes: not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
+# - items:
+#      Name Extension Rows Cols   Size        LastModified
+# 1  mtcars       rds   32   11 7.5 Kb 2020-11-05 19:32:00
 
-# Replace data with a subset
-lib_replace(dat, mtcars[1:10, 1:5], name = "mtcars")
-# library 'dat': 3 items
-# - attributes: not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
-# - items:
-#      Name Extension Rows Cols   Size        LastModified
-# 1  mtcars       rds   10    5 7.5 Kb 2020-11-05 19:33:00
+# Replace data with a subset
+lib_replace(dat, mtcars[1:10, 1:5], name = "mtcars")
+# library 'dat': 1 items
+# - attributes: not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
+# - items:
+#      Name Extension Rows Cols   Size        LastModified
+# 1  mtcars       rds   10    5 7.5 Kb 2020-11-05 19:33:00
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)
+
+
-
- +
- - + + diff --git a/docs/reference/lib_size.html b/docs/reference/lib_size.html index e17dc4f..a66b5df 100644 --- a/docs/reference/lib_size.html +++ b/docs/reference/lib_size.html @@ -1,78 +1,13 @@ - - - - - - - -Get the Size of a Data Library — lib_size • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get the Size of a Data Library — lib_size • libr - - + + - - -
-
- -
- -
+
@@ -166,81 +88,82 @@

Get the Size of a Data Library

by the data library, as stored on disk.

-
lib_size(x)
- -

Arguments

- - - - - - -
x

The data library.

- -

Value

- -

The size of the data library in bytes as stored on the file system.

-

See also

- - - -

Examples

-
# Create temp directory
-tmp <- tempdir()
+    
+
lib_size(x)
+
+ +
+

Arguments

+
x
+

The data library.

+ +
+
+

Value

+ + +

The size of the data library in bytes as stored on the file system.

+
+
+

See also

+ +
+ +
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create library
-libname(dat, tmp)
+# Create library
+libname(dat, tmp)
 
-# Add some data to library
-lib_add(dat, mtcars)
-lib_add(dat, iris)
+# Add some data to library
+lib_add(dat, mtcars)
+lib_add(dat, iris)
 
-# Check size of library
-lib_size(dat)
-# [1] 9757
+# Check size of library
+lib_size(dat)
+# [1] 9757
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)
+
+
-
- +
- - + + diff --git a/docs/reference/lib_sync.html b/docs/reference/lib_sync.html index 483d746..c9995ad 100644 --- a/docs/reference/lib_sync.html +++ b/docs/reference/lib_sync.html @@ -1,55 +1,5 @@ - - - - - - - -Synchronize Loaded Library — lib_sync • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Synchronize Loaded Library — lib_sync • libr - - - - - - - - - - + + - - - - -
-
- -
- -
+
@@ -188,105 +110,106 @@

Synchronize Loaded Library

write any data to disk. Also note that the lib_sync function will not automatically remove any variables from the library list that have been removed from the workspace. To remove items from the library -list, use the lib_remove function. To write data to -disk, use the lib_write function.

+list, use the lib_remove function. To write data to +disk, use the lib_write function.

+
+ +
+
lib_sync(x, name = NULL)
-
lib_sync(x, name = NULL)
+
+

Arguments

+
x
+

The data library to synchronize.

-

Arguments

- - - - - - - - - - -
x

The data library to synchronize.

name

The name of the library to sync if not the variable -name. Used internally.

-

Value

+
name
+

The name of the library to sync if not the variable +name. Used internally.

-

The synchronized data library.

-

See also

+
+
+

Value

+ - +

The synchronized data library.

+
+
+

See also

+ +
-

Examples

-
# Create temp directory
-tmp <- tempdir()
+    
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create library
-libname(dat, tmp)
-# library 'dat': 0 items
-# - attributes: not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
-# NULL
+# Create library
+libname(dat, tmp)
+# library 'dat': 0 items
+# - attributes: not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
+# NULL
 
-# Load the library 
-lib_load(dat)
+# Load the library 
+lib_load(dat)
 
-# Add data to the workspace
-dat.mtcars <- mtcars
-dat.beaver1 <- beaver1
-dat.iris <- iris
+# Add data to the workspace
+dat.mtcars <- mtcars
+dat.beaver1 <- beaver1
+dat.iris <- iris
 
-# Sync the library
-lib_sync(dat)
-# library 'dat': 3 items
-# - attributes: loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
-# - items:
-#      Name Extension Rows Cols   Size LastModified
-# 1 beaver1        NA  114    4 4.6 Kb         <NA>
-# 2    iris        NA  150    5 7.1 Kb         <NA>
-# 3  mtcars        NA   32   11   7 Kb         <NA>
+# Sync the library
+lib_sync(dat)
+# library 'dat': 3 items
+# - attributes: loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
+# - items:
+#      Name Extension Rows Cols   Size LastModified
+# 1 beaver1        NA  114    4 4.6 Kb         <NA>
+# 2    iris        NA  150    5 7.1 Kb         <NA>
+# 3  mtcars        NA   32   11   7 Kb         <NA>
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)
+
+ -
- +
- - + + diff --git a/docs/reference/lib_unload.html b/docs/reference/lib_unload.html index c011c32..be98641 100644 --- a/docs/reference/lib_unload.html +++ b/docs/reference/lib_unload.html @@ -1,82 +1,17 @@ - - - - - - - -Unload a Library from the Workspace — lib_unload • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Unload a Library from the Workspace — lib_unload • libr - - - - - - - - - - - + + - - - -
-
- -
- -
+
@@ -174,106 +96,107 @@

Unload a Library from the Workspace

two instances can become out of sync if you change the data in working memory.

-
lib_unload(x, sync = TRUE, name = NULL)
+
+
lib_unload(x, sync = TRUE, name = NULL)
+
+ +
+

Arguments

+
x
+

The data library to unload.

-

Arguments

- - - - - - - - - - - - - - -
x

The data library to unload.

sync

Whether to sync the workspace with the library list before + +

sync
+

Whether to sync the workspace with the library list before it is unloaded. Default is TRUE. If you want to unload the workspace -without saving the workspace data, set this parameter to FALSE.

name

The name of the library to unload, if the name is different -than the variable name. Used internally.

+without saving the workspace data, set this parameter to FALSE.

+ -

Value

+
name
+

The name of the library to unload, if the name is different +than the variable name. Used internally.

-

The unloaded data library.

-

See also

+
+
+

Value

+ -

lib_load to load the library.

+

The unloaded data library.

+
+
+

See also

+

lib_load to load the library.

Other lib: -is.lib(), -lib_add(), -lib_copy(), -lib_delete(), -lib_export(), -lib_info(), -lib_load(), -lib_path(), -lib_remove(), -lib_replace(), -lib_size(), -lib_sync(), -lib_write(), -libname(), -print.lib()

+is.lib(), +lib_add(), +lib_copy(), +lib_delete(), +lib_export(), +lib_info(), +lib_load(), +lib_path(), +lib_remove(), +lib_replace(), +lib_size(), +lib_sync(), +lib_write(), +libname(), +print.lib()

+
-

Examples

-
# Create temp directory
-tmp <- tempdir()
+    
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create library
-libname(dat, tmp)
+# Create library
+libname(dat, tmp)
 
-# Add data to library
-lib_add(dat, iris, ToothGrowth, PlantGrowth)
+# Add data to library
+lib_add(dat, iris, ToothGrowth, PlantGrowth)
 
-# Load library into workspace
-lib_load(dat)
+# Load library into workspace
+lib_load(dat)
 
-# Examine workspace
-ls()
-# [1] "dat" "dat.iris" "dat.PlantGrowth" "dat.ToothGrowth" "tmp"
+# Examine workspace
+ls()
+# [1] "dat" "dat.iris" "dat.PlantGrowth" "dat.ToothGrowth" "tmp"
 
-# Use some data
-summary(dat.PlantGrowth)
-summary(dat.ToothGrowth)
+# Use some data
+summary(dat.PlantGrowth)
+summary(dat.ToothGrowth)
 
-# Unload library
-lib_unload(dat)
+# Unload library
+lib_unload(dat)
 
-# Examine workspace again
-ls()
-# [1] "dat" "tmp"
+# Examine workspace again
+ls()
+# [1] "dat" "tmp"
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)
+
+ -
- +
- - + + diff --git a/docs/reference/lib_write.html b/docs/reference/lib_write.html index 7dc6e16..b2b6aa6 100644 --- a/docs/reference/lib_write.html +++ b/docs/reference/lib_write.html @@ -1,55 +1,5 @@ - - - - - - - -Write a Data Library to the File System — lib_write • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Write a Data Library to the File System — lib_write • libr - - - - - - - - - - + + - - - - -
-
- -
- -
+
@@ -176,7 +99,7 @@

Write a Data Library to the File System

to the file system. The library will be written to the directory for which it was defined, and each data frame will be written in the format associated with the library data engine. See the -libname function for further elaboration on the types +libname function for further elaboration on the types of engines available, and the assumptions/limitations of each.

By default, the lib_write function will not write data that has not changed. Prior to writing a file, lib_write will compare the @@ -184,114 +107,116 @@

Write a Data Library to the File System

the function will overwrite the version on disk. To override the default behavior, use the force option to force lib_write to write every data file to disk.

+

Note that writing sas7bdat files to disk is not supported.

+
+ +
+
lib_write(x, force = FALSE)
-
lib_write(x, force = FALSE)
+
+

Arguments

+
x
+

The data library to write.

-

Arguments

- - - - - - - - - - -
x

The data library to write.

force

Force writing each data file to disk, even if it has not -changed.

-

Value

+
force
+

Force writing each data file to disk, even if it has not +changed.

-

The saved data library.

-

See also

+
+
+

Value

+ - +

The saved data library.

+
+
+

See also

+ +
-

Examples

-
# Create temp directory
-tmp <- tempdir()
+    
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create library
-libname(dat, tmp)
-# # library 'dat': 0 items
-# - attributes: rds not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
-# NULL
+# Create library
+libname(dat, tmp)
+# # library 'dat': 0 items
+# - attributes: rds not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
+# NULL
 
-# Load the empty library 
-lib_load(dat)
+# Load the empty library 
+lib_load(dat)
 
-# Add data to the library
-dat.mtcars <- mtcars
-dat.beaver1 <- beaver1
-dat.iris <- iris
+# Add data to the library
+dat.mtcars <- mtcars
+dat.beaver1 <- beaver1
+dat.iris <- iris
 
-# Unload the library
-lib_unload(dat)
-# library 'dat': 3 items
-# - attributes: rds not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
-# - items:
-#      Name Extension Rows Cols   Size LastModified
-# 1 beaver1        NA  114    4 4.6 Kb         <NA>
-# 2    iris        NA  150    5 7.1 Kb         <NA>
-# 3  mtcars        NA   32   11   7 Kb         <NA>
+# Unload the library
+lib_unload(dat)
+# library 'dat': 3 items
+# - attributes: rds not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
+# - items:
+#      Name Extension Rows Cols   Size LastModified
+# 1 beaver1        NA  114    4 4.6 Kb         <NA>
+# 2    iris        NA  150    5 7.1 Kb         <NA>
+# 3  mtcars        NA   32   11   7 Kb         <NA>
 
-# Write the library to the file system
-lib_write(dat)
-# library 'dat': 3 items
-#- attributes: not loaded
-#- path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
-#- items:
-#     Name Extension Rows Cols   Size        LastModified
-#1 beaver1       rds  114    4 4.8 Kb 2020-11-05 20:47:16
-#2    iris       rds  150    5 7.3 Kb 2020-11-05 20:47:16
-#3  mtcars       rds   32   11 7.3 Kb 2020-11-05 20:47:16
+# Write the library to the file system
+lib_write(dat)
+# library 'dat': 3 items
+#- attributes: not loaded
+#- path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
+#- items:
+#     Name Extension Rows Cols   Size        LastModified
+#1 beaver1       rds  114    4 4.8 Kb 2020-11-05 20:47:16
+#2    iris       rds  150    5 7.3 Kb 2020-11-05 20:47:16
+#3  mtcars       rds   32   11 7.3 Kb 2020-11-05 20:47:16
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)
+
+ -
- +
- - + + diff --git a/docs/reference/libname.html b/docs/reference/libname.html index be105db..61f0186 100644 --- a/docs/reference/libname.html +++ b/docs/reference/libname.html @@ -1,83 +1,18 @@ - - - - - - - -Create a data library — libname • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a data library — libname • libr - - - - - - - - - - - - - - + + -
-
- -
- -
+
@@ -176,107 +98,117 @@

Create a data library

copy the library, and write any changed data to the file system.

-
libname(
-  name,
-  directory_path,
-  engine = "rds",
-  read_only = FALSE,
-  env = parent.frame(),
-  import_specs = NULL,
-  filter = NULL,
-  standard_eval = FALSE,
-  quiet = FALSE,
-  log = TRUE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
name

The unquoted name of the library to create. The library name will +

+
libname(
+  name,
+  directory_path,
+  engine = "rds",
+  read_only = FALSE,
+  env = parent.frame(),
+  import_specs = NULL,
+  filter = NULL,
+  standard_eval = FALSE,
+  quiet = FALSE,
+  log = TRUE,
+  where = NULL
+)
+
+ +
+

Arguments

+
name
+

The unquoted name of the library to create. The library name will be created as a variable in the environment specified on the env parameter. The default environment is the parent frame. If you want to pass the library name as a quoted string or a variable, set the standard_eval parameter to TRUE to turn off the -non-standard evaluation.

directory_path

A directory path to associate with the library. If +non-standard evaluation.

+ + +
directory_path
+

A directory path to associate with the library. If the directory contains data files of the type specified on the engine parameter, they will be imported into the library list. If the directory does not contain data sets of the appropriate type, it will be created as an empty library. If the directory does not exist, -it will be created by the libname function.

engine

The engine to associate with the library. The specified +it will be created by the libname function.

+ + +
engine
+

The engine to associate with the library. The specified engine will be used to import and export data. The engine name corresponds to the standard file extension of the data file type. The default engine is 'rds'. Valid values are 'rds', 'Rdata', 'rda', 'sas7bdat', 'xpt', 'xls', 'xlsx', -'dbf', and 'csv'.

read_only

Whether the library should be created as read-only. +'dbf', and 'csv'.

+ + +
read_only
+

Whether the library should be created as read-only. Default is FALSE. If TRUE, the user will be restricted from -appending, removing, or writing any data from memory to the file system.

env

The environment to use for the libname. -Default is parent.frame(). When working inside a function, the -parent.frame() will refer to the local function scope. When - working outside a function, the parent.frame() will be the +appending, removing, or writing any data from memory to the file system.

+ + +
env
+

The environment to use for the libname. +Default is parent.frame(). When working inside a function, the +parent.frame() will refer to the local function scope. When + working outside a function, the parent.frame() will be the global environment. If the env parameter is set to a custom environment, the custom environment will be used for all subsequent - operations with that libname.

import_specs

A collection of import specifications, -defined using the specs function. + operations with that libname.

+ + +
import_specs
+

A collection of import specifications, +defined using the specs function. The import specs should be named according to the file names in -the library directory. See the specs function for additional -information.

filter

One or more quoted strings to use as filters for the incoming +the library directory. See the specs function for additional +information.

+ + +
filter
+

One or more quoted strings to use as filters for the incoming file names. For more than one filter string, pass them as a vector of strings. The filter string can be a full or partial file name, without extension. If using a partial file name, use a wild-card character (*) -to identify the missing portion. The match will be case-insensitive.

standard_eval

A TRUE or FALSE value which indicates whether to +to identify the missing portion. The match will be case-insensitive.

+ + +
standard_eval
+

A TRUE or FALSE value which indicates whether to use standard (quoted) or non-standard (unquoted) evaluation on the library name parameter. Use standard evaluation when you want to pass -the library name with a variable. Default is FALSE.

quiet

When TRUE, minimizes output to the console when loading -files. Default is FALSE.

log

Whether to log the libname operation. Default is TRUE. This -parameter is used internally.

- -

Value

- -

The library object, with all data files loaded into the library +the library name with a variable. Default is FALSE.

+ + +
quiet
+

When TRUE, minimizes output to the console when loading +files. Default is FALSE.

+ + +
log
+

Whether to log the libname operation. Default is TRUE. This +parameter is used internally.

+ + +
where
+

An expression used to subset all datasets in the library. +The where clause will be executed when the library is created. Use the +Base R expression function to define the subset. If a where clause +is supplied, the library will be opened read-only.

+ +
+
+

Value

+ + +

The library object, with all data files loaded into the library list. Items in the list will be named according to the file name, minus the file extension.

-

Details

- +
+
+

Details

For most projects, a data file does not exist in isolation. There are sets of related files of the same file type. The aim of the libname function is to take advantage of this fact, and give you an easy way to manage @@ -287,7 +219,7 @@

Details executes, it will load all the data in the directory into the list, and assign the file name (without extension) as the list item name. Data can be accessed using list syntax, or loaded directly into the local environment using the -lib_load function.

+lib_load function.

The libname function provides several data engines to read data of different types. For example, there is an engine for Excel files, and another engine for SAS® datasets. The engines are identified @@ -304,14 +236,14 @@

Details import options on the libname function. If your data does not follow standard conventions, it is recommended that you import your data using a package that gives you more control over import options.

-

Data Engines

- +
+
+

Data Engines

The libname function currently provides seven different engines for seven different types of data files. -Here is a complete list of available engines and some commentary about each:

+
+

File Filters

If you wish to import only a portion of your data files into a library, @@ -372,8 +303,9 @@

< wild-card (*) for partial matching. For example, "te*" means any file name that begins with a "te", and "*st" means any file name that ends with an "st".

-

Import Specifications

- +

+
+

Import Specifications

In most cases, it is not necessary to specify the data types for incoming @@ -385,109 +317,109 @@

specs and +import_spec documentation for further information and examples of defining an import spec.

-

See also

- -

specs to define import specifications, -dictionary to view the data dictionary for a library, -and datastep to perform a data step.

+
+
+

See also

+

specs to define import specifications, +dictionary to view the data dictionary for a library, +and datastep to perform a data step.

Other lib: -is.lib(), -lib_add(), -lib_copy(), -lib_delete(), -lib_export(), -lib_info(), -lib_load(), -lib_path(), -lib_remove(), -lib_replace(), -lib_size(), -lib_sync(), -lib_unload(), -lib_write(), -print.lib()

- -

Examples

-
# Create temp directory
-tmp <- tempdir()
+is.lib(),
+lib_add(),
+lib_copy(),
+lib_delete(),
+lib_export(),
+lib_info(),
+lib_load(),
+lib_path(),
+lib_remove(),
+lib_replace(),
+lib_size(),
+lib_sync(),
+lib_unload(),
+lib_write(),
+print.lib()

+
+ +
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Save some data to temp directory
-# for illustration purposes
-saveRDS(trees, file.path(tmp, "trees.rds"))
-saveRDS(rock, file.path(tmp, "rocks.rds"))
-saveRDS(beaver1, file.path(tmp, "beaver1.rds"))
+# Save some data to temp directory
+# for illustration purposes
+saveRDS(trees, file.path(tmp, "trees.rds"))
+saveRDS(rock, file.path(tmp, "rocks.rds"))
+saveRDS(beaver1, file.path(tmp, "beaver1.rds"))
 
-# Create data library
-libname(dat, tmp)
-# # library 'dat': 3 items
-# - attributes: rds not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpklJcfl
-# - items:
-#      Name Extension Rows Cols   Size        LastModified
-# 1 beaver1       rds  114    4 5.9 Kb 2020-12-06 15:21:30
-# 2   rocks       rds   48    4 3.6 Kb 2020-12-06 15:21:30
-# 3   trees       rds   31    3 2.9 Kb 2020-12-06 15:21:30
+# Create data library
+libname(dat, tmp)
+# # library 'dat': 3 items
+# - attributes: rds not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpklJcfl
+# - items:
+#      Name Extension Rows Cols   Size        LastModified
+# 1 beaver1       rds  114    4 5.9 Kb 2020-12-06 15:21:30
+# 2   rocks       rds   48    4 3.6 Kb 2020-12-06 15:21:30
+# 3   trees       rds   31    3 2.9 Kb 2020-12-06 15:21:30
 
-# Print dictionary for library
-dictionary(dat)
-# A tibble: 11 x 10
-#    Name    Column Class   Label Description Format Width Justify  Rows   NAs
-#    <chr>   <chr>  <chr>   <chr> <chr>       <lgl>  <lgl> <chr>   <int> <int>
-#  1 beaver1 day    numeric NA    NA          NA     NA    NA        114     0
-#  2 beaver1 time   numeric NA    NA          NA     NA    NA        114     0
-#  3 beaver1 temp   numeric NA    NA          NA     NA    NA        114     0
-#  4 beaver1 activ  numeric NA    NA          NA     NA    NA        114     0
-#  5 rocks   area   integer NA    NA          NA     NA    NA         48     0
-#  6 rocks   peri   numeric NA    NA          NA     NA    NA         48     0
-#  7 rocks   shape  numeric NA    NA          NA     NA    NA         48     0
-#  8 rocks   perm   numeric NA    NA          NA     NA    NA         48     0
-#  9 trees   Girth  numeric NA    NA          NA     NA    NA         31     0
-# 10 trees   Height numeric NA    NA          NA     NA    NA         31     0
-# 11 trees   Volume numeric NA    NA          NA     NA    NA         31     0
+# Print dictionary for library
+dictionary(dat)
+# A tibble: 11 x 10
+#    Name    Column Class   Label Description Format Width Justify  Rows   NAs
+#    <chr>   <chr>  <chr>   <chr> <chr>       <lgl>  <lgl> <chr>   <int> <int>
+#  1 beaver1 day    numeric NA    NA          NA     NA    NA        114     0
+#  2 beaver1 time   numeric NA    NA          NA     NA    NA        114     0
+#  3 beaver1 temp   numeric NA    NA          NA     NA    NA        114     0
+#  4 beaver1 activ  numeric NA    NA          NA     NA    NA        114     0
+#  5 rocks   area   integer NA    NA          NA     NA    NA         48     0
+#  6 rocks   peri   numeric NA    NA          NA     NA    NA         48     0
+#  7 rocks   shape  numeric NA    NA          NA     NA    NA         48     0
+#  8 rocks   perm   numeric NA    NA          NA     NA    NA         48     0
+#  9 trees   Girth  numeric NA    NA          NA     NA    NA         31     0
+# 10 trees   Height numeric NA    NA          NA     NA    NA         31     0
+# 11 trees   Volume numeric NA    NA          NA     NA    NA         31     0
 
-# Load library into workspace 
-lib_load(dat)
+# Load library into workspace 
+lib_load(dat)
 
-# Print summaries for each data frame
-# Note that once loaded into the workspace, 
-# data can be accessed using two-level syntax.
-summary(dat.rocks)
-summary(dat.trees)
-summary(dat.beaver1)
+# Print summaries for each data frame
+# Note that once loaded into the workspace, 
+# data can be accessed using two-level syntax.
+summary(dat.rocks)
+summary(dat.trees)
+summary(dat.beaver1)
 
-#Unload from workspace
-lib_unload(dat)
+#Unload from workspace
+lib_unload(dat)
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)
+
+
-
- +
- - + + diff --git a/docs/reference/libr.html b/docs/reference/libr.html index fd3c047..30af98d 100644 --- a/docs/reference/libr.html +++ b/docs/reference/libr.html @@ -1,55 +1,5 @@ - - - - - - - -Libnames, Data Dictionaries and Data Steps — libr • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Libnames, Data Dictionaries and Data Steps — libr • libr - - - - - - - - - - - - - - + + -
-
- -
- -
+
@@ -197,26 +119,25 @@

Libnames, Data Dictionaries and Data Steps

an entire directory of data files. A data dictionary is a data frame full of information about a data library, data frame, or tibble. And a data step allows row-by-row processing of data.

-

The functions contained in the libr package are as follows:

    -
  • libname: Creates a data library

  • -
  • dictionary: Creates a data dictionary

  • -
  • datastep: Perform row-by-row processing of data

  • -
  • lib_load: Loads a library into the workspace

  • -
  • lib_unload: Unloads a library from the workspace

  • -
  • lib_sync: Synchronizes the workspace with the library +

    The functions contained in the libr package are as follows:

    • libname: Creates a data library

    • +
    • dictionary: Creates a data dictionary

    • +
    • datastep: Perform row-by-row processing of data

    • +
    • lib_load: Loads a library into the workspace

    • +
    • lib_unload: Unloads a library from the workspace

    • +
    • lib_sync: Synchronizes the workspace with the library list

    • -
    • lib_write: Writes library data to the file system

    • -
    • lib_add: Adds data to a library

    • -
    • lib_replace: Replaces data in a library

    • -
    • lib_remove: Removes data from a library

    • -
    • lib_copy: Copies a data library

    • -
    • lib_delete: Deletes a data library

    • -
    • lib_info: Returns a data frame of information about the +

    • lib_write: Writes library data to the file system

    • +
    • lib_add: Adds data to a library

    • +
    • lib_replace: Replaces data in a library

    • +
    • lib_remove: Removes data from a library

    • +
    • lib_copy: Copies a data library

    • +
    • lib_delete: Deletes a data library

    • +
    • lib_info: Returns a data frame of information about the library

    • -
    • lib_path: Returns the path of a data library

    • -
    • lib_size: Returns the size of the data library in bytes

    • -
    • import_spec: Defines an import spec for a specific file

    • -
    • specs: Contains all the import specs for a library

    • +
    • lib_path: Returns the path of a data library

    • +
    • lib_size: Returns the size of the data library in bytes

    • +
    • import_spec: Defines an import spec for a specific file

    • +
    • specs: Contains all the import specs for a library

    Note that the libr package is intended to be used with small and medium-sized data sets. It is not recommended for big data, as big data requires very careful control over which data is or is not loaded into memory. @@ -226,32 +147,27 @@

    Libnames, Data Dictionaries and Data Steps

    -
+
-
- +
- - + + diff --git a/docs/reference/output.html b/docs/reference/output.html index 4c3d4c2..603c6f7 100644 --- a/docs/reference/output.html +++ b/docs/reference/output.html @@ -1,81 +1,16 @@ - - - - - - - -Outputs an observation from a datastep — output • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Outputs an observation from a datastep — output • libr - - - - - - - - - - - + + - - - -
-
- -
- -
+
@@ -172,125 +94,128 @@

Outputs an observation from a datastep

in that you can output multiple rows for the same input observation.

-
output()
- - -

Value

+
+
output()
+
-

Observation is marked with a output flag. No return value.

-

See also

+
+

Value

+ -

Other datastep: -[.dsarray(), -datastep(), -delete(), -dsarray(), -dsattr(), -length.dsarray()

+

Observation is marked with an output flag. No return value.

+
+
+

See also

+

Other datastep: +[.dsarray(), +datastep(), +delete(), +dsarray(), +dsattr(), +length.dsarray()

+
-

Examples

-
#' # Example 1: Output all cars that are 4 cylinder 
-df <- datastep(mtcars, 
-               keep = c("mpg", "cyl", "disp"), {
+    
+

Examples

+
# Example 1: Output all cars that are 4 cylinder 
+df <- datastep(mtcars, 
+               keep = c("mpg", "cyl", "disp"), {
                  
-  if (cyl == 4)
-    output()
+  if (cyl == 4)
+    output()
                  
-})
+})
 
-df
-#     mpg cyl  disp
-# 1  22.8   4 108.0
-# 2  24.4   4 146.7
-# 3  22.8   4 140.8
-# 4  32.4   4  78.7
-# 5  30.4   4  75.7
-# 6  33.9   4  71.1
-# 7  21.5   4 120.1
-# 8  27.3   4  79.0
-# 9  26.0   4 120.3
-# 10 30.4   4  95.1
-# 11 21.4   4 121.0
+df
+#     mpg cyl  disp
+# 1  22.8   4 108.0
+# 2  24.4   4 146.7
+# 3  22.8   4 140.8
+# 4  32.4   4  78.7
+# 5  30.4   4  75.7
+# 6  33.9   4  71.1
+# 7  21.5   4 120.1
+# 8  27.3   4  79.0
+# 9  26.0   4 120.3
+# 10 30.4   4  95.1
+# 11 21.4   4 121.0
 
-# Example 2: Output two rows for each 6 cylinder car
+# Example 2: Output two rows for each 6 cylinder car
 
-# Prepare sample data
-dat <- data.frame(name = rownames(mtcars), mtcars, stringsAsFactors = FALSE)
+# Prepare sample data
+dat <- data.frame(name = rownames(mtcars), mtcars, stringsAsFactors = FALSE)
 
-# Perform datastep
-df <- datastep(dat, 
-               keep = c("name", "mpg", "cyl", "disp", "seq"), {
+# Perform datastep
+df <- datastep(dat, 
+               keep = c("name", "mpg", "cyl", "disp", "seq"), {
                  
-  if (cyl == 6) {
-    seq <- 1
-    output()
-    seq <- 2
-    output()
-  }
+  if (cyl == 6) {
+    seq <- 1
+    output()
+    seq <- 2
+    output()
+  }
                  
-})
+})
 
-df
-#              name  mpg cyl  disp seq
-# 1       Mazda RX4 21.0   6 160.0   1
-# 2       Mazda RX4 21.0   6 160.0   2
-# 3   Mazda RX4 Wag 21.0   6 160.0   1
-# 4   Mazda RX4 Wag 21.0   6 160.0   2
-# 5  Hornet 4 Drive 21.4   6 258.0   1
-# 6  Hornet 4 Drive 21.4   6 258.0   2
-# 7         Valiant 18.1   6 225.0   1
-# 8         Valiant 18.1   6 225.0   2
-# 9        Merc 280 19.2   6 167.6   1
-# 10       Merc 280 19.2   6 167.6   2
-# 11      Merc 280C 17.8   6 167.6   1
-# 12      Merc 280C 17.8   6 167.6   2
-# 13   Ferrari Dino 19.7   6 145.0   1
-# 14   Ferrari Dino 19.7   6 145.0   2
+df
+#              name  mpg cyl  disp seq
+# 1       Mazda RX4 21.0   6 160.0   1
+# 2       Mazda RX4 21.0   6 160.0   2
+# 3   Mazda RX4 Wag 21.0   6 160.0   1
+# 4   Mazda RX4 Wag 21.0   6 160.0   2
+# 5  Hornet 4 Drive 21.4   6 258.0   1
+# 6  Hornet 4 Drive 21.4   6 258.0   2
+# 7         Valiant 18.1   6 225.0   1
+# 8         Valiant 18.1   6 225.0   2
+# 9        Merc 280 19.2   6 167.6   1
+# 10       Merc 280 19.2   6 167.6   2
+# 11      Merc 280C 17.8   6 167.6   1
+# 12      Merc 280C 17.8   6 167.6   2
+# 13   Ferrari Dino 19.7   6 145.0   1
+# 14   Ferrari Dino 19.7   6 145.0   2
 
-# Example 3: Create data frame using output() functions
-df <- datastep(data.frame(), {
+# Example 3: Create data frame using output() functions
+df <- datastep(data.frame(), {
 
-  # Row 1
-  COL1 <- 1
-  COL2 <- "One"
-  output()
+  # Row 1
+  COL1 <- 1
+  COL2 <- "One"
+  output()
   
-  # Row 2
-  COL1 <- 2
-  COL2 <- "Two"
-  output()
+  # Row 2
+  COL1 <- 2
+  COL2 <- "Two"
+  output()
 
-})
+})
 
-df
-#   COL1 COL2
-# 1    1  One
-# 2    2  Two
+df +# COL1 COL2 +# 1 1 One +# 2 2 Two
+
+ -
- +
- - + + diff --git a/docs/reference/print.lib.html b/docs/reference/print.lib.html index c61f36f..01fd4e4 100644 --- a/docs/reference/print.lib.html +++ b/docs/reference/print.lib.html @@ -1,79 +1,14 @@ - - - - - - - -Print a data library — print.lib • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Print a data library — print.lib • libr - - - - - - - - - - - + + - - - -
-
- -
- -
+
@@ -168,97 +90,98 @@

Print a data library

Use verbose = TRUE to print the library as a list.

-
# S3 method for lib
-print(x, ..., verbose = FALSE)
+
+
# S3 method for lib
+print(x, ..., verbose = FALSE)
+
+ +
+

Arguments

+
x
+

The library to print.

+ + +
...
+

Any follow-on parameters.

-

Arguments

- - - - - - - - - - - - - - -
x

The library to print.

...

Any follow-on parameters.

verbose

Whether or not to print the library in verbose style. -By default, the parameter is FALSE, meaning to print in summary style.

-

Value

+
verbose
+

Whether or not to print the library in verbose style. +By default, the parameter is FALSE, meaning to print in summary style.

-

The object, invisibly.

-

See also

+
+
+

Value

+ - +

The object, invisibly.

+
+
+

See also

+ +
-

Examples

-
# Create temp directory
-tmp <- tempdir()
+    
+

Examples

+
# Create temp directory
+tmp <- tempdir()
 
-# Create data library
-libname(dat, tmp)
+# Create data library
+libname(dat, tmp)
 
-# Add data to library
-lib_add(dat, iris, ToothGrowth, PlantGrowth)
+# Add data to library
+lib_add(dat, iris, ToothGrowth, PlantGrowth)
 
-# Print library summary 
-print(dat)
-# library 'dat': 3 items
-# - attributes: not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
-# - items:
-#          Name Extension Rows Cols   Size        LastModified
-# 1        iris       rds  150    5 7.8 Kb 2020-11-05 22:26:59
-# 2 PlantGrowth       rds   30    2 2.5 Kb 2020-11-05 22:26:59
-# 3 ToothGrowth       rds   60    3 3.4 Kb 2020-11-05 22:26:59
+# Print library summary 
+print(dat)
+# library 'dat': 3 items
+# - attributes: not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpCSJ6Gc
+# - items:
+#          Name Extension Rows Cols   Size        LastModified
+# 1        iris       rds  150    5 7.8 Kb 2020-11-05 22:26:59
+# 2 PlantGrowth       rds   30    2 2.5 Kb 2020-11-05 22:26:59
+# 3 ToothGrowth       rds   60    3 3.4 Kb 2020-11-05 22:26:59
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)
+
+ -
- +
- - + + diff --git a/docs/reference/print.specs.html b/docs/reference/print.specs.html index f616967..5e95b85 100644 --- a/docs/reference/print.specs.html +++ b/docs/reference/print.specs.html @@ -1,77 +1,12 @@ - - - - - - - -Print import specifications — print.specs • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Print import specifications — print.specs • libr - - - - + + -
-
- -
- -
+
@@ -164,63 +86,62 @@

Print import specifications

A function to print the import specification collection.

-
# S3 method for specs
-print(x, ..., verbose = FALSE)
- -

Arguments

- - - - - - - - - - - - - - -
x

The specifications to print.

...

Any follow-on parameters to the print function.

verbose

Whether or not to print the specifications in verbose style. -By default, the parameter is FALSE, meaning to print in summary style.

- -

Value

- -

The specification object, invisibly.

-

See also

- -

Other specs: -import_spec(), -read.specs(), -specs(), -write.specs()

+
+
# S3 method for specs
+print(x, ..., verbose = FALSE)
+
+ +
+

Arguments

+
x
+

The specifications to print.

+ + +
...
+

Any follow-on parameters to the print function.

+ + +
verbose
+

Whether or not to print the specifications in verbose style. +By default, the parameter is FALSE, meaning to print in summary style.

+ +
+
+

Value

+ + +

The specification object, invisibly.

+
+
+

See also

+

Other specs: +import_spec(), +read.specs(), +specs(), +write.specs()

+
+
-
- +
- - + + diff --git a/docs/reference/read.specs.html b/docs/reference/read.specs.html index a948eb0..0b0faf8 100644 --- a/docs/reference/read.specs.html +++ b/docs/reference/read.specs.html @@ -1,81 +1,16 @@ - - - - - - - -Read import specs from the file system — read.specs • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Read import specs from the file system — read.specs • libr - - - - - - - - - - + + - - - - -
-
- -
- -
+
@@ -172,59 +94,58 @@

Read import specs from the file system

extension and read it.

-
read.specs(file_path = getwd())
+
+
read.specs(file_path = getwd())
+
-

Arguments

- - - - - - -
file_path

The full or relative path to the file system. Default is +

+

Arguments

+
file_path
+

The full or relative path to the file system. Default is the current working directory. If the file_path is a file name that does not contain the '.specs' file extension, the function will add the extension. If the file_path contains a directory name, the function will search the directory for a file with an extension of '.specs'. If more than one file with an extension of '.specs' is founds, -the function will generate an error.

- -

Value

+the function will generate an error.

-

The specifications object.

-

See also

+
+
+

Value

+ -

Other specs: -import_spec(), -print.specs(), -specs(), -write.specs()

+

The specifications object.

+
+
+

See also

+

Other specs: +import_spec(), +print.specs(), +specs(), +write.specs()

+
+
-
- +
- - + + diff --git a/docs/reference/specs.html b/docs/reference/specs.html index eb75793..caab7f2 100644 --- a/docs/reference/specs.html +++ b/docs/reference/specs.html @@ -1,55 +1,5 @@ - - - - - - - -Create an Import Spec Collection — specs • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create an Import Spec Collection — specs • libr - - - - - - - - - - - - + + - - -
-
- -
- -
+

A function to capture a set of import specifications for a directory of data files. These specs can be used on the -libname function to correctly assign the data types for +libname function to correctly assign the data types for imported data files. The import engines will guess at the data types for any columns that are not explicitly defined in the import specifications. Import -specifications are defined with the import_spec function. +specifications are defined with the import_spec function. The import spec syntax is the same for all data engines.

Note that the na and trim_ws parameters on the specs function will be applied globally to all files in the library. -These global settings can be overridden on the import_spec +These global settings can be overridden on the import_spec for any particular data file.

Also note that the specs collection is defined as an object so it can be stored and reused. -See the write.specs and read.specs functions +See the write.specs and read.specs functions for additional information on saving specs.

-
specs(..., na = c("", "NA"), trim_ws = TRUE)
+
+
specs(..., na = c("", "NA"), trim_ws = TRUE)
+
-

Arguments

- - - - - - - - - - - - - - -
...

Named input specs. The name should correspond to the file name, +

+

Arguments

+
...
+

Named input specs. The name should correspond to the file name, without the file extension. The spec is defined as an import_spec object. See the -import_spec function for additional information on -parameters for that object.

na

A vector of values to be treated as NA. For example, the +import_spec function for additional information on +parameters for that object.

+ + +
na
+

A vector of values to be treated as NA. For example, the vector c('', ' ') will cause empty strings and single blanks to be converted to NA values. For most file types, empty strings and the string 'NA' ('', 'NA') are considered NA. For SAS® datasets and transport files, a single blank and a single dot c(" ", ".") are considered NA. The value of the na parameter on the specs function can be overridden -by the na parameter on the import_spec function.

trim_ws

Whether or not to trim white space from the input data values. +by the na parameter on the import_spec function.

+ + +
trim_ws
+

Whether or not to trim white space from the input data values. Valid values are TRUE, and FALSE. Default is TRUE. The value of the trim_ws parameter on the specs function can be overridden -by the trim_ws parameter on the import_spec function.

- -

Value

+by the trim_ws parameter on the import_spec function.

-

The import specifications object.

-

See also

+
+
+

Value

+ -

libname to create a data library, -dictionary for generating a data dictionary, and -import_spec for additional information on defining an +

The import specifications object.

+
+
+

See also

+

libname to create a data library, +dictionary for generating a data dictionary, and +import_spec for additional information on defining an import spec.

Other specs: -import_spec(), -print.specs(), -read.specs(), -write.specs()

+import_spec(), +print.specs(), +read.specs(), +write.specs()

+
-

Examples

-
library(readr)
+    
+

Examples

+
library(readr)
 
-# Create temp path
-tmp <- file.path(tempdir(), "mtcars.csv")
+# Create temp path
+tmp <- file.path(tempdir(), "mtcars.csv")
 
-# Create data for illustration purposes
-df <- data.frame(vehicle = rownames(mtcars), mtcars[c("mpg", "cyl", "disp")],
-                 stringsAsFactors = FALSE)
+# Create data for illustration purposes
+df <- data.frame(vehicle = rownames(mtcars), mtcars[c("mpg", "cyl", "disp")],
+                 stringsAsFactors = FALSE)
 
-# Kill rownames
-rownames(df) <- NULL
+# Kill rownames
+rownames(df) <- NULL
 
-# Add some columns
-df <- datastep(df[1:10, ], {
+# Add some columns
+df <- datastep(df[1:10, ], {
 
-        recdt <- "10JUN1974"
+        recdt <- "10JUN1974"
 
-        if (mpg >= 20)
-          mpgcat <- "High"
-        else 
-          mpgcat <- "Low"
+        if (mpg >= 20)
+          mpgcat <- "High"
+        else 
+          mpgcat <- "Low"
       
-        if (cyl == 8)
-          cyl8 <- TRUE
-  })
+        if (cyl == 8)
+          cyl8 <- TRUE
+  })
   
-df
-#              vehicle  mpg cyl  disp     recdt mpgcat cyl8
-# 1          Mazda RX4 21.0   6 160.0 10JUN1974   High   NA
-# 2      Mazda RX4 Wag 21.0   6 160.0 10JUN1974   High   NA
-# 3         Datsun 710 22.8   4 108.0 10JUN1974   High   NA
-# 4     Hornet 4 Drive 21.4   6 258.0 10JUN1974   High   NA
-# 5  Hornet Sportabout 18.7   8 360.0 10JUN1974    Low TRUE
-# 6            Valiant 18.1   6 225.0 10JUN1974    Low   NA
-# 7         Duster 360 14.3   8 360.0 10JUN1974    Low TRUE
-# 8          Merc 240D 24.4   4 146.7 10JUN1974   High   NA
-# 9           Merc 230 22.8   4 140.8 10JUN1974   High   NA
-# 10          Merc 280 19.2   6 167.6 10JUN1974    Low   NA
+df
+#              vehicle  mpg cyl  disp     recdt mpgcat cyl8
+# 1          Mazda RX4 21.0   6 160.0 10JUN1974   High   NA
+# 2      Mazda RX4 Wag 21.0   6 160.0 10JUN1974   High   NA
+# 3         Datsun 710 22.8   4 108.0 10JUN1974   High   NA
+# 4     Hornet 4 Drive 21.4   6 258.0 10JUN1974   High   NA
+# 5  Hornet Sportabout 18.7   8 360.0 10JUN1974    Low TRUE
+# 6            Valiant 18.1   6 225.0 10JUN1974    Low   NA
+# 7         Duster 360 14.3   8 360.0 10JUN1974    Low TRUE
+# 8          Merc 240D 24.4   4 146.7 10JUN1974   High   NA
+# 9           Merc 230 22.8   4 140.8 10JUN1974   High   NA
+# 10          Merc 280 19.2   6 167.6 10JUN1974    Low   NA
 
-# Save to temp directory for this example
-write_csv(df, tmp)
+# Save to temp directory for this example
+write_csv(df, tmp)
 
-## Start Example ##
+## Start Example ##
 
-# Define import spec
-spcs <- specs(mtcars = import_spec(vehicle = "character",
-                                   cyl = "integer",
-                                   recdt = "date=%d%b%Y",
-                                   mpgcat = "guess",
-                                   cyl8 = "logical"))
+# Define import spec
+spcs <- specs(mtcars = import_spec(vehicle = "character",
+                                   cyl = "integer",
+                                   recdt = "date=%d%b%Y",
+                                   mpgcat = "guess",
+                                   cyl8 = "logical"))
                                    
-# Create library
-libname(dat, tempdir(), "csv", import_specs = spcs)
-# $mtcars
-# library 'dat': 1 items
-# - attributes: csv not loaded
-# - path: C:\Users\User\AppData\Local\Temp\RtmpqAMV6L
-# - items:
-#     Name Extension Rows Cols   Size        LastModified
-# 1 mtcars       csv   10    7 9.3 Kb 2020-11-29 09:47:52
+# Create library
+libname(dat, tempdir(), "csv", import_specs = spcs)
+# $mtcars
+# library 'dat': 1 items
+# - attributes: csv not loaded
+# - path: C:\Users\User\AppData\Local\Temp\RtmpqAMV6L
+# - items:
+#     Name Extension Rows Cols   Size        LastModified
+# 1 mtcars       csv   10    7 9.3 Kb 2020-11-29 09:47:52
 
-# View data types
-dictionary(dat)
-# # A tibble: 7 x 10
-#   Name   Column  Class     Label Description Format Width Justify  Rows   NAs
-#   <chr>  <chr>   <chr>     <chr> <chr>       <lgl>  <int> <chr>   <int> <int>
-# 1 mtcars vehicle character NA    NA          NA        17 NA         10     0
-# 2 mtcars mpg     numeric   NA    NA          NA        NA NA         10     0
-# 3 mtcars cyl     integer   NA    NA          NA        NA NA         10     0
-# 4 mtcars disp    numeric   NA    NA          NA        NA NA         10     0
-# 5 mtcars mpgcat  character NA    NA          NA         4 NA         10     0
-# 6 mtcars recdt   Date      NA    NA          NA        NA NA         10     0
-# 7 mtcars cyl8    logical   NA    NA          NA        NA NA         10     8
+# View data types
+dictionary(dat)
+# # A tibble: 7 x 10
+#   Name   Column  Class     Label Description Format Width Justify  Rows   NAs
+#   <chr>  <chr>   <chr>     <chr> <chr>       <lgl>  <int> <chr>   <int> <int>
+# 1 mtcars vehicle character NA    NA          NA        17 NA         10     0
+# 2 mtcars mpg     numeric   NA    NA          NA        NA NA         10     0
+# 3 mtcars cyl     integer   NA    NA          NA        NA NA         10     0
+# 4 mtcars disp    numeric   NA    NA          NA        NA NA         10     0
+# 5 mtcars mpgcat  character NA    NA          NA         4 NA         10     0
+# 6 mtcars recdt   Date      NA    NA          NA        NA NA         10     0
+# 7 mtcars cyl8    logical   NA    NA          NA        NA NA         10     8
 
-# Clean up
-lib_delete(dat)
+# Clean up +lib_delete(dat)
+
+ -
- +
- - + + diff --git a/docs/reference/sub-.dsarray.html b/docs/reference/sub-.dsarray.html index 2e0e1e5..71e0d64 100644 --- a/docs/reference/sub-.dsarray.html +++ b/docs/reference/sub-.dsarray.html @@ -1,82 +1,17 @@ - - - - - - - -Indexer for Data Step Array — [.dsarray • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Indexer for Data Step Array — [.dsarray • libr - - - - - - - - - - - + + - - - -
-
- -
- -
+
@@ -171,114 +93,115 @@

Indexer for Data Step Array

access all columns, leave the indexer empty. Otherwise, specify the the column name(s) or number(s) to return data for. The indexer will always act upon the current row in the datastep. -For additional details, see the dsarray function.

+For additional details, see the dsarray function.

+
+ +
+
# S3 method for dsarray
+[(x, i = NULL)
-
# S3 method for dsarray
-[(x, i = NULL)
+
+

Arguments

+
x
+

The dsarray object.

-

Arguments

- - - - - - - - - - -
x

The dsarray object.

i

The index of the datastep array item to return a value for. + +

i
+

The index of the datastep array item to return a value for. This index can be a column name or position in the array. It can also be a vector of column names or positions. If no index -is supplied, a vector of all array values will be returned.

+is supplied, a vector of all array values will be returned.

-

Value

+
+
+

Value

+ -

The value of the specified column for the current row in the +

The value of the specified column for the current row in the datastep. If no index is supplied, a vector of all column values will be returned.

-

See also

- -

Other datastep: -datastep(), -delete(), -dsarray(), -dsattr(), -length.dsarray(), -output()

+
+
+

See also

+

Other datastep: +datastep(), +delete(), +dsarray(), +dsattr(), +length.dsarray(), +output()

+
-

Examples

-
library(libr)
+    
+

Examples

+
library(libr)
 
-# Create AirPassengers Data Frame
-df <- as.data.frame(t(matrix(AirPassengers, 12, 
-                    dimnames = list(month.abb, seq(1949, 1960)))), 
-                    stringsAsFactors = FALSE)
+# Create AirPassengers Data Frame
+df <- as.data.frame(t(matrix(AirPassengers, 12, 
+                    dimnames = list(month.abb, seq(1949, 1960)))), 
+                    stringsAsFactors = FALSE)
 
-# Use datastep array to get sums by quarter
-# Examine different ways of referencing data inside datastep
-dat <- datastep(df,
-                keep = c("Q1", "Q2", "Q3", "Q4", "Tot"),
-                arrays = list(months = dsarray(names(df))),
-                {
+# Use datastep array to get sums by quarter
+# Examine different ways of referencing data inside datastep
+dat <- datastep(df,
+                keep = c("Q1", "Q2", "Q3", "Q4", "Tot"),
+                arrays = list(months = dsarray(names(df))),
+                {
                 
-                   # Reference by column name
-                   Q1 <- Jan + Feb + Mar
+                   # Reference by column name
+                   Q1 <- Jan + Feb + Mar
                    
-                   # Reference by array positions
-                   Q2 <- sum(months[4:6])
+                   # Reference by array positions
+                   Q2 <- sum(months[4:6])
                    
-                   # Reference by array names
-                   Q3 <- sum(months[c("Jul", "Aug", "Sep")])
+                   # Reference by array names
+                   Q3 <- sum(months[c("Jul", "Aug", "Sep")])
                    
-                   # Reference by row position
-                   Q4 <- rw$Oct + rw[["Nov"]] + rw[[12]]
+                   # Reference by row position
+                   Q4 <- rw$Oct + rw[["Nov"]] + rw[[12]]
                    
-                   # Empty indexer returns all column values in array
-                   Tot <- sum(months[])
+                   # Empty indexer returns all column values in array
+                   Tot <- sum(months[])
                   
-                })
+                })
 
-dat
-#        Q1   Q2   Q3   Q4  Tot
-# 1949  362  385  432  341 1520
-# 1950  382  409  498  387 1676
-# 1951  473  513  582  474 2042
-# 1952  544  582  681  557 2364
-# 1953  628  707  773  592 2700
-# 1954  627  725  854  661 2867
-# 1955  742  854 1023  789 3408
-# 1956  878 1005 1173  883 3939
-# 1957  972 1125 1336  988 4421
-# 1958 1020 1146 1400 1006 4572
-# 1959 1108 1288 1570 1174 5140
-# 1960 1227 1468 1736 1283 5714
+dat +# Q1 Q2 Q3 Q4 Tot +# 1949 362 385 432 341 1520 +# 1950 382 409 498 387 1676 +# 1951 473 513 582 474 2042 +# 1952 544 582 681 557 2364 +# 1953 628 707 773 592 2700 +# 1954 627 725 854 661 2867 +# 1955 742 854 1023 789 3408 +# 1956 878 1005 1173 883 3939 +# 1957 972 1125 1336 988 4421 +# 1958 1020 1146 1400 1006 4572 +# 1959 1108 1288 1570 1174 5140 +# 1960 1227 1468 1736 1283 5714
+
+ -
- +
- - + + diff --git a/docs/reference/write.specs.html b/docs/reference/write.specs.html index 74570f5..38ec89a 100644 --- a/docs/reference/write.specs.html +++ b/docs/reference/write.specs.html @@ -1,81 +1,16 @@ - - - - - - - -Write import specs to the file system — write.specs • libr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Write import specs to the file system — write.specs • libr - - - - - - - - - - - - + + - - -
-
- -
- -
+
@@ -172,64 +94,63 @@

Write import specs to the file system

in multiple programs or across multiple teams.

-
write.specs(x, dir_path = getwd(), file_name = NULL)
- -

Arguments

- - - - - - - - - - - - - - -
x

A specifications object of class 'specs'.

dir_path

A full or relative path to save the specs. Default is the -current working directory.

file_name

The file name to save to specs, without a file extension. -The file extension will be added automatically. If no file name is -supplied, the function will use the variable name as the file name.

+
+
write.specs(x, dir_path = getwd(), file_name = NULL)
+
+ +
+

Arguments

+
x
+

A specifications object of class 'specs'.

-

Value

-

The full file path.

-

See also

+
dir_path
+

A full or relative path to save the specs. Default is the +current working directory.

+ + +
file_name
+

The file name to save to specs, without a file extension. +The file extension will be added automatically. If no file name is +supplied, the function will use the variable name as the file name.

-

Other specs: -import_spec(), -print.specs(), -read.specs(), -specs()

+
+
+

Value

+ + +

The full file path.

+
+
+

See also

+

Other specs: +import_spec(), +print.specs(), +read.specs(), +specs()

+
+
-
- +
- - + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 39c5c55..ba6cf0e 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -1,8 +1,44 @@ + + https://libr.r-sassy.org/404.html + + + https://libr.r-sassy.org/articles/index.html + + + https://libr.r-sassy.org/articles/libr-basics.html + + + https://libr.r-sassy.org/articles/libr-datastep.html + + + https://libr.r-sassy.org/articles/libr-disclaimer.html + + + https://libr.r-sassy.org/articles/libr-example1.html + + + https://libr.r-sassy.org/articles/libr-example2.html + + + https://libr.r-sassy.org/articles/libr-faq.html + + + https://libr.r-sassy.org/articles/libr-management.html + + + https://libr.r-sassy.org/articles/libr.html + + + https://libr.r-sassy.org/authors.html + https://libr.r-sassy.org/index.html + + https://libr.r-sassy.org/news/index.html + https://libr.r-sassy.org/reference/datastep.html @@ -21,6 +57,9 @@ https://libr.r-sassy.org/reference/import_spec.html + + https://libr.r-sassy.org/reference/index.html + https://libr.r-sassy.org/reference/is.lib.html @@ -93,28 +132,4 @@ https://libr.r-sassy.org/reference/write.specs.html - - https://libr.r-sassy.org/articles/libr-basics.html - - - https://libr.r-sassy.org/articles/libr-datastep.html - - - https://libr.r-sassy.org/articles/libr-disclaimer.html - - - https://libr.r-sassy.org/articles/libr-example1.html - - - https://libr.r-sassy.org/articles/libr-example2.html - - - https://libr.r-sassy.org/articles/libr-faq.html - - - https://libr.r-sassy.org/articles/libr-management.html - - - https://libr.r-sassy.org/articles/libr.html - diff --git a/man/datastep.Rd b/man/datastep.Rd index 726305f..457265c 100644 --- a/man/datastep.Rd +++ b/man/datastep.Rd @@ -49,7 +49,9 @@ retain their original names.} \item{by}{A vector of quoted variable names to use for by-group processing. This parameter will activate the \code{first.} and \code{last.} automatic variables, that indicate the first or last rows in a group. 
These -automatic variables are useful for conditional processing on groups.} +automatic variables are useful for conditional processing on groups. +The function will also create first and last automatic variables for each +variable specified in the by group.} \item{calculate}{Steps to set up calculated variables. Calculated variables are commonly generated with summary functions such as @@ -112,8 +114,9 @@ values will be filled with NA.} \item{merge}{A dataset or list of datasets to merge with the input data. The merge operation will occur at the beginning of the datastep, prior to the execution of any steps. When the \code{merge} operation is -requested, the \code{by} parameter will be used to indicate which variable(s) -to merge by.} +requested, the \code{merge_by} parameter will be used to indicate which variable(s) +to merge by. If no \code{merge_by} is specified, the merge dataset columns will +simply be appended to the right of the input dataset.} \item{merge_by}{If the \code{merge} parameter is set, the \code{merge_by} parameter will be used to identify the variable(s) to merge by. If merge @@ -174,6 +177,13 @@ be accessed at any point within the data step: Automatic variables will be dropped from the data frame at the end of the data step. If you wish to keep the automatic variable values, assign the automatic variable to a new variable and keep that variable. + +If there are multiple by group variables, the \code{first.} and \code{last.} +automatic variables indicate an either/or combination of all by variables. +In addition, +\code{first.X} and \code{last.X} automatic variables will be created for +each variable, where "X" represents the name of the specified variable. +As always, these names are case-sensitive. } \section{Column Attributes}{ diff --git a/man/lib_write.Rd b/man/lib_write.Rd index 3b7f602..072b5a0 100644 --- a/man/lib_write.Rd +++ b/man/lib_write.Rd @@ -29,6 +29,8 @@ data in memory to the data on disk. 
If there are differences in the data, the function will overwrite the version on disk. To override the default behavior, use the \code{force} option to force \code{lib_write} to write every data file to disk. + +Note that writing sas7bdat files to disk is not supported. } \examples{ # Create temp directory diff --git a/man/libname.Rd b/man/libname.Rd index 5ad7728..48465ef 100644 --- a/man/libname.Rd +++ b/man/libname.Rd @@ -15,7 +15,8 @@ libname( filter = NULL, standard_eval = FALSE, quiet = FALSE, - log = TRUE + log = TRUE, + where = NULL ) } \arguments{ @@ -74,6 +75,11 @@ files. Default is FALSE.} \item{log}{Whether to log the libname operation. Default is TRUE. This parameter is used internally.} + +\item{where}{An expression used to subset all datasets in the library. +The where clause will be executed when the library is created. Use the +Base R \code{\link{expression}} function to define the subset. If a where clause +is supplied, the library will be opened read-only.} } \value{ The library object, with all data files loaded into the library @@ -162,7 +168,7 @@ type information than either csv or Excel. In most cases, you will not need to define import specifications for SAS® datasets. The sas7bdat engine interprets empty strings, single blanks, and a single dot (".") as missing values. While the import of SAS® datasets is fairly reliable, sas7bdat files -exported with the sas7bdat engine sometimes cannot be read by SAS® software. +cannot be written or exported with the sas7bdat engine. In these cases, it is recommended to export to another file format, such as csv or dbf, and then import into SAS®.} \item{\strong{xpt}: The SAS® transport file engine. 
Transport format is diff --git a/man/libr.Rd b/man/libr.Rd index 3fc7498..903d960 100644 --- a/man/libr.Rd +++ b/man/libr.Rd @@ -3,6 +3,7 @@ \docType{package} \name{libr} \alias{libr} +\alias{libr-package} \title{Libnames, Data Dictionaries and Data Steps} \description{ The \strong{libr} package brings the concepts of data libraries, data @@ -40,3 +41,4 @@ requires very careful control over which data is or is not loaded into memory. The \strong{libr} package, on the other hand, tends to load all data into memory indiscriminately. } +\keyword{internal} diff --git a/tests/testthat/data/demo_studya.dbf b/tests/testthat/data/demo_studya.dbf new file mode 100644 index 0000000000000000000000000000000000000000..850c9a9f7a89d5acd2ea89e4e32d373fc7c257f4 GIT binary patch literal 1621 zcmdT^OHYI_5MHy1##c1)=4pO`DX>5}HYVOQn|SbOSg5gy%NnReul{mNp_>j=WV0tH zfp(sJ{iehG`RMv&Hk+}7Z}lEZB|q}7)_mJ6hrJL{)W-7AY_86X+OF@Z6ugj)()us3 zzZR9y-e3GxR&rkCspMV$DbBC>r!i=JbE9=@nP$A(YX4==pKn&i;>0gYp31d8fzCfR z(cO9uj}1c%z$1j*sAnt6gmmQqz#-b?Q9oMPk@o;kuR?OLB@Tkn4kF7pb+pw+#f%{E zQOX#Cz>d5FU@xV}(gN6;w;PCkip#kWc=&(-sBXi}WDZy*)c0+nB*BzULV&p434x;v zG{og5A;66r;{dVc?!toNax9ag)^}y(9>jKc_vIiW6qWlY2r%gVx)4!6`ICdeEZ~$e i1n82f|AAu`LJv=)s|$SuJ$)Ah!&yj(hpsExBKQGQRVt_e literal 0 HcmV?d00001 diff --git a/tests/testthat/data/demo_studya.xpt b/tests/testthat/data/demo_studya.xpt new file mode 100644 index 0000000000000000000000000000000000000000..8f7a84fd6db25e8b05c0f639502260266a787480 GIT binary patch literal 2720 zcmcJQ&u`N(6vy5D>ULln7Y=9}o`yi;FtuTHPzg4-F54u+s;1i5X-eB{i$v2Y2^HM- zCvfM&jT4;t^Kf3`#&+y>AmKybd$!*v-}me$M=e5I9o%U(PdfBjI^G>OPG4e-kHodD zbX2Aej2+^zOvy4F#a$`RW6tb=#raT?gXZv$^Qx`^UWgDwP5^ngtl6IfT#>DnY4Chm(|i2R)CoJuDlB8grfrg%)|HU`xzj zMst2JhP2~g>_xGE<;J5BZ<|OMH(E|NSp^bfNHMF9^GpP^Drn$ts%8GhjeReOaXYh9 zWB^`zc2UbJ=NYxEZ~=l%wJePN$d6g#DcD&`X7=R)=NYw>1r6LywJb*c(DQ<3o{*!j6_Q;IQQ7(3lMB_cS~p~@;*b3lUXf5m{jko5opP&cF;(>Nr$!4uqpvC@tEg~BTL)7p-wl+)8EwB1 JDft!1y+7RIwYvZS literal 0 HcmV?d00001 diff --git 
a/tests/testthat/data/demo_studyb.dbf b/tests/testthat/data/demo_studyb.dbf new file mode 100644 index 0000000000000000000000000000000000000000..94a309ec0bed5a4a18dcb0b86153b9b9ba0f9246 GIT binary patch literal 581 zcma)2%L>9U5UdsyQBm;bX}>`Fuu_jz@TLeJJWATG7_hCRvFOz=w@n){zHlMg-I>`i zS$)?W)-_G@YPbA;kCdOoYYESb)OeB5IFrb#CPw^rF;ZK)cl^F@JDh1zv;lsq2$VdO2A>onuftYVeb(OoYDK<0%unfZ( z<>wIqrqbsGfGQD?>Ds2ETmozllp{Qc<2qyIIWOQe6Ph0mEM??x2MYji&uBnm@FFY= PnAlU*{rZ?M*G~ZM9hFfI literal 0 HcmV?d00001 diff --git a/tests/testthat/data/demo_studyb.xpt b/tests/testthat/data/demo_studyb.xpt new file mode 100644 index 0000000000000000000000000000000000000000..cc7d27271795feeef073745c26071aa7d14b587b GIT binary patch literal 2160 zcmbW2&u`N(6vy5D>ULlnmx&WH0)fO~YQxk)FDR)4n?zWZR6{$VwDC4eG(m}};J}&x zl0Snpj-993uG1EL=zGu3`{etc9rrx+Nf_fe99_obr5YbDf~(gU<8yuOi5k6?1LMR` zM5bW{4&%W|Z&1%qtL*f|t3CJd<hdUJX5O*zXPdo*lchHlHf8sCo)$W%aJ7NDYY1Guz z`_Ye*we`L+3T*<6(H25z4|QazLPt`Uyn&u)%`+lf4m$>38w%}0re-_B-$z?4U&yqJ zd_gPDZc|>A_-Pvn;~-2X^{ONZh7pVEfY(Mqhnfa(m6q+VDQ7h2c-+_(G9=!3_p+8l z;I(Qw00Ej+T6Pw!Vq7F%NxM$T+P)kEuT@J|(*Um0vRlncO7kowDW|x&!HJAne%k`O zzK$Y7FqRm>Fk*xr15ePL13d%*nyt@n2klk#yUa-%>jY?%_F1*!=fKla3>R%{8o;f! 
z9HDNR&SV8~V~s>dV7(hz@eANNQsord0Rfs-d$gYw>h6L)3RubS+CXC7*puZwIs;xC z0zJ?)fLr&aiw<~6Q$DBpq_OomUNx*f*nN2myfzMUd`GVLx$e`;Ao>6Q-}?RxMn6^g z{t>i6S@QmubfQmp%sA=f__ti}XPhmlx`pp4SY^X6P~Q1BC7Y!=_UBYO2?+^l35iK532CfGk`d0%cS>|6Bxd|@5aQF~NqCUJwDhgSf`(R3 u4+EnO1_ezfPgS2S6JwTS61aSz$%nD6V4C2ZR34T8j0{~%+H}i+rUC#`HXdaF literal 0 HcmV?d00001 diff --git a/tests/testthat/test-datastep.R b/tests/testthat/test-datastep.R index bca6fbf..ee34f4d 100644 --- a/tests/testthat/test-datastep.R +++ b/tests/testthat/test-datastep.R @@ -1624,3 +1624,80 @@ test_that("ds44: Merge works with factors.", { }) + +test_that("ds45: Output function works as expected when the names have spaces.", { + + + dat <- mtcars[ , c("mpg", "cyl")] + + rownames(dat) <- NULL + names(dat) <- c("miles per gallon", "cylinders") + + + d1 <- datastep(dat, + { + + if (cylinders == 4) { + output() + #fork <- 1 + } + + }) + + d1 + + expect_equal(nrow(d1), 11) + expect_equal(ncol(d1), 2) + +}) + +test_that("ds46: Skip loop when there is no code.", { + + + dat <- mtcars[ , c("mpg", "cyl")] + + + + d1 <- datastep(dat, where = expression(cyl == 4), + {}) + + d1 + + expect_equal(nrow(d1), 11) + expect_equal(ncol(d1), 2) + +}) + + +test_that("ds47: Multiple group bys works as expected.", { + + libname(dat, base_path, "rds") + + lib_load(dat) + + + dt <- sort(dat.demo_studya, by = c("treatment", "sex")) + + d1 <- datastep(dt, by = c("treatment", "sex"), { + + f1 <- first. + l1 <- last. 
+ + f2 <- first.treatment + l2 <- last.treatment + + f3 <- first.sex + l3 <- last.sex + + }, sort_check = TRUE) + + d1 + + expect_equal(sum(d1$f1), 4) + expect_equal(sum(d1$l1), 4) + expect_equal(sum(d1$f2), 2) + expect_equal(sum(d1$l2), 2) + expect_equal(sum(d1$f2), 2) + expect_equal(sum(d1$l2), 2) + +}) diff --git a/tests/testthat/test-libname.R b/tests/testthat/test-libname.R index ed82152..cad258d 100644 --- a/tests/testthat/test-libname.R +++ b/tests/testthat/test-libname.R @@ -337,7 +337,7 @@ test_that("lib18: libname() standard_eval parameter works as expected with all o libname(myvar1, file.path( base_path, "SDTM"), - engine = "sas7bdat", filter = c("ae", "dm", "lb", "vs"), + engine = "csv", filter = c("ae", "dm", "lb", "vs"), standard_eval = TRUE) @@ -349,11 +349,11 @@ test_that("lib18: libname() standard_eval parameter works as expected with all o lib_load(myvar1) - expect_equal("bork1.ae" %in% ls(), TRUE) + expect_equal("bork1.AE" %in% ls(), TRUE) lib_unload(myvar1) - expect_equal("bork1.ae" %in% ls(), FALSE) + expect_equal("bork1.AE" %in% ls(), FALSE) lib_copy(myvar1, myvar2, paste0(base_path, "2"), standard_eval = TRUE) @@ -363,7 +363,7 @@ test_that("lib18: libname() standard_eval parameter works as expected with all o lib_load(myvar2) - expect_equal("bork2.ae" %in% ls(), TRUE) + expect_equal("bork2.AE" %in% ls(), TRUE) bork2.mtcars <- mtcars @@ -385,7 +385,7 @@ test_that("lib18: libname() standard_eval parameter works as expected with all o lib_unload(myvar2) - expect_equal("bork2.ae" %in% ls(), FALSE) + expect_equal("bork2.AE" %in% ls(), FALSE) lib_add(myvar2, mtcars) @@ -530,7 +530,7 @@ test_that("lib23: libname works with rda files", { test_that("lib24: lib_export() creates new library.", { - libname(dat, base_path, "xpt") + libname(dat, base_path, "xpt", filter = "AD*") pth2 <- paste0(base_path, "2") @@ -545,3 +545,33 @@ test_that("lib24: lib_export() creates new library.", { }) + +test_that("lib25: libname() function works as expected with rds", 
{ + + libname(dat, base_path, engine = "rds", where = expression(sex == 'F')) + + # dat$demo_studya + # dat$demo_studyb + + expect_equal(class(dat)[[1]], "lib") + expect_equal(length(dat), 2) + expect_equal(nrow(dat[[1]]), 3) + expect_equal(ncol(dat[[1]]), 9) + expect_equal(nrow(dat[[2]]), 1) + expect_equal(ncol(dat[[2]]), 9) +}) + + + +# +# test_that("lib24: lib_export() creates new library.", { +# +# v1 <- c(P1 = 10, P2 = 20, P3 = 30) +# +# saveRDS(v1, "C:/packages/libr/tests/testthat/data/vect.rds") +# +# libname(testme, base_path, "rds") +# +# +# }) + diff --git a/tests/testthat/test-manipulation.R b/tests/testthat/test-manipulation.R index 7a5d8d4..94eb839 100644 --- a/tests/testthat/test-manipulation.R +++ b/tests/testthat/test-manipulation.R @@ -243,7 +243,7 @@ test_that("lib_add(), lib_remove() functions work as expected loaded.", { }) -# +# This idea was killed a long time ago. Now all libnames are of a single type. # test_that("lib_add() function can add a new items of different types.", { # # alt_path <- tempdir() @@ -386,12 +386,36 @@ test_that("lib_add() function can add a new item and save as sas7bdat", { res <- file.exists(file.path(alt_path, "mtcars.sas7bdat")) - expect_equal(res, TRUE) + #expect_equal(res, TRUE) + expect_equal(res, FALSE) # FALSE for now lib_delete(dat) }) +test_that("lib_write() function can add a new item to sas7bdat libname from workspace.", { + + alt_path <- tempdir() + libname(dat, alt_path, engine = "sas7bdat") + + + lib_load(dat) + + dat.demo_studyc <- mtcars + + lib_write(dat) + + pth <- file.path(lib_path(dat), "demo_studyc.sas7bdat") + + res <- file.exists(pth) + + # expect_equal(res, TRUE) + expect_equal(res, FALSE) # For now this is false. Hope to make it true in the future. 
+ + lib_delete(dat) + +}) + test_that("lib_write() function can add a new item from workspace.", { @@ -533,8 +557,8 @@ test_that("lib_write non-changed sas7bdat data works as expected.", { d3 <- subset(info1, Name == "demo_studyb") d4 <- subset(info2, Name == "demo_studyb") - expect_equal(d1[1, 6] == d2[1, 6], FALSE) - expect_equal(d3[1, 6] == d4[1, 6], TRUE) + expect_equal(d1[1, 5] == d2[1, 5], FALSE) + expect_equal(d3[1, 5] == d4[1, 5], TRUE) lib_delete(dat2) @@ -615,7 +639,7 @@ test_that("Read existing xpt files.", { tmp <- tempdir() - libname(dat, base_path, "xpt") + libname(dat, base_path, "xpt", filter = "ad*") expect_equal(length(dat), 2) diff --git a/vignettes/libr-datastep.Rmd b/vignettes/libr-datastep.Rmd index 1d194a1..aa8056f 100644 --- a/vignettes/libr-datastep.Rmd +++ b/vignettes/libr-datastep.Rmd @@ -177,6 +177,61 @@ df # Merc 280 19.2 6 4 End ``` +### By Group Processing of Multiple Variables +If desired, you can pass multiple variables on the `by` parameter. When there +are multiple by groups, the `first.` and `last.` automatic variables described +above will represent an "or" combination of values for all by-variables. In +addition, automatic variables will be created for each variable in the by +group, similar to SAS®. 
Observe: + +#### Example 5: Multiple By Groups +```{r eval=FALSE, echo=TRUE} +library(libr) + +# Create sample data +df <- data.frame(HairEyeColor)[seq(2, 32, 2), ] + +# Sort by groups +df <- sort(df, by = c("Sex", "Hair")) + +# Identify start and end of by-groups +df2 <- datastep(df, + drop = c("Eye", "Freq"), + by = c("Sex", "Hair"), { + + fSex <- first.Sex + lSex <- last.Sex + fHair <- first.Hair + lHair <- last.Hair + + }) + +df2 +# Hair Sex fSex lSex fHair lHair +# 1 Brown Male TRUE FALSE TRUE FALSE +# 2 Brown Male FALSE FALSE FALSE FALSE +# 3 Brown Male FALSE FALSE FALSE FALSE +# 4 Brown Male FALSE FALSE FALSE TRUE +# 5 Blond Male FALSE FALSE TRUE FALSE +# 6 Blond Male FALSE FALSE FALSE FALSE +# 7 Blond Male FALSE FALSE FALSE FALSE +# 8 Blond Male FALSE TRUE FALSE TRUE +# 9 Brown Female TRUE FALSE TRUE FALSE +# 10 Brown Female FALSE FALSE FALSE FALSE +# 11 Brown Female FALSE FALSE FALSE FALSE +# 12 Brown Female FALSE FALSE FALSE TRUE +# 13 Blond Female FALSE FALSE TRUE FALSE +# 14 Blond Female FALSE FALSE FALSE FALSE +# 15 Blond Female FALSE FALSE FALSE FALSE +# 16 Blond Female FALSE TRUE FALSE TRUE + +``` +The above `first.Sex`, `last.Sex`, `first.Hair`, and `last.Hair` variables +may also be used in conditions, functions, or any other expression +inside your datastep. Note that like `first.` and `last.` they are +dropped automatically at the end of the datastep. If you want to retain +their values, assign them to a new variable as shown above. + ### Using Summary Functions There may be times when you want to combine row-by-row conditional processing with column-by-column vector operations. For example, @@ -187,7 +242,7 @@ function. The function will execute the `calculate` block first, add any assigned variables to the data frame, and then execute the data step. Below is an example of such a scenario: -#### Example 5: Calculate Block +#### Example 6: Calculate Block ```{r eval=FALSE, echo=TRUE} library(libr) @@ -226,7 +281,7 @@ function. 
Therefore, within a **dplyr** pipeline, it is not necessary to use any `datastep` parameters. The following example recreates the above data frame from Example 5, but with a **dplyr** pipeline. -#### Example 6: Data Pipeline +#### Example 7: Data Pipeline ```{r eval=FALSE, echo=TRUE} library(libr) library(dplyr) @@ -271,7 +326,7 @@ the `datastep()` function provides an _attrib_ parameter that allows you to supply such attributes as part of a data step. Attributes are assigned with a named list and the `dsattr()` object. -#### Example 7: Attributes +#### Example 8: Attributes ```{r eval=FALSE, echo=TRUE} library(libr) @@ -336,7 +391,7 @@ to extract values. You can use the indexer to extract a single value or a subset of values. An empty indexer will return all the values in the array. -#### Example 8: Using a Data Step Array +#### Example 9: Using a Data Step Array ```{r eval=FALSE, echo=TRUE} library(libr) @@ -639,6 +694,53 @@ res # 5 A03 AAA Mills S R04 FALSE South ``` +### Datastep Performance +One weakness of the **libr** `datastep()` function is performance. The +function is far slower than the equivalent SAS® datastep. The performance +profile may limit the number of records you are able to reasonably process +with the `datastep()`. + +One thing you can do to increase performance is to reduce the number of rows +and columns on the input data. You can perform this pre-filtering with Base R +or **Tidyverse** functions. This strategy is particularly recommended if +you were planning to subset the data anyway using the "where" or "keep" options. + +The Base R `subset()` function is convenient to use because it is always available. +Here is an example showing how to reduce the size of the **iris** sample +dataframe using Base R `subset()` before sending it to a datastep. 
+ +#### Example 10: Increasing Performance +``` +# Subset the input dataset first for only needed rows and columns +dat <- subset(iris, Species == 'versicolor', c('Petal.Length', 'Petal.Width')) |> + datastep({ + + if (Petal.Length < 3.5) + Petal.Size <- "Short" + else if (Petal.Length > 4.5) + Petal.Size <- "Long" + else + Petal.Size <- "Medium" + + }) + +# View Some Results +dat[1:10, ] +# Petal.Length Petal.Width Petal.Size +# 1 4.7 1.4 Long +# 2 4.5 1.5 Medium +# 3 4.9 1.5 Long +# 4 4.0 1.3 Medium +# 5 4.6 1.5 Long +# 6 4.5 1.3 Medium +# 7 4.7 1.6 Long +# 8 3.3 1.0 Short +# 9 4.6 1.3 Long +# 10 3.9 1.4 Medium + +``` + + Next: [Disclaimer](libr-disclaimer.html) diff --git a/vignettes/libr-faq.Rmd b/vignettes/libr-faq.Rmd index 64fd825..28a05e5 100644 --- a/vignettes/libr-faq.Rmd +++ b/vignettes/libr-faq.Rmd @@ -29,6 +29,8 @@ on the links below to navigate to the full question and answer content. * [How do I export data to another file format?](#export) * [How do I copy a library?](#copy) * [Can I really do a datastep in R?](#datastep) +* [Why is the datastep so slow?](#performance) +* [Can I do "set" and "merge" operations with the datastep?](#merge) ## Content @@ -50,24 +52,9 @@ specified on the second parameter. The libname will use the CSV engine. If there are any CSV files in the directory, they will be all loaded into the library. To work directly with the datasets, you can then do: ```{r eval=FALSE, echo=TRUE} -lib_load(mylib) -``` -This statement will load the datasets into your workspace, where you can begin -using them. For instance, you could get summary statistics for a variable like -this: -```{r eval=FALSE, echo=TRUE} -summary(mylib.dat1$var1) -``` - -If you made any changes to the data, and want to keep those changes, remember -to save them with: -```{r eval=FALSE, echo=TRUE} -lib_write(mylib) -``` -When you are done, unload the datasets with: -```{r eval=FALSE, echo=TRUE} -lib_unload(mylib) +mylib$mydataset ``` +This gives you direct access to your datasets. 
[top](#top) @@ -82,7 +69,8 @@ support? **A:** The package supports the following data formats: csv, sas7bdat, rds, Rdata, rda, xls, xlsx, xpt, and dbf. The `libname()` help page has a full list, -and a short discussion of some details on each format. +and a short discussion of some details on each format. Note that the sas7bdat +file format is read-only at this time. [top](#top) @@ -207,11 +195,8 @@ library(libr) # Define data library libname(dat, "./data", "csv") -# Loads data into workspace -lib_load(dat) - # Prepare data -dm_mod <- dat.DM %>% +dm_mod <- dat$DM %>% select(USUBJID, SEX, AGE, ARM) %>% filter(ARM != "SCREEN FAILURE") %>% datastep({ @@ -226,8 +211,7 @@ dm_mod <- dat.DM %>% AGECAT <- ">= 65" }) - -lib_unload(dat) + ``` The datastep example above is part of a **dplyr** pipeline, but it can also function independently. @@ -243,6 +227,74 @@ for additional examples and complete documentation. ****** +### Why is the datastep so slow? {#performance} + +**Q:** I like the `datastep()` function very much. But it seems quite slow. +Is there anything I can do to speed it up? + +**A:** Yes. Performance of the `datastep()` is directly related to the size +of the input data. The best thing you can do to increase performance +is to reduce the input data to only those rows and columns that you need. +The Base R `subset()` function and **Tidyverse** `select()` and `filter()` +functions are useful for this purpose. Or you can use the Base R subset brackets ([]) +if you are familiar with that syntax. If the datastep performance is still +not satisfactory, it is recommended that you explore other R functions to +perform your intended operation. + +[top](#top) + +****** + +### Can I do "set" and "merge" operations with the datastep? {#merge} + +**Q:** In SAS®, I used the datastep frequently to combine two or more datasets. +Does the **libr** datastep support "set" and "merge"? + +**A:** Yes. The `datastep()` function supports both "set" and "merge" operations. 
+The "set" parameter accepts a list of one or more datasets to stack together, +and the "merge" parameters are used in almost the same way as SAS®. +Here is an example: +``` +# Subset mtcars dataset +dat1 <- subset(mtcars, cyl == 4, c('mpg', 'cyl', 'disp'))[1:5, ] +dat2 <- subset(mtcars, cyl == 6, c('mpg', 'cyl', 'disp'))[1:5, ] +dat3 <- mtcars[1:10, c('hp', 'drat', 'wt')] + +# Stack datasets using set operation +res1 <- datastep(dat1, set = dat2, {}) +# mpg cyl disp +# 1 22.8 4 108.0 +# 2 24.4 4 146.7 +# 3 22.8 4 140.8 +# 4 32.4 4 78.7 +# 5 30.4 4 75.7 +# 6 21.0 6 160.0 +# 7 21.0 6 160.0 +# 8 21.4 6 258.0 +# 9 18.1 6 225.0 +# 10 19.2 6 167.6 + +# Merge row by row +res2 <- datastep(res1, merge = dat3, {}) +# mpg cyl disp hp drat wt +# 1 22.8 4 108.0 110 3.90 2.620 +# 2 24.4 4 146.7 110 3.90 2.875 +# 3 22.8 4 140.8 93 3.85 2.320 +# 4 32.4 4 78.7 110 3.08 3.215 +# 5 30.4 4 75.7 175 3.15 3.440 +# 6 21.0 6 160.0 105 2.76 3.460 +# 7 21.0 6 160.0 245 3.21 3.570 +# 8 21.4 6 258.0 62 3.69 3.190 +# 9 18.1 6 225.0 95 3.92 3.150 +# 10 19.2 6 167.6 123 3.92 3.440 +``` +The above merge shows how you can append columns even without a key column. +If you want to merge by a key, use the "merge_by" and "merge_in" parameters. +See the `datastep()` documentation for more information and examples. + +[top](#top) + +****** diff --git a/vignettes/libr.Rmd b/vignettes/libr.Rmd index 245b108..617ca8a 100644 --- a/vignettes/libr.Rmd +++ b/vignettes/libr.Rmd @@ -119,23 +119,71 @@ cols( The column specification shows how the data was imported. Since 'csv' files do not contain well-defined data type information on each of the columns, -the `libname` function has to guess at the data types. The column +the `libname()` function has to guess at the data types. The column specification shows you what the guesses were. This is useful information. 
-You should review these column specifications to see if the `libname` function +You should review these column specifications to see if the `libname()` function guessed correctly. If it did not guess correctly, you can control the import data types by sending a `specs()` collection of `import_spec()` objects to the `import_specs` parameter on the `libname()` function. See the `specs()` documentation for an example and additional details. -#### The `lib_load()` Function +#### Accessing Data Observe that there is difference between the SAS® `libname` statement and the **libr** `libname()` function. The difference is that after the SAS® `libname` statement is called, the data is immediately available to your code using two-level (\.\) syntax. With the **libr** function, on the other hand, the data is immediately -available using list syntax on the library variable name. To get the -two-level syntax, you first have to call the `lib_load()` function. +available using list syntax on the library variable name. 
That means you can +get to your data using the dollar sign ($), like this: +```{r eval=FALSE, echo=TRUE} +# View a dataset +sdtm$DM +# # A tibble: 87 × 24 +# STUDYID DOMAIN USUBJID SUBJID RFSTDTC RFENDTC RFXSTDTC RFXENDTC RFICDTC RFPENDTC +# +# 1 ABC DM ABC-01… 049 2006-11-07 NA NA NA 2006-10-25 NA +# 2 ABC DM ABC-01… 050 2006-11-02 NA NA NA 2006-10-25 NA +# 3 ABC DM ABC-01… 051 2006-11-02 NA NA NA 2006-10-25 NA +# 4 ABC DM ABC-01… 052 2006-11-06 NA NA NA 2006-10-31 NA +# 5 ABC DM ABC-01… 053 2006-11-08 NA NA NA 2006-11-01 NA +# 6 ABC DM ABC-01… 054 2006-11-16 NA NA NA 2006-11-07 NA +# 7 ABC DM ABC-01… 055 2006-12-06 NA NA NA 2006-10-31 NA +# 8 ABC DM ABC-01… 056 2006-11-28 NA NA NA 2006-11-21 NA +# 9 ABC DM ABC-01… 113 2006-12-05 NA NA NA 2006-11-28 NA +# 10 ABC DM ABC-01… 114 2006-12-14 NA NA NA 2006-12-01 NA +# # 77 more rows +# # 14 more variables: DTHDTC , DTHFL , SITEID , BRTHDTC , AGE , +# # AGEU , SEX , RACE , ETHNIC , ARMCD , ARM , ACTARMCD , +# # ACTARM , COUNTRY +# # Use `print(n = ...)` to see more rows + +``` +Using this syntax, your dataset can be passed into any R function. For example, +here we can subset the dataset for a particular subject: +``` +# Subset the data +dat <- subset(sdtm$DM, SUBJID == '050') + +# View results +dat +# # A tibble: 1 × 24 +# STUDYID DOMAIN USUBJID SUBJID RFSTDTC RFENDTC RFXSTDTC RFXENDTC RFICDTC RFPENDTC +# +# 1 ABC DM ABC-01-… 050 2006-11-02 NA NA NA 2006-10-25 NA +# # 14 more variables: DTHDTC , DTHFL , SITEID , BRTHDTC , AGE , +# # AGEU , SEX , RACE , ETHNIC , ARMCD , ARM , ACTARMCD , +# # ACTARM , COUNTRY + +``` +The dollar sign syntax shown above is recommended for the most memory-efficient +programming. If you are writing production code to be run in batch, use +the dollar sign syntax. + +#### The `lib_load()` Function +For convenience, the package also provides a way to get two-level dot syntax, +similar to SAS®. +To get the dot syntax, you first have to call the `lib_load()` function. 
```{r eval=FALSE, echo=TRUE} lib_load(sdtm) @@ -157,7 +205,7 @@ lib_load(sdtm) Notice on the console printout that the library is now "loaded". That means the data has been loaded into the workspace, and is available using -two-level syntax. If you are working in **RStudio**, the environment +two-level dot syntax. If you are working in **RStudio**, the environment pane will now show all the datasets available in the library.