Skip to content

Commit

Permalink
Merge pull request #34 from JanMarvin/select_rows
Browse files Browse the repository at this point in the history
Select rows
  • Loading branch information
JanMarvin authored Sep 4, 2022
2 parents 56bff6d + fc2e8f2 commit 4eada96
Show file tree
Hide file tree
Showing 9 changed files with 166 additions and 93 deletions.
10 changes: 6 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ Description: Package to read the SAS file format.
License: GPL (>= 2) | file LICENSE
URL: https://github.com/JanMarvin/readsas
BugReports: https://github.com/JanMarvin/readsas/issues
Imports: Rcpp (>= 1.0.2), stringi
Depends: R (>= 3.4.0)
Imports: Rcpp, stringi
LinkingTo: Rcpp
SystemRequirements: C++11
Suggests: testthat, foreign, datasets
ByteCompile: yes
RoxygenNote: 7.2.1
Encoding: UTF-8
Language: en-US
RoxygenNote: 7.2.1
Roxygen: list(markdown = TRUE)
Config/testthat/edition: 3
Config/testthat/parallel: true
5 changes: 3 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
#' @param selectrows_ integer vector of selected rows
#' @param selectcols_ character vector of selected rows
#' @param empty_to_na logical convert '' to NA_character_
#' @param tempstr filepath used for temp output when uncompressing
#' @import Rcpp
#' @export
readsas <- function(filePath, debug, selectrows_, selectcols_, empty_to_na) {
.Call(`_readsas_readsas`, filePath, debug, selectrows_, selectcols_, empty_to_na)
readsas <- function(filePath, debug, selectrows_, selectcols_, empty_to_na, tempstr) {
.Call(`_readsas_readsas`, filePath, debug, selectrows_, selectcols_, empty_to_na, tempstr)
}

86 changes: 47 additions & 39 deletions R/readsas.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,42 @@
#' read.sas
#'
#'@author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @description `read.sas` is a general function for reading sas7bdat files.
#' It supports a variety of complex sas7bdat files, x86 and x64, big and small
#' endian, and both compression types. It has been tested with numeric and
#' character data, and provides helper functions for converting sas7bdat to R
#' types `Date` and `POSIXct`. Time variables are converted to characters.
#' Conversion to date variables is applied if a known date, datetime or time
#' format is found in the sas7bdat file. For user-defined formats, the package
#' provides functions to convert from sas7bdat to R.
#'
#'@param file file to read
#'@param debug print debug information
#'@param convert_dates default is TRUE
#'@param recode default is TRUE
#'@param select.rows \emph{integer.} Vector of one or two numbers. If single
#' value rows from 1:val are selected. If two values of a range are selected
#' the rows in range will be selected.
#'@param select.cols \emph{character:} Vector of variables to select.
#'@param remove_deleted logical if deleted rows should be removed from data
#'@param rownames first column will be used as rowname and removed from data
#'@param empty_to_na logical. In SAS empty characters are missing. this option
#' Input files may contain deleted rows that are marked as deleted instead of
#' being removed from the input data. These are removed on import, if you still
#' need them look at `remove_deleted`. Formats, labels and additional file
#' information are available with `attributes()`.
#'
#' @param file file to read
#' @param debug print debug information
#' @param convert_dates default is `TRUE`
#' @param recode default is `TRUE`
#' @param select.rows \emph{integer.} Vector of rows to import. Minimum 0. Rows
#' imported are sorted. If 0 is in `select.rows`, zero rows are returned.
#' @param select.cols \emph{character:} Vector of variables to select.
#' @param remove_deleted logical if deleted rows should be removed from data
#' @param rownames first column will be used as rowname and removed from data
#' @param empty_to_na logical. In SAS empty characters are missing. this option
#' allows to convert `""` to `NA_character_` when importing.
#'
#'@useDynLib readsas, .registration=TRUE
#'@importFrom utils download.file
#'@importFrom stringi stri_encode
#' @useDynLib readsas, .registration=TRUE
#' @importFrom utils download.file
#' @importFrom stringi stri_encode
#'
#' @seealso \link[foreign]{read.xport}
#'
#' @examples
#' fl <- system.file("extdata", "cars.sas7bdat", package = "readsas")
#' read.sas(fl)
#'
#'@export
#' @export
read.sas <- function(file, debug = FALSE, convert_dates = TRUE, recode = TRUE,
select.rows = NULL, select.cols = NULL, remove_deleted = TRUE,
rownames = FALSE, empty_to_na = FALSE) {
Expand All @@ -45,25 +62,9 @@ read.sas <- function(file, debug = FALSE, convert_dates = TRUE, recode = TRUE,
if (!is.numeric(select.rows)) {
return(message("select.rows must be of type numeric"))
} else {
# guard against negative values
if (any(select.rows < 0))
select.rows <- abs(select.rows)

# check that length is not > 2
if (length(select.rows) > 2)
return(message("select.rows must be of length 1 or 2."))
if (any(select.rows < 0)) stop("select.rows must be >= 0")

# if length 1 start at row 1
if (length(select.rows) == 1)
select.rows <- c(1, select.rows)

# reorder if 2 is bigger than 1
if (select.rows[2] < select.rows[1])
select.rows <- c(select.rows[2], select.rows[1])

# make sure to start at index position 1 if select.rows[2] > 0
if (select.rows[2] > 0 && select.rows[1] == 0)
select.rows[1] <- 1
select.rows <- sort(select.rows - 1)
}

}
Expand All @@ -72,18 +73,25 @@ read.sas <- function(file, debug = FALSE, convert_dates = TRUE, recode = TRUE,
return(message("select.cols must be of type character"))
}

data <- readsas(filepath, debug, select.rows, select.cols, empty_to_na)

tempstr <- tempfile()
on.exit(unlink(tempstr), add = TRUE)
data <- readsas(filepath, debug, select.rows, select.cols, empty_to_na,
tempstr)

# rowcount <- attr(data, "rowcount")
# if rownames, remove the rowname variable from attributes
cvec <- ifelse(rownames, -1, substitute())

# rownames start at 0
row_names <- attr(data, "rvec") + 1

# only if a row was returned
if (nrow(data)) row.names(data) <- row_names

if (rownames) {
rownames(data) <- data[[1]]
data[[1]] <- NULL
}


encoding <- attr(data, "encoding") <- attr(data, "encoding")

## shorten attributes and reassign
Expand Down Expand Up @@ -186,9 +194,9 @@ read.sas <- function(file, debug = FALSE, convert_dates = TRUE, recode = TRUE,

if (remove_deleted) {

sel <- row_names
del_rows <- attr(data, "deleted_rows")

sel <- attr(data, "rvec")
val <- attr(data, "valid")[sel]
del <- attr(data, "deleted")[sel]
attr(data, "deleted") <- NULL
Expand Down
34 changes: 25 additions & 9 deletions man/read.sas.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/readsas.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
#endif

// readsas
Rcpp::List readsas(const char * filePath, const bool debug, Nullable<IntegerVector> selectrows_, Nullable<CharacterVector> selectcols_, const bool empty_to_na);
RcppExport SEXP _readsas_readsas(SEXP filePathSEXP, SEXP debugSEXP, SEXP selectrows_SEXP, SEXP selectcols_SEXP, SEXP empty_to_naSEXP) {
Rcpp::List readsas(const char * filePath, const bool debug, Nullable<IntegerVector> selectrows_, Nullable<CharacterVector> selectcols_, const bool empty_to_na, std::string tempstr);
RcppExport SEXP _readsas_readsas(SEXP filePathSEXP, SEXP debugSEXP, SEXP selectrows_SEXP, SEXP selectcols_SEXP, SEXP empty_to_naSEXP, SEXP tempstrSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Expand All @@ -21,13 +21,14 @@ BEGIN_RCPP
Rcpp::traits::input_parameter< Nullable<IntegerVector> >::type selectrows_(selectrows_SEXP);
Rcpp::traits::input_parameter< Nullable<CharacterVector> >::type selectcols_(selectcols_SEXP);
Rcpp::traits::input_parameter< const bool >::type empty_to_na(empty_to_naSEXP);
rcpp_result_gen = Rcpp::wrap(readsas(filePath, debug, selectrows_, selectcols_, empty_to_na));
Rcpp::traits::input_parameter< std::string >::type tempstr(tempstrSEXP);
rcpp_result_gen = Rcpp::wrap(readsas(filePath, debug, selectrows_, selectcols_, empty_to_na, tempstr));
return rcpp_result_gen;
END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_readsas_readsas", (DL_FUNC) &_readsas_readsas, 5},
{"_readsas_readsas", (DL_FUNC) &_readsas_readsas, 6},
{NULL, NULL, 0}
};

Expand Down
Loading

0 comments on commit 4eada96

Please sign in to comment.