Skip to content

Commit

Permalink
encoding should probably be the same as the SAS encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
JanMarvin committed Dec 2, 2023
1 parent 5dbc8bc commit 32beda1
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 25 deletions.
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ readsas <- function(filePath, debug, selectrows_, selectcols_, empty_to_na, temp
#' @param dateval timestamp
#' @keywords internal
#' @noRd
writesas <- function(filePath, dat, compress, debug, bit32, headersize, pagesize, dateval) {
invisible(.Call(`_readsas_writesas`, filePath, dat, compress, debug, bit32, headersize, pagesize, dateval))
writesas <- function(filePath, dat, compress, debug, bit32, headersize, pagesize, dateval, encoding32) {
invisible(.Call(`_readsas_writesas`, filePath, dat, compress, debug, bit32, headersize, pagesize, dateval, encoding32))
}

20 changes: 10 additions & 10 deletions R/writesas.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
#'@param bit32 write 32bit file
#'@param varlabels optional variable labels
#'@param size optional header/pagesize
#'@param encoding encoding code: 62 = Windows, 20 = UTF-8 (the default)
#'
#'@useDynLib readsas, .registration=TRUE
#'
#'@export
write.sas <- function(dat, filepath, compress = 0, debug = FALSE, bit32 = FALSE,
varlabels, size) {
varlabels, size, encoding = 20) {

filepath <- path.expand(filepath)

Expand Down Expand Up @@ -96,16 +97,15 @@ write.sas <- function(dat, filepath, compress = 0, debug = FALSE, bit32 = FALSE,
# for numerics
# formats <- rep("BEST", ncol(dat))

attr(dat, "vartypes") <- as.integer(vartypes)
attr(dat, "colwidth") <- as.integer(colwidth)
attr(dat, "formats") <- formats
attr(dat, "width") <- width
attr(dat, "decim") <- decim
attr(dat, "labels") <- labels
attr(dat, "vartypes") <- as.integer(vartypes)
attr(dat, "colwidth") <- as.integer(colwidth)
attr(dat, "formats") <- formats
attr(dat, "width") <- width
attr(dat, "decim") <- decim
attr(dat, "labels") <- labels
attr(dat, "varlabels") <- varlabels


writesas(filepath, dat, compress = 0, debug = debug, bit32 = bit32,
headersize = size[1], pagesize = size[2], dateval = as_datetime(Sys.time()))

headersize = size[1], pagesize = size[2],
dateval = as_datetime(Sys.time()), encoding32 = encoding)
}
5 changes: 4 additions & 1 deletion man/write.sas.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ BEGIN_RCPP
END_RCPP
}
// writesas
void writesas(const char * filePath, Rcpp::DataFrame dat, uint8_t compress, bool debug, bool bit32, int32_t headersize, int32_t pagesize, double dateval);
RcppExport SEXP _readsas_writesas(SEXP filePathSEXP, SEXP datSEXP, SEXP compressSEXP, SEXP debugSEXP, SEXP bit32SEXP, SEXP headersizeSEXP, SEXP pagesizeSEXP, SEXP datevalSEXP) {
void writesas(const char * filePath, Rcpp::DataFrame dat, uint8_t compress, bool debug, bool bit32, int32_t headersize, int32_t pagesize, double dateval, int32_t encoding32);
RcppExport SEXP _readsas_writesas(SEXP filePathSEXP, SEXP datSEXP, SEXP compressSEXP, SEXP debugSEXP, SEXP bit32SEXP, SEXP headersizeSEXP, SEXP pagesizeSEXP, SEXP datevalSEXP, SEXP encoding32SEXP) {
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const char * >::type filePath(filePathSEXP);
Expand All @@ -39,14 +39,15 @@ BEGIN_RCPP
Rcpp::traits::input_parameter< int32_t >::type headersize(headersizeSEXP);
Rcpp::traits::input_parameter< int32_t >::type pagesize(pagesizeSEXP);
Rcpp::traits::input_parameter< double >::type dateval(datevalSEXP);
writesas(filePath, dat, compress, debug, bit32, headersize, pagesize, dateval);
Rcpp::traits::input_parameter< int32_t >::type encoding32(encoding32SEXP);
writesas(filePath, dat, compress, debug, bit32, headersize, pagesize, dateval, encoding32);
return R_NilValue;
END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_readsas_readsas", (DL_FUNC) &_readsas_readsas, 6},
{"_readsas_writesas", (DL_FUNC) &_readsas_writesas, 8},
{"_readsas_writesas", (DL_FUNC) &_readsas_writesas, 9},
{NULL, NULL, 0}
};

Expand Down
20 changes: 12 additions & 8 deletions src/writesas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ using namespace Rcpp;
// [[Rcpp::export]]
void writesas(const char * filePath, Rcpp::DataFrame dat, uint8_t compress,
bool debug, bool bit32, int32_t headersize, int32_t pagesize,
double dateval) {
double dateval, int32_t encoding32) {

int8_t encoding = (int8_t)encoding32;

uint32_t k = dat.size();
uint64_t n = dat.nrows();
Expand Down Expand Up @@ -149,7 +151,7 @@ void writesas(const char * filePath, Rcpp::DataFrame dat, uint8_t compress,

double created = dateval, created2 = 0; // 8
double modified = dateval, modified2 = 0; // 16
double thrdts = 0;
double thrdts = dateval;

// possibly make headersize and pagesize variable
// int32_t headersize = 65536;
Expand Down Expand Up @@ -183,7 +185,7 @@ void writesas(const char * filePath, Rcpp::DataFrame dat, uint8_t compress,
if (bit32 == 1) U64_BYTE_CHECKER_VALUE = 50;
if (U64_BYTE_CHECKER_VALUE == 51) ALIGN_2_VALUE = 4;

pkt2 = 20, pkt3 = 0;
pkt2 = 34, pkt3 = 0;

writebin(ALIGN_1_CHECKER_VALUE, sas, swapit);
writebin(pkt2, sas, swapit); // 34
Expand All @@ -194,8 +196,8 @@ void writesas(const char * filePath, Rcpp::DataFrame dat, uint8_t compress,
int8_t ENDIANNESS = 1; // 0 is swapit = 1
uint8_t PLATFORM = 49; // (1) 49 Unix (2) 50 Win

pkt1 = 0, pkt3 = 0;
// if (bit32 == 1) pkt1 = 50;
pkt1 = 51, pkt3 = 2;
if (bit32 == 1) pkt1 = 50;
writebin(pkt1, sas, swapit); // 51
writebin(ENDIANNESS, sas, swapit);
writebin(pkt3, sas, swapit); // 2
Expand Down Expand Up @@ -258,7 +260,7 @@ void writesas(const char * filePath, Rcpp::DataFrame dat, uint8_t compress,
writebin(pkt4, sas, swapit);

// packet 10 -------------------------------------- //
int8_t encoding = 20; // utf8
// int8_t encoding = 20; // utf8
pkt1 = 51, pkt2 = 0, pkt3 = 0, pkt4 = 20;
if (bit32 == 1) pkt1 = 50;
writebin(pkt1, sas, swapit); // 51
Expand Down Expand Up @@ -341,10 +343,12 @@ void writesas(const char * filePath, Rcpp::DataFrame dat, uint8_t compress,

// large unk
// something related to file password?
uint32_t pktu32 = 1157289805;
// starts as zero, then takes the first 4 bytes of the creation date
uint32_t pktu32 = 0;
std::memcpy(&pktu32, &created, sizeof(int32_t));
writebin(pktu32, sas, swapit);

pktu32 = 563452161;
pktu32 = 545930268;
// three identical smaller unks
// required so that the file is identified as SAS file
writebin(pktu32, sas, swapit);
Expand Down

0 comments on commit 32beda1

Please sign in to comment.