Skip to content

Commit

Permalink
Merge pull request #264 from Crunch-io/codebooks2022
Browse files Browse the repository at this point in the history
Codebooks2022
  • Loading branch information
1beb authored Oct 5, 2022
2 parents 723e78a + f0bfa18 commit 75b07ce
Show file tree
Hide file tree
Showing 14 changed files with 90 additions and 55 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check-standard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:

- name: Query dependencies
run: |
install.packages('remotes')
install.packages(c('remotes', 'markdown'))
saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
shell: Rscript {0}
Expand Down
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Description: In order to generate custom survey reports, this package provides
'banners' (cross-tabulations) and codebooks of datasets in the Crunch
(<https://crunch.io/>) web service. Reports can be written in 'PDF' format
using 'LaTeX' or in Microsoft Excel '.xlsx' files.
Version: 1.4.4
Version: 2.0.0
Authors@R: c(
person("Persephone", "Tsebelis", role="aut"),
person("Kamil", "Sedrowicz", role="aut"),
Expand All @@ -28,15 +28,15 @@ Imports:
methods,
openxlsx,
rlang,
stringi,
tinytex
Suggests:
arrow,
covr,
knitr,
mockery,
rmarkdown,
stringi,
testthat (>= 2.1.0)
RoxygenNote: 7.1.1
RoxygenNote: 7.2.1
VignetteBuilder: knitr
Encoding: UTF-8
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## crunchtabs 2.0.0

- Rounding in variable summaries changed to match Crunch's method of rounding: rounding half up rather than rounding half to even. This ensures summaries shown in reports match Crunch variable summaries.
- Rounding is now applied after variable subtotals are calculated, to address discrepancies with Crunch.
- Fixes an issue with the calculation of variable subtotals so that differences as well as sums can be reported.
- writeCodeBookLatexGeneric now ensures that large vectors to be summarized are stored in memory by R in a way that drastically improves compute times. Integer, numeric, and factor variables read from arrow datasets using dplyr::pull were inefficiently being stored as int, numeric, and factor, rather than Large Integer, Large Numeric, and Large Factor, which made summaries slow to calculate for very large vectors.

## crunchtabs 1.4.4

- Fixes a regression where absolutelynopagebreak was duplicated or not functional in some situations. (pagebreak_in_banner=TRUE and one_per_page=FALSE)
Expand Down
8 changes: 7 additions & 1 deletion R/codeBookGeneric.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,15 @@ writeCodeBookLatexGeneric <- function(
if (any(class(ds) %in% c("ArrowObject", "arrow_dplyr_query"))) {
cls <- get_class(ds, nm)
if(cls == "character") {
x <- ds %>% filter(cd == 1) %>% dplyr::select(nm) %>% dplyr::collect() %>% dplyr::pull(nm)
x <- ds %>% filter(.data$cd_number == 1) %>% dplyr::select(nm) %>% dplyr::collect() %>% dplyr::pull(nm)
} else {
x <- ds %>% dplyr::select(nm) %>% dplyr::collect() %>% dplyr::pull(nm)
if (cls %in% c("integer", "numeric")) {
x <- unlist(as.vector(x, mode = "list"))
}
if (cls == "factor") {
x <- factor(levels(x)[as.integer(x)], levels = levels(x))
}
}

} else {
Expand Down
36 changes: 27 additions & 9 deletions R/forNowTransforms.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,34 @@ calcTabInsertions <- function(vec, elements, var_cats) {
# if element is a subtotal, sum the things it corresponds to which are
# found with arguments()
if (crunch::is.Subtotal(element)) {
# grab category combinations, and then sum those categories.
combos <- element$categories
which.cats <- names(var_cats)[crunch::ids(var_cats) %in% combos]
if (any(is.na(var_cats)[crunch::ids(var_cats) %in% combos])) {
return(NA) # nocov
# Check if subtotal is strictly a sum of variables or a sum/difference
if (is.null(element$negative) || length(element$negative) == 0) {
# grab category combinations, and then sum those categories.
combos <- element$categories
which.cats <- names(var_cats)[crunch::ids(var_cats) %in% combos]
if (any(is.na(var_cats)[crunch::ids(var_cats) %in% combos])) {
return(NA) # nocov
}
if (dim(vec)[2] == 1) {
return(sum(vec[which.cats, ]))
}
return(colSums(vec[which.cats, , drop = FALSE]))
} else {
# if element has a "negative" item, these need to be subtracted from
# the other categories
# grab category combinations, and then sum those categories.
combos <- element$categories
combos_negative <- as.integer(unlist(element$negative))
which.cats <- names(var_cats)[crunch::ids(var_cats) %in% combos]
which.cats_negative <- names(var_cats)[crunch::ids(var_cats) %in% combos_negative]
if (any(is.na(var_cats)[crunch::ids(var_cats) %in% c(combos, combos_negative)])) {
return(NA) # nocov
}
if (dim(vec)[2] == 1) {
return(sum(vec[which.cats, ]) - sum(vec[which.cats_negative, ]))
}
return(colSums(vec[which.cats, , drop = FALSE]) - colSums(vec[which.cats_negative, , drop = FALSE]))
}
if (dim(vec)[2] == 1) {
return(sum(vec[which.cats, ]))
}
return(colSums(vec[which.cats, , drop = FALSE]))
}
}))

Expand Down
11 changes: 7 additions & 4 deletions R/reformatResults.R
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,13 @@ reformatVar <- function(var, banner_name, theme, proportions, banner_info, latex
should_round <- ifelse(var$alias %in% theme$latex_round_percentages_exception,
!should_round, should_round
)

# Calculate tabInsertions before rounding!
if (var$type %in% c("categorical", "categorical_array") && dt %in% "body" &&
any(var$inserts %in% c("Heading", "Subtotal"))) {
data <- as.matrix(calcTabInsertions(data, var$inserts_obj, var$categories))
}

if (should_round & dt != "weighted_n") {
data[] <- apply(data, 2, roundPropCategorical, theme$digits)
} else if (!is.null(rdig) && !is.infinite(rdig)) {
Expand Down Expand Up @@ -232,10 +239,6 @@ reformatVar <- function(var, banner_name, theme, proportions, banner_info, latex
dimnames = list(c(theme$format_totals_row$name), colnames(data))
)
}
if (var$type %in% c("categorical", "categorical_array") && dt %in% "body" &&
any(var$inserts %in% c("Heading", "Subtotal"))) {
data <- as.matrix(calcTabInsertions(data, var$inserts_obj, var$categories))
}

if (weight_v && nrow(data) > 1) {
data <- rbind(apply(data, 2, min, na.rm = TRUE), apply(data, 2, max, na.rm = TRUE))
Expand Down
15 changes: 10 additions & 5 deletions R/tabbook-additions.R
Original file line number Diff line number Diff line change
Expand Up @@ -321,20 +321,24 @@ tabBookSingle_crunchtabs <- function(multitable, dataset, weight) {
)

out <- download_result(result)
return(crunch:::TabBookResult(out))
TabBookResult <- utils::getFromNamespace("TabBookResult", "crunch")
return(TabBookResult(out))
}


# Call crunch's internal `variablesFilter()` on `dataset` and return its
# result.
#
# The internal function is looked up once with utils::getFromNamespace()
# rather than accessed with `:::`, and is invoked exactly once.
#
# dataset: passed unchanged to crunch's `variablesFilter()`.
varFilter <- function(dataset) {
  variablesFilter <- utils::getFromNamespace("variablesFilter", "crunch")
  variablesFilter(dataset)
}

# Fetch `result` with crunch::crGET(), wrapped in crunch's internal `retry()`
# helper (called with wait = 0.5), and return whatever `retry()` returns.
#
# Kept as a separate thin wrapper so it can be replaced in tests. The request
# is issued through a single `retry()` call; the internal helper is resolved
# with utils::getFromNamespace() instead of `:::`.
#
# result: passed unchanged to crunch::crGET().
download_result <- function(result) {
  retry <- utils::getFromNamespace("retry", "crunch")
  retry(crunch::crGET(result), wait = 0.5) # For mocks
}

# Forward all arguments to crunch's internal `TabBookResult()` and return its
# result.
#
# Kept as a separate thin wrapper so it can be replaced in tests. The
# internal function is resolved with utils::getFromNamespace() instead of
# `:::`, and is called exactly once.
#
# ...: forwarded unchanged to crunch's `TabBookResult()`.
tabBookResult <- function(...) {
  TabBookResult <- utils::getFromNamespace("TabBookResult", "crunch")
  TabBookResult(...) # For mocks
}

#' @importFrom stats ave
Expand Down Expand Up @@ -485,7 +489,8 @@ tabBookWeightSpec <- function(dataset, weights, append_default_wt = TRUE) {


# GET the resource at self(allVariables(dataset)) with the query parameter
# relative = "on", and wrap the response with crunch's internal
# `ShojiCatalog()`.
#
# The request is made exactly once; the internal function is resolved with
# utils::getFromNamespace() instead of `:::`.
#
# dataset: passed to allVariables() to locate the catalog to fetch.
getCatalog <- function(dataset) {
  ShojiCatalog <- utils::getFromNamespace("ShojiCatalog", "crunch")
  ShojiCatalog(crGET(self(allVariables(dataset)), query = list(relative = "on")))
}


Expand Down
16 changes: 8 additions & 8 deletions man/codeBookItemBody.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 9 additions & 9 deletions man/codeBookSummary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/crosstabs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 1 addition & 5 deletions man/crunchtabs-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions tests/testthat/ref/tabbook1.tex
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@

\textbf{header} & & & & & \\
Cat & 30\% & 38\% & 25\% & 18\% & 44\% \\
\textbf{Net: Cat/Bird} & \textbf{50\%} & \textbf{38\%} & \textbf{58\%} & \textbf{30\%} & \textbf{73\%} \\
\textbf{Net: Cat/Bird} & \textbf{50\%} & \textbf{38\%} & \textbf{58\%} & \textbf{31\%} & \textbf{73\%} \\
Dog & 50\% & 62\% & 42\% & 69\% & 27\% \\
\textbf{Net: Cat/Dog} & \textbf{80\%} & \textbf{100\%} & \textbf{67\%} & \textbf{87\%} & \textbf{71\%} \\
\textbf{Net: Cat/Dog} & \textbf{80\%} & \textbf{100\%} & \textbf{67\%} & \textbf{88\%} & \textbf{71\%} \\
Bird & 20\% & 0\% & 33\% & 12\% & 29\% \\
\midrule
Totals & 100\% & 100\% & 100\% & 99\% & 100\% \\
Expand Down Expand Up @@ -200,7 +200,7 @@
Cat & 49\% & 22\% & 66\% & 100\% & 26\% \\
Dog & 43\% & 56\% & 34\% & 0\% & 62\% \\
Bird & 9\% & 22\% & 0\% & 0\% & 13\% \\
\textbf{Net: Cat/Dog} & \textbf{92\%} & \textbf{78\%} & \textbf{100\%} & \textbf{100\%} & \textbf{88\%} \\
\textbf{Net: Cat/Dog} & \textbf{91\%} & \textbf{78\%} & \textbf{100\%} & \textbf{100\%} & \textbf{87\%} \\
\midrule
Totals & 101\% & 100\% & 100\% & 100\% & 101\% \\
Unweighted N & \multicolumn{1}{c}{11} & \multicolumn{1}{c}{3} & \multicolumn{1}{c}{8} & \multicolumn{1}{c}{5} & \multicolumn{1}{c}{6} \\
Expand All @@ -218,7 +218,7 @@
Cat & 49\% & 22\% & 100\% & 46\% \\
Dog & 43\% & 56\% & 0\% & 54\% \\
Bird & 9\% & 22\% & 0\% & 0\% \\
\textbf{Net: Cat/Dog} & \textbf{92\%} & \textbf{78\%} & \textbf{100\%} & \textbf{100\%} \\
\textbf{Net: Cat/Dog} & \textbf{91\%} & \textbf{78\%} & \textbf{100\%} & \textbf{100\%} \\
\midrule
Totals & 101\% & 100\% & 100\% & 100\% \\
Unweighted N & \multicolumn{1}{c}{11} & \multicolumn{1}{c}{3} & \multicolumn{1}{c}{2} & \multicolumn{1}{c}{6} \\
Expand All @@ -241,7 +241,7 @@
Cat & 42\% & 50\% & 36\% & 54\% & 18\% \\
Dog & 37\% & 14\% & 54\% & 46\% & 18\% \\
Bird & 21\% & 36\% & 11\% & 0\% & 64\% \\
\textbf{Net: Cat/Dog} & \textbf{79\%} & \textbf{64\%} & \textbf{90\%} & \textbf{100\%} & \textbf{36\%} \\
\textbf{Net: Cat/Dog} & \textbf{79\%} & \textbf{64\%} & \textbf{89\%} & \textbf{100\%} & \textbf{36\%} \\
\midrule
Totals & 100\% & 100\% & 101\% & 100\% & 100\% \\
Unweighted N & \multicolumn{1}{c}{16} & \multicolumn{1}{c}{4} & \multicolumn{1}{c}{12} & \multicolumn{1}{c}{9} & \multicolumn{1}{c}{7} \\
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/ref/topline1.tex
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@
\endfoot
\endlastfoot

& Home & 49\% & 43\% & 9\% & \textbf{92\%} \\
& Home & 49\% & 43\% & 9\% & \textbf{91\%} \\
& Work & 42\% & 37\% & 21\% & \textbf{79\%} \\

\end{longtable}
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-crosstabs-sort.R
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ test_that("Numeric sorts with banner", {
)

expect_equal(
as.vector(ct$results$allpets$crosstabs$`banner 1`$`___total___`$proportions)[1:3, ],
ct$results$allpets$crosstabs$`banner 1`$`___total___`$proportions$Total,
c(0.625, 0.5, 0.454545454545455)
)
})
Expand All @@ -353,7 +353,7 @@ test_that("Numeric sorts with banner", {
)

expect_equal(
as.vector(ct$results$allpets$crosstabs$`banner 1`$`___total___`$proportions)[1:3, ],
ct$results$allpets$crosstabs$`banner 1`$`___total___`$proportions$Total,
rev(c(0.625, 0.5, 0.454545454545455))
)
})

0 comments on commit 75b07ce

Please sign in to comment.