Skip to content

Commit

Permalink
Number of observations"
Browse files Browse the repository at this point in the history
  • Loading branch information
astra-cdc committed Mar 11, 2024
1 parent eef6adb commit 771d3af
Show file tree
Hide file tree
Showing 42 changed files with 2,230 additions and 839 deletions.
1 change: 0 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ LazyData: true
LazyDataCompression: bzip2
Imports:
assertthat,
forcats,
huxtable,
magrittr,
survey
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* Improved output.
* Allows an unweighted survey as a `data.frame`.
* Can set certain options using an argument.
* Tabulation functions show the number of observations.

# surveytable 0.9.2

Expand Down
2 changes: 1 addition & 1 deletion R/codebook.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ codebook = function(all = FALSE
lret[[2]] = .write_out(ret, csv = csv)

if (all) {
op_ = options(surveytable.check_present = FALSE)
op_ = options(surveytable.find_lpe = FALSE)
on.exit(options(op_))
for (ii in 1:ncol(design$variables)) {
n1 = nn[ii]
Expand Down
3 changes: 2 additions & 1 deletion R/print.surveytable_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ print.surveytable_table = function(x, ...) {
hh = df1 %>% hux %>% set_all_borders

if (!is.null(txt <- attr(df1, "title"))) {
caption(hh) = paste(strwrap(txt), collapse = "\n")
# caption(hh) = paste(strwrap(txt), collapse = "\n")
caption(hh) = txt
}

if (!is.null(nc <- attr(df1, "num"))) {
Expand Down
4 changes: 2 additions & 2 deletions R/set_count.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ set_count_1k = function() {
# If making changes, update .onLoad() and set_survey()
options(
surveytable.tx_count = ".tx_count_1k"
, surveytable.names_count = c("Number (000)", "SE (000)", "LL (000)", "UL (000)")
, surveytable.names_count = c("n", "Number (000)", "SE (000)", "LL (000)", "UL (000)")
)
message(paste0("* Rounding counts to the nearest 1,000."
, "\n* ?set_count_1k for other options."))
Expand All @@ -31,7 +31,7 @@ set_count_1k = function() {
set_count_int = function() {
options(
surveytable.tx_count = ".tx_count_int"
, surveytable.names_count = c("Number", "SE", "LL", "UL")
, surveytable.names_count = c("n", "Number", "SE", "LL", "UL")
)
message(paste0("* Rounding counts to the nearest integer."
, "\n* ?set_count_int for other options."))
Expand Down
12 changes: 6 additions & 6 deletions R/set_survey.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
#' `opts`:
#' * `"nchs"`:
#' * Round counts to the nearest 1,000 -- see [set_count_1k()].
#' * Identify low-precision estimates (`surveytable.check_present` option).
#' * Identify low-precision estimates (`surveytable.find_lpe` option).
#' * Percentage CI's: adjust Korn-Graubard CI's for the number of degrees of freedom, matching the SUDAAN calculation (`surveytable.adjust_svyciprop` option).
#' * `"general":`
#' * Round counts to the nearest integer -- see [set_count_int()].
#' * Do not look for low-precision estimates (`surveytable.check_present` option).
#' * Do not look for low-precision estimates (`surveytable.find_lpe` option).
#' * Percentage CI's: use standard Korn-Graubard CI's.
#'
#' Optionally, the survey can have an attribute called `label`, which is the
Expand Down Expand Up @@ -41,15 +41,15 @@ set_survey = function(design, opts = "NCHS"
if (opts == "nchs") {
options(
surveytable.tx_count = ".tx_count_1k"
, surveytable.names_count = c("Number (000)", "SE (000)", "LL (000)", "UL (000)")
, surveytable.check_present = TRUE
, surveytable.names_count = c("n", "Number (000)", "SE (000)", "LL (000)", "UL (000)")
, surveytable.find_lpe = TRUE
, surveytable.adjust_svyciprop = TRUE
)
} else if (opts == "general") {
options(
surveytable.tx_count = ".tx_count_int"
, surveytable.names_count = c("Number", "SE", "LL", "UL")
, surveytable.check_present = FALSE
, surveytable.names_count = c("n", "Number", "SE", "LL", "UL")
, surveytable.find_lpe = FALSE
, surveytable.adjust_svyciprop = FALSE
)
} else {
Expand Down
126 changes: 78 additions & 48 deletions R/tab.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,14 @@ tab = function(...
if (nlv < 2) {
assert_that(all(design$variables[,vr] == design$variables[1,vr]))
mp = .total(design)
assert_that(ncol(mp) %in% c(4L, 5L))
fa = attr(mp, "footer")
mp = cbind(
data.frame(Level = design$variables[1,vr])
, mp)
if (!is.null(fa)) {
attr(mp, "footer") = fa
}
attr(mp, "num") = 2:5
attr(mp, "num") = 2:6
attr(mp, "title") = .getvarname(design, vr)
return(.write_out(mp, csv = csv))
} else if (nlv > max_levels) {
Expand All @@ -142,17 +141,21 @@ tab = function(...
##
counts = svyby(frm, frm, design, unwtd.count)$counts
assert_that(length(counts) == nlv)
if (getOption("surveytable.check_present")) {
if (getOption("surveytable.find_lpe")) {
assert_that(is.vector(counts), all(counts >= 1), is.numeric(counts)
, all(counts == trunc(counts)))
pro = getOption("surveytable.present_restricted") %>% do.call(list(counts))
} else {
pro = list(flags = rep("", length(counts)), has.flag = c())
assert_that(is.list(pro)
, setequal(names(pro), c("id", "descriptions", "flags", "has.flag"))
, all(pro$has.flag %in% names(pro$descriptions)))
}

##
sto = svytotal(frm, design) # , deff = "replace")
mmcr = data.frame(x = as.numeric(sto)
, s = sqrt(diag(attr(sto, "var"))) )
mmcr$counts = counts
counts_sum = sum(counts)

# deff = attr(sto, "deff") %>% diag
# I am having trouble interpreting this deff.
Expand All @@ -175,14 +178,18 @@ tab = function(...
mmcr$ll = exp(mmcr$lnx - mmcr$k)
mmcr$ul = exp(mmcr$lnx + mmcr$k)

if (getOption("surveytable.check_present")) {
if (getOption("surveytable.find_lpe")) {
assert_that(is.data.frame(mmcr), nrow(mmcr) >= 1
, all(c("x", "s", "ll", "ul", "samp.size", "counts", "degf") %in% names(mmcr)))
pco = getOption("surveytable.present_count") %>% do.call(list(mmcr))
} else {
pco = list(flags = rep("", nrow(mmcr)), has.flag = c())
assert_that(is.list(pco)
, setequal(names(pco), c("id", "descriptions", "flags", "has.flag"))
, all(pco$has.flag %in% names(pco$descriptions)))
}

mmcr = mmcr[,c("x", "s", "ll", "ul")]
mmc = getOption("surveytable.tx_count") %>% do.call(list(mmcr))
mmc = getOption("surveytable.tx_count") %>% do.call(list(mmcr[,c("x", "s", "ll", "ul")]))
mmc$counts = mmcr$counts
mmc = mmc[,c("counts", "x", "s", "ll", "ul")]
names(mmc) = getOption("surveytable.names_count")

##
Expand Down Expand Up @@ -214,60 +221,83 @@ tab = function(...
}
ret$degf = df1

if (getOption("surveytable.check_present")) {
if (getOption("surveytable.find_lpe")) {
assert_that(is.data.frame(ret), nrow(ret) >= 1
, all(c("Proportion", "SE", "LL", "UL", "n numerator", "n denominator") %in% names(ret)))
ppo = getOption("surveytable.present_prop") %>% do.call(list(ret))
} else {
nlvs = design$variables[, vr] %>% nlevels
ppo = list(flags = rep("", nlvs), has.flag = c())
assert_that(is.list(ppo)
, setequal(names(ppo), c("id", "descriptions", "flags", "has.flag"))
, all(ppo$has.flag %in% names(ppo$descriptions)))
}

mp2 = getOption("surveytable.tx_prct") %>% do.call(list(ret[,c("Proportion", "SE", "LL", "UL")]))
names(mp2) = getOption("surveytable.names_prct")

##
assert_that(nrow(mmc) == nrow(mp2)
, nrow(mmc) == nrow(mmcr)
, nrow(mmc) == length(pro$flags)
, nrow(mmc) == length(pco$flags)
, nrow(mmc) == length(ppo$flags) )

, nrow(mmc) == nrow(mmcr))
mp = cbind(mmc, mp2)
flags = paste(pro$flags, pco$flags, ppo$flags) %>% trimws
if (any(nzchar(flags))) {
mp$Flags = flags
}

##
rownames(mp) = NULL
mp = cbind(data.frame(Level = lvs), mp)

attr(mp, "num") = 2:5
attr(mp, "title") = .getvarname(design, vr)
mp %<>% .add_flags( c(pro$has.flag, pco$has.flag, ppo$has.flag) )
attr(mp, "num") = 2:6
attr(mp, "title") = .getvarname(design, vr)
attr(mp, "footer") = paste0("N = ", counts_sum, ".")

if (getOption("surveytable.find_lpe")) {
assert_that(nrow(mmc) == length(pro$flags)
, nrow(mmc) == length(pco$flags)
, nrow(mmc) == length(ppo$flags))
flags = paste(pro$flags, pco$flags, ppo$flags) %>% trimws
if (any(nzchar(flags))) {
mp$Flags = flags
}
mp %<>% .add_flags( list(pro, pco, ppo) )
}

.write_out(mp, csv = csv)
}

.add_flags = function(df1, has.flag) {
if (!getOption("surveytable.check_present")) {
attr(df1, "footer") = NULL
} else if (is.null(has.flag)) {
attr(df1, "footer") = "(Checked presentation standards. Nothing to report.)"
} else {
v1 = c()
for (ff in has.flag) {
v1 %<>% c(switch(ff
, R = "R: If the data is confidential, suppress *all* estimates, SE's, CI's, etc."
, Cx = "Cx: suppress count (and rate)"
, Cr = "Cr: footnote count - RSE" # .present_count_3030
, Cdf = "Cdf: review count (and rate) - degrees of freedom"
, Px = "Px: suppress percent"
, Pc = "Pc: footnote percent - complement"
, Pdf = "Pdf: review percent - degrees of freedom"
, P0 = "P0: review percent - 0% or 100%"
, paste0(ff, ": unknown flag!")
))
}
attr(df1, "footer") = v1 %>% paste(collapse="; ")
}
.add_flags = function(df1, lfo) {
if (!getOption("surveytable.find_lpe")) {
return(df1)
}

retR = list()
retNR = c()
for (fo in lfo) {
if (!is.null(fo$has.flag)) {
v1 = paste0(fo$descriptions[ fo$has.flag ], collapse = "; ")
if (is.null(retR[[ fo$id ]])) {
retR[[ fo$id ]] = v1
} else {
retR[[ fo$id ]] %<>% paste(v1, sep = "; ")
}
}
retNR %<>% c(fo$id)
}
retNR %<>% unique
retNR = retNR[which( !(retNR %in% names(retR)))]
assert_that(!(is.null(retR) && length(retNR) == 0))

ret = ""
if (!is.null(retR)) {
for (nn in names(retR)) {
v1 = paste0("Checked ", nn, ": ", retR[[nn]], ".")
ret %<>% paste(v1)
}
}
if (length(retNR) > 0) {
v1 = paste0("Checked ", paste(retNR, collapse = ", "), ". Nothing to report.")
ret %<>% paste(v1)
}

if (is.null(v1 <- attr(df1, "footer"))) {
attr(df1, "footer") = ret
} else {
attr(df1, "footer") = paste0(v1, ret)
}
df1
}
9 changes: 5 additions & 4 deletions R/tab_rate.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ tab_rate = function(vr, pop
, class(design$variables[,vr])[1] ))

op_ = options(surveytable.tx_count = ".tx_count_none"
, surveytable.names_count = c("Number", "SE_count"
, surveytable.names_count = c("n", "Number", "SE_count"
, "LL_count", "UL_count"))
on.exit(options(op_))
tfo = .tab_factor(design = design
Expand All @@ -79,19 +79,20 @@ tab_rate = function(vr, pop
}
assert_that(isTRUE(all(m1$Population > 0 | is.na(m1$Population) ))
, msg = paste("Population values for each level of", vr, "must be positive."))
m1[,c("Rate", "SE", "LL", "UL")] = NULL
m1[,c("Rate", "SE", "LL", "UL")] = m1[,c("Number", "SE_count"
, "LL_count", "UL_count")] / m1$Population
cc = if ("Flags" %in% names(m1)) {
c("Level", "Rate", "SE", "LL", "UL", "Flags")
c("Level", "n", "Rate", "SE", "LL", "UL", "Flags")
} else {
c("Level", "Rate", "SE", "LL", "UL")
c("Level", "n", "Rate", "SE", "LL", "UL")
}
m1 = m1[,cc]
cc = c("Rate", "SE", "LL", "UL")
m1[,cc] = getOption("surveytable.tx_rate") %>% do.call(list(m1[,cc]))

attr(m1, "title") = paste(.getvarname(design, vr), "(rate per", per, "population)")
attr(m1, "num") = 2:5
attr(m1, "num") = 2:6
attr(m1, "footer") = attr(tfo, "footer")

.write_out(m1, csv = csv)
Expand Down
9 changes: 5 additions & 4 deletions R/tab_subset_rate.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ tab_subset_rate = function(vr, vrby

pop$Population = pop$Population / per
op_ = options(surveytable.tx_count = ".tx_count_none"
, surveytable.names_count = c("Number", "SE_count", "LL_count", "UL_count"))
, surveytable.names_count = c("n", "Number", "SE_count", "LL_count", "UL_count"))
on.exit(options(op_))

ret = list()
Expand All @@ -91,19 +91,20 @@ tab_subset_rate = function(vr, vrby
}
assert_that(isTRUE(all(m1$Population > 0 | is.na(m1$Population) ))
, msg = paste("Population values for each level of", vr, "must be positive."))
m1[,c("Rate", "SE", "LL", "UL")] = NULL
m1[,c("Rate", "SE", "LL", "UL")] = m1[,c("Number", "SE_count"
, "LL_count", "UL_count")] / m1$Population
cc = if ("Flags" %in% names(m1)) {
c("Level", "Rate", "SE", "LL", "UL", "Flags")
c("Level", "n", "Rate", "SE", "LL", "UL", "Flags")
} else {
c("Level", "Rate", "SE", "LL", "UL")
c("Level", "n", "Rate", "SE", "LL", "UL")
}
m1 = m1[,cc]
cc = c("Rate", "SE", "LL", "UL")
m1[,cc] = getOption("surveytable.tx_rate") %>% do.call(list(m1[,cc]))

attr(m1, "title") = paste(.getvarname(d1, vr), "(rate per", per, "population)")
attr(m1, "num") = 2:5
attr(m1, "num") = 2:6
attr(m1, "footer") = attr(tfo, "footer")

ret[[ii]] = .write_out(m1, csv = csv)
Expand Down
Loading

0 comments on commit 771d3af

Please sign in to comment.