Number of observations"

CDCgov · Mar 11, 2024 · 771d3af · 771d3af
1 parent eef6adb
commit 771d3af
Show file tree

Hide file tree

Showing 42 changed files with 2,230 additions and 839 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -27,7 +27,6 @@ LazyData: true
 LazyDataCompression: bzip2
 Imports: 
     assertthat,
-    forcats,
     huxtable,
     magrittr,
     survey

diff --git a/NEWS.md b/NEWS.md
@@ -5,6 +5,7 @@
   * Improved output.
   * Allows an unweighted survey as a `data.frame`.
   * Can set certain options using an argument.
+* Tabulation functions show the number of observations.
 
 # surveytable 0.9.2
 

diff --git a/R/codebook.R b/R/codebook.R
@@ -84,7 +84,7 @@ codebook = function(all = FALSE
   lret[[2]] = .write_out(ret, csv = csv)
 
   if (all) {
-    op_ = options(surveytable.check_present = FALSE)
+    op_ = options(surveytable.find_lpe = FALSE)
     on.exit(options(op_))
     for (ii in 1:ncol(design$variables)) {
       n1 = nn[ii]

diff --git a/R/print.surveytable_table.R b/R/print.surveytable_table.R
@@ -19,7 +19,8 @@ print.surveytable_table = function(x, ...) {
   hh = df1 %>% hux %>% set_all_borders
 
   if (!is.null(txt <- attr(df1, "title"))) {
-    caption(hh) = paste(strwrap(txt), collapse = "\n")
+    # caption(hh) = paste(strwrap(txt), collapse = "\n")
+    caption(hh) = txt
   }
 
   if (!is.null(nc <- attr(df1, "num"))) {

diff --git a/R/set_count.R b/R/set_count.R
@@ -20,7 +20,7 @@ set_count_1k = function() {
   # If making changes, update .onLoad() and set_survey()
   options(
     surveytable.tx_count = ".tx_count_1k"
-    , surveytable.names_count = c("Number (000)", "SE (000)", "LL (000)", "UL (000)")
+    , surveytable.names_count = c("n", "Number (000)", "SE (000)", "LL (000)", "UL (000)")
   )
   message(paste0("* Rounding counts to the nearest 1,000."
              , "\n* ?set_count_1k for other options."))
@@ -31,7 +31,7 @@ set_count_1k = function() {
 set_count_int = function() {
   options(
     surveytable.tx_count = ".tx_count_int"
-    , surveytable.names_count = c("Number", "SE", "LL", "UL")
+    , surveytable.names_count = c("n", "Number", "SE", "LL", "UL")
   )
   message(paste0("* Rounding counts to the nearest integer."
                  , "\n* ?set_count_int for other options."))

diff --git a/R/set_survey.R b/R/set_survey.R
@@ -6,11 +6,11 @@
 #' `opts`:
 #' * `"nchs"`:
 #'    * Round counts to the nearest 1,000 -- see [set_count_1k()].
-#'    * Identify low-precision estimates (`surveytable.check_present` option).
+#'    * Identify low-precision estimates (`surveytable.find_lpe` option).
 #'    * Percentage CI's: adjust Korn-Graubard CI's for the number of degrees of freedom, matching the SUDAAN calculation (`surveytable.adjust_svyciprop` option).
 #' * `"general":`
 #'    * Round counts to the nearest integer -- see [set_count_int()].
-#'    * Do not look for low-precision estimates (`surveytable.check_present` option).
+#'    * Do not look for low-precision estimates (`surveytable.find_lpe` option).
 #'    * Percentage CI's: use standard Korn-Graubard CI's.
 #'
 #' Optionally, the survey can have an attribute called `label`, which is the
@@ -41,15 +41,15 @@ set_survey = function(design, opts = "NCHS"
   if (opts == "nchs") {
     options(
       surveytable.tx_count = ".tx_count_1k"
-      , surveytable.names_count = c("Number (000)", "SE (000)", "LL (000)", "UL (000)")
-      , surveytable.check_present = TRUE
+      , surveytable.names_count = c("n", "Number (000)", "SE (000)", "LL (000)", "UL (000)")
+      , surveytable.find_lpe = TRUE
       , surveytable.adjust_svyciprop = TRUE
     )
   } else if (opts == "general") {
     options(
       surveytable.tx_count = ".tx_count_int"
-      , surveytable.names_count = c("Number", "SE", "LL", "UL")
-      , surveytable.check_present = FALSE
+      , surveytable.names_count = c("n", "Number", "SE", "LL", "UL")
+      , surveytable.find_lpe = FALSE
       , surveytable.adjust_svyciprop = FALSE
     )
   } else {

diff --git a/R/tab.R b/R/tab.R
@@ -114,15 +114,14 @@ tab = function(...
 	if (nlv < 2) {
     assert_that(all(design$variables[,vr] == design$variables[1,vr]))
 	  mp = .total(design)
-	  assert_that(ncol(mp) %in% c(4L, 5L))
 	  fa = attr(mp, "footer")
 	  mp = cbind(
 	    data.frame(Level = design$variables[1,vr])
       , mp)
 	  if (!is.null(fa)) {
 	    attr(mp, "footer") = fa
 	  }
-	  attr(mp, "num") = 2:5
+	  attr(mp, "num") = 2:6
 	  attr(mp, "title") = .getvarname(design, vr)
     return(.write_out(mp, csv = csv))
 	} else if (nlv > max_levels) {
@@ -142,17 +141,21 @@ tab = function(...
 	##
 	counts = svyby(frm, frm, design, unwtd.count)$counts
 	assert_that(length(counts) == nlv)
-	if (getOption("surveytable.check_present")) {
+	if (getOption("surveytable.find_lpe")) {
+	  assert_that(is.vector(counts), all(counts >= 1), is.numeric(counts)
+	              , all(counts == trunc(counts)))
 	  pro = getOption("surveytable.present_restricted") %>% do.call(list(counts))
-	} else {
-		pro = list(flags = rep("", length(counts)), has.flag = c())
+	  assert_that(is.list(pro)
+	              , setequal(names(pro), c("id", "descriptions", "flags", "has.flag"))
+	              , all(pro$has.flag %in% names(pro$descriptions)))
 	}
 
 	##
 	sto = svytotal(frm, design) # , deff = "replace")
 	mmcr = data.frame(x = as.numeric(sto)
 		, s = sqrt(diag(attr(sto, "var"))) )
 	mmcr$counts = counts
+	counts_sum = sum(counts)
 
 	# deff = attr(sto, "deff") %>% diag
 	# I am having trouble interpreting this deff.
@@ -175,14 +178,18 @@ tab = function(...
   mmcr$ll = exp(mmcr$lnx - mmcr$k)
   mmcr$ul = exp(mmcr$lnx + mmcr$k)
 
-	if (getOption("surveytable.check_present")) {
+	if (getOption("surveytable.find_lpe")) {
+	  assert_that(is.data.frame(mmcr), nrow(mmcr) >= 1
+	              , all(c("x", "s", "ll", "ul", "samp.size", "counts", "degf") %in% names(mmcr)))
 	  pco = getOption("surveytable.present_count") %>% do.call(list(mmcr))
-	} else {
-		pco = list(flags = rep("", nrow(mmcr)), has.flag = c())
+	  assert_that(is.list(pco)
+	              , setequal(names(pco), c("id", "descriptions", "flags", "has.flag"))
+	              , all(pco$has.flag %in% names(pco$descriptions)))
 	}
 
-  mmcr = mmcr[,c("x", "s", "ll", "ul")]
-	mmc = getOption("surveytable.tx_count") %>% do.call(list(mmcr))
+	mmc = getOption("surveytable.tx_count") %>% do.call(list(mmcr[,c("x", "s", "ll", "ul")]))
+	mmc$counts = mmcr$counts
+	mmc = mmc[,c("counts", "x", "s", "ll", "ul")]
 	names(mmc) = getOption("surveytable.names_count")
 
 	##
@@ -214,60 +221,83 @@ tab = function(...
 	}
 	ret$degf = df1
 
-	if (getOption("surveytable.check_present")) {
+	if (getOption("surveytable.find_lpe")) {
+	  assert_that(is.data.frame(ret), nrow(ret) >= 1
+    , all(c("Proportion", "SE", "LL", "UL", "n numerator", "n denominator") %in% names(ret)))
 	  ppo = getOption("surveytable.present_prop") %>% do.call(list(ret))
-	} else {
-		nlvs = design$variables[, vr] %>% nlevels
-		ppo = list(flags = rep("", nlvs), has.flag = c())
+	  assert_that(is.list(ppo)
+	              , setequal(names(ppo), c("id", "descriptions", "flags", "has.flag"))
+	              , all(ppo$has.flag %in% names(ppo$descriptions)))
 	}
 
 	mp2 = getOption("surveytable.tx_prct") %>% do.call(list(ret[,c("Proportion", "SE", "LL", "UL")]))
 	names(mp2) = getOption("surveytable.names_prct")
 
 	##
 	assert_that(nrow(mmc) == nrow(mp2)
-    , nrow(mmc) == nrow(mmcr)
-		, nrow(mmc) == length(pro$flags)
-		, nrow(mmc) == length(pco$flags)
-		, nrow(mmc) == length(ppo$flags) )
-
+    , nrow(mmc) == nrow(mmcr))
 	mp = cbind(mmc, mp2)
-	flags = paste(pro$flags, pco$flags, ppo$flags) %>% trimws
-	if (any(nzchar(flags))) {
-		mp$Flags = flags
-	}
 
 	##
 	rownames(mp) = NULL
 	mp = cbind(data.frame(Level = lvs), mp)
 
-  attr(mp, "num") = 2:5
-  attr(mp, "title") = .getvarname(design, vr)
-	mp %<>% .add_flags( c(pro$has.flag, pco$has.flag, ppo$has.flag) )
+	attr(mp, "num") = 2:6
+	attr(mp, "title") = .getvarname(design, vr)
+	attr(mp, "footer") = paste0("N = ", counts_sum, ".")
+
+	if (getOption("surveytable.find_lpe")) {
+	  assert_that(nrow(mmc) == length(pro$flags)
+            , nrow(mmc) == length(pco$flags)
+            , nrow(mmc) == length(ppo$flags))
+	  flags = paste(pro$flags, pco$flags, ppo$flags) %>% trimws
+	  if (any(nzchar(flags))) {
+	    mp$Flags = flags
+	  }
+	  mp %<>% .add_flags( list(pro, pco, ppo) )
+	}
+
 	.write_out(mp, csv = csv)
 }
 
-.add_flags = function(df1, has.flag) {
-  if (!getOption("surveytable.check_present")) {
-    attr(df1, "footer") = NULL
-  } else if (is.null(has.flag)) {
-	  attr(df1, "footer") = "(Checked presentation standards. Nothing to report.)"
-	} else {
-		v1 = c()
-		for (ff in has.flag) {
-			v1 %<>% c(switch(ff
-				, R = "R: If the data is confidential, suppress *all* estimates, SE's, CI's, etc."
-				, Cx = "Cx: suppress count (and rate)"
-				, Cr = "Cr: footnote count - RSE" # .present_count_3030
-  			, Cdf = "Cdf: review count (and rate) - degrees of freedom"
-				, Px = "Px: suppress percent"
-				, Pc = "Pc: footnote percent - complement"
-				, Pdf = "Pdf: review percent - degrees of freedom"
-				, P0 = "P0: review percent - 0% or 100%"
-				, paste0(ff, ": unknown flag!")
-			))
-		}
-		attr(df1, "footer") = v1 %>% paste(collapse="; ")
-	}
+.add_flags = function(df1, lfo) {
+  if (!getOption("surveytable.find_lpe")) {
+    return(df1)
+  }
+
+  retR = list()
+  retNR = c()
+  for (fo in lfo) {
+    if (!is.null(fo$has.flag)) {
+      v1 = paste0(fo$descriptions[ fo$has.flag ], collapse = "; ")
+      if (is.null(retR[[ fo$id ]])) {
+        retR[[ fo$id ]] = v1
+      } else {
+        retR[[ fo$id ]] %<>% paste(v1, sep = "; ")
+      }
+    }
+    retNR %<>% c(fo$id)
+  }
+  retNR %<>% unique
+  retNR = retNR[which( !(retNR %in% names(retR)))]
+  assert_that(!(is.null(retR) && length(retNR) == 0))
+
+  ret = ""
+  if (!is.null(retR)) {
+    for (nn in names(retR)) {
+      v1 = paste0("Checked ", nn, ": ", retR[[nn]], ".")
+      ret %<>% paste(v1)
+    }
+  }
+  if (length(retNR) > 0) {
+    v1 = paste0("Checked ", paste(retNR, collapse = ", "), ". Nothing to report.")
+    ret %<>% paste(v1)
+  }
+
+  if (is.null(v1 <- attr(df1, "footer"))) {
+    attr(df1, "footer") = ret
+  } else {
+    attr(df1, "footer") = paste0(v1, ret)
+  }
   df1
 }
diff --git a/R/tab_rate.R b/R/tab_rate.R
@@ -55,7 +55,7 @@ tab_rate = function(vr, pop
                              , class(design$variables[,vr])[1] ))
 
   op_ = options(surveytable.tx_count = ".tx_count_none"
-                  , surveytable.names_count = c("Number", "SE_count"
+                  , surveytable.names_count = c("n", "Number", "SE_count"
                     , "LL_count", "UL_count"))
   on.exit(options(op_))
   tfo = .tab_factor(design = design
@@ -79,19 +79,20 @@ tab_rate = function(vr, pop
   }
   assert_that(isTRUE(all(m1$Population > 0 | is.na(m1$Population) ))
               , msg = paste("Population values for each level of", vr, "must be positive."))
+  m1[,c("Rate", "SE", "LL", "UL")] = NULL
   m1[,c("Rate", "SE", "LL", "UL")] = m1[,c("Number", "SE_count"
                                            , "LL_count", "UL_count")] / m1$Population
   cc = if ("Flags" %in% names(m1)) {
-    c("Level", "Rate", "SE", "LL", "UL", "Flags")
+    c("Level", "n", "Rate", "SE", "LL", "UL", "Flags")
   } else {
-    c("Level", "Rate", "SE", "LL", "UL")
+    c("Level", "n", "Rate", "SE", "LL", "UL")
   }
   m1 = m1[,cc]
   cc = c("Rate", "SE", "LL", "UL")
   m1[,cc] = getOption("surveytable.tx_rate") %>% do.call(list(m1[,cc]))
 
   attr(m1, "title") = paste(.getvarname(design, vr), "(rate per", per, "population)")
-  attr(m1, "num") = 2:5
+  attr(m1, "num") = 2:6
   attr(m1, "footer") = attr(tfo, "footer")
 
   .write_out(m1, csv = csv)

diff --git a/R/tab_subset_rate.R b/R/tab_subset_rate.R
@@ -67,7 +67,7 @@ tab_subset_rate = function(vr, vrby
 
   pop$Population = pop$Population / per
   op_ = options(surveytable.tx_count = ".tx_count_none"
-                , surveytable.names_count = c("Number", "SE_count", "LL_count", "UL_count"))
+                , surveytable.names_count = c("n", "Number", "SE_count", "LL_count", "UL_count"))
   on.exit(options(op_))
 
   ret = list()
@@ -91,19 +91,20 @@ tab_subset_rate = function(vr, vrby
     }
     assert_that(isTRUE(all(m1$Population > 0 | is.na(m1$Population) ))
                 , msg = paste("Population values for each level of", vr, "must be positive."))
+    m1[,c("Rate", "SE", "LL", "UL")] = NULL
     m1[,c("Rate", "SE", "LL", "UL")] = m1[,c("Number", "SE_count"
         , "LL_count", "UL_count")] / m1$Population
     cc = if ("Flags" %in% names(m1)) {
-      c("Level", "Rate", "SE", "LL", "UL", "Flags")
+      c("Level", "n", "Rate", "SE", "LL", "UL", "Flags")
     } else {
-      c("Level", "Rate", "SE", "LL", "UL")
+      c("Level", "n", "Rate", "SE", "LL", "UL")
     }
     m1 = m1[,cc]
     cc = c("Rate", "SE", "LL", "UL")
     m1[,cc] = getOption("surveytable.tx_rate") %>% do.call(list(m1[,cc]))
 
     attr(m1, "title") = paste(.getvarname(d1, vr), "(rate per", per, "population)")
-    attr(m1, "num") = 2:5
+    attr(m1, "num") = 2:6
     attr(m1, "footer") = attr(tfo, "footer")
 
     ret[[ii]] = .write_out(m1, csv = csv)