Skip to content

Commit

Permalink
Generalised calculate_proportion() function
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewgthomas committed Jul 24, 2022
1 parent 22baebe commit cd5c07b
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 25 deletions.
7 changes: 6 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,10 @@ Depends:
R (>= 4.1.0)
Imports:
dplyr,
janitor,
rlang,
sf
sf,
tibble
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
48 changes: 33 additions & 15 deletions R/calculate.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,30 +81,48 @@ calculate_extent <-
#' Calculate the proportion of small areas in each higher-level geography that
#' take each value of a two-valued variable.
#'
#' For every higher-level geography, the small areas are counted by their value
#' of `var` and the counts are converted to proportions. The output contains
#' one `proportion_<value>` column for each of the two values of `var`.
#'
#' @param data Data frame containing a variable to be aggregated and a higher
#'   level geographical grouping variable
#' @param var Name of the variable in the data frame for which you want to
#'   calculate proportions. It must have exactly two possible values and must
#'   not contain missing values
#' @param higher_level_geography Name of the variable in the data frame
#'   containing the higher level geography names/codes
#'
#' @return A tibble with one row per higher level geography, containing the
#'   higher level geography column and two columns named
#'   `proportion_<value>`, one for each value of `var`.
#'
#' @examples
#' lsoa_data <- data.frame(
#'   lsoa_code = 1:4,
#'   msoa_code = c("A", "A", "B", "B"),
#'   left_behind = c("yes", "no", "no", "no")
#' )
#' calculate_proportion(lsoa_data, left_behind, msoa_code)
#'
#' @importFrom rlang .data
#' @importFrom rlang :=
#' @export
calculate_proportion <-
  function(data,
           var,
           higher_level_geography) {
    # Get the unique values of `var`...
    var_values <-
      data |>
      dplyr::distinct({{ var }}) |>
      dplyr::pull({{ var }})

    # ... make sure `var` only contains two possible values
    if (length(var_values) != 2) {
      stop("data$var must contain two possible values")
    }

    # ... neither of which may be missing: tabyl() would tabulate NA under a
    # placeholder column name, so the count columns could not be looked up
    if (anyNA(var_values)) {
      stop("data$var must not contain missing values")
    }

    # tabyl() names its count columns after the character form of each value,
    # so convert once here; this also makes numeric, logical and factor
    # variables usable as `.data[[...]]` subscripts (which must be strings)
    value_names <- as.character(var_values)

    # ... and use these unique values to name the new `proportion` columns
    prop_column_1 <- paste0("proportion_", value_names[1])
    prop_column_2 <- paste0("proportion_", value_names[2])

    data |>
      # Count the number of each value of `var` in each
      # `higher_level_geography`
      janitor::tabyl({{ higher_level_geography }}, {{ var }}) |>

      # Calculate proportions
      dplyr::mutate(
        !!prop_column_1 := .data[[value_names[1]]] /
          (.data[[value_names[1]]] + .data[[value_names[2]]]),
        !!prop_column_2 := .data[[value_names[2]]] /
          (.data[[value_names[1]]] + .data[[value_names[2]]])
      ) |>

      # Keep the geography and the proportions; all_of() selects columns by
      # their string names (using `.data` inside select() is deprecated)
      dplyr::select(
        {{ higher_level_geography }},
        dplyr::all_of(c(prop_column_1, prop_column_2))
      ) |>
      tibble::as_tibble()
  }
20 changes: 11 additions & 9 deletions man/calculate_proportion.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Test runner script: attaches testthat and the geographr package, then calls
# test_check() for "geographr".
library(testthat)
library(geographr)

test_check("geographr")
14 changes: 14 additions & 0 deletions tests/testthat/test-calculate_proportion.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
test_that("proportion calculation works", {
  # Two small areas in the same higher-level geography: one "yes", one "no"
  small_areas <- tibble::tibble(
    code = c(1, 2),
    higher_geog = c(1, 1),
    left_behind = c("yes", "no")
  )

  proportions <- calculate_proportion(small_areas, left_behind, higher_geog)

  # Half of the small areas in the single geography are "yes"
  expect_equal(proportions$proportion_yes, 0.5)
})

0 comments on commit cd5c07b

Please sign in to comment.