Skip to content

Commit

Permalink
Merge pull request #13 from humaniverse/protected-char
Browse files Browse the repository at this point in the history
 Added datasets on disability, sexual orientation, age and gender and…
  • Loading branch information
jennajt authored Jul 16, 2024
2 parents f0f6c6e + 2967c02 commit 93fef5d
Show file tree
Hide file tree
Showing 21 changed files with 526 additions and 1 deletion.
106 changes: 106 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,26 @@ NULL
#' @source \url{https://www.nisra.gov.uk/}
"age_gender_hsct20_ni"

# NOTE(review): column names and age bands below follow the columns grouped in
# data-raw/age_gender_ltla21_ew.R; confirm against the stored data.
#' Age and gender grouped data for England and Wales from the 2021 Census
#' (Local Authority)
#'
#' A dataset containing Census 2021 population counts by sex and broad age
#' group for each Local Authority in England and Wales.
#'
#' @format A data frame of class "tbl" with 10 variables:
#' \describe{
#' \item{ltla21_code}{Local Authority code}
#' \item{total_population}{Total population}
#' \item{total_females}{Total female population}
#' \item{total_males}{Total male population}
#' \item{younger_females}{Count of females aged 15 years and under}
#' \item{working_age_females}{Count of females aged 16 to 64 years}
#' \item{older_females}{Count of females aged 65 years and over}
#' \item{younger_males}{Count of males aged 15 years and under}
#' \item{working_age_males}{Count of males aged 16 to 64 years}
#' \item{older_males}{Count of males aged 65 years and over}
#' }
#' @source \url{https://www.nomisweb.co.uk/sources/census_2021_bulk}
"age_gender21_ltla21_ew"

#' Age and gender grouped data for Northern Ireland from 2021 Census
#' (Local Government Districts)
#'
Expand All @@ -47,6 +67,42 @@ NULL
#' @source \url{https://www.nisra.gov.uk/}
"age_gender_ltla21_ni"

#' Disability data for Northern Ireland from the 2021 Census (LGD)
#'
#' A dataset containing Census 2021 disability counts and proportions for
#' each Local Government District (LGD) in Northern Ireland.
#'
#' @format A data frame of class "tbl" with 4 variables:
#' \describe{
#' \item{lgd21_code}{LGD code}
#' \item{disability}{Disability status}
#' \item{count}{The count recorded for the disability status}
#' \item{prop}{The proportion for the disability status (count divided by
#' the "All households" total of the source table)}
#' ...
#' }
#' @source \url{https://www.nisra.gov.uk/}
"disability21_lgd21_ni"

#' Disability data for England and Wales from the 2021 Census (Lower Super Output
#' Area)
#'
#' A dataset containing Census 2021 disability counts and proportions for
#' each LSOA in England and Wales.
#'
#' @format A data frame of class "tbl" with 5 variables:
#' \describe{
#' \item{lsoa21_code}{LSOA code}
#' \item{disability}{Disability status}
#' \item{n}{The count of people with the disability status}
#' \item{total_residents}{Total residents in LSOA}
#' \item{prop}{The proportion of residents with the disability status
#' (n divided by total_residents)}
#' ...
#' }
#' @source \url{https://www.nomisweb.co.uk/sources/census_2021_bulk}
"disability21_lsoa21_ew"

#' Ethnic group data for England and Wales from the 2021 Census (Local Authority)
#'
#' A data sets containing Census 2021 ethnic group numbers and percentages for
Expand All @@ -63,6 +119,7 @@ NULL
#' @source \url{https://ons.gov.uk/}
"ethnicity21_ltla21"


#' Ethnic group data for Northern Ireland from the 2021 Census (Local Authority)
#'
#' A data sets containing Census 2021 ethnic group numbers and percentages for
Expand Down Expand Up @@ -467,3 +524,52 @@ NULL
#' }
#' @source \url{https://ons.gov.uk/}
"religion21_lsoa21"

#' Religion data for Northern Ireland from the 2021 Census (LGD)
#'
#' A dataset containing Census 2021 religion counts and proportions for
#' each Local Government District in Northern Ireland.
#'
#' @format A data frame of class "tbl" with 4 variables:
#' \describe{
#' \item{lgd21_code}{LGD code}
#' \item{religion}{Religion}
#' \item{count}{Number of people}
#' \item{prop}{Proportion of people (count divided by the
#' "All usual residents" total of the source table)}
#' ...
#' }
#' @source \url{https://www.nisra.gov.uk/}
"religion21_lgd21_ni"

#' Sexual orientation data for Northern Ireland from the 2021 Census (LGD)
#'
#' A dataset containing Census 2021 sexual orientation counts and proportions
#' for each Local Government District in Northern Ireland.
#'
#' @format A data frame of class "tbl" with 4 variables:
#' \describe{
#' \item{lgd21_code}{LGD code}
#' \item{sexual_orientation}{Sexual Orientation}
#' \item{count}{Number of people}
#' \item{prop}{Proportion of people (count divided by the
#' "All usual residents aged 16 and over" total of the source table)}
#' ...
#' }
#' @source \url{https://www.nisra.gov.uk/}
"sexualorientation21_lgd21_ni"

#' Sexual orientation data for England and Wales from the 2021 Census (MSOA)
#'
#' A dataset containing Census 2021 sexual orientation numbers and percentages
#' for each MSOA in England and Wales.
#'
#' @format A data frame of class "tbl" with 4 variables:
#' \describe{
#' \item{msoa21_code}{MSOA code}
#' \item{sexual_orientation}{Sexual Orientation}
#' \item{count}{Number of people}
#' \item{prop}{Percentage of people}
#' ...
#' }
#' @source \url{https://ons.gov.uk/}
"sexualorientation21_msoa21_ew"

Binary file modified R/sysdata.rda
Binary file not shown.
74 changes: 74 additions & 0 deletions data-raw/age_gender_ltla21_ew.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Builds `age_gender21_ltla21_ew`: Census 2021 (table TS009) population counts
# by sex and broad age group for each Local Authority in England and Wales.

# ---- Load libs ----
library(tidyverse)
library(geographr)
library(devtools)
library(httr2)

# ---- Load internal sysdata.rda file with URLs ----
load_all(".")

# ---- Download data ----
# Look up the bulk-download URL registered for this dataset in `query_urls`
query_url <-
  query_urls |>
  filter(id == "age_gender_ltla21_ew") |>
  pull(query)

download <- tempfile(fileext = ".zip")

# Write the zip archive to the temporary file
request(query_url) |>
  req_perform(download)

unzip(download, exdir = tempdir())

# Shows the extracted file names when the script is run interactively
list.files(tempdir())

raw <- read_csv(file.path(tempdir(), "census2021-ts009-ltla.csv")) # No data available at LSOA level

# ---- Clean data ----
# Each broad group is the sum of non-overlapping age bands:
#   younger     = 15 years and under
#   working age = 16 to 64 years
#   older       = 65 years and over
# "Aged under 1 year" is not added because, by its label, it is already part
# of "Aged 4 years and under"; likewise "Aged 90 years and over" is already
# part of "Aged 85 years and over". Adding them would double count.
# Every `+` must continue the same expression: a comma before a `+` ends the
# named argument and silently drops the remaining bands from that column.
age_gender21_ltla21_ew <-
  raw |>
  mutate(
    younger_females =
      `Sex: Female; Age: Aged 4 years and under; measures: Value` +
        `Sex: Female; Age: Aged 5 to 9 years; measures: Value` +
        `Sex: Female; Age: Aged 10 to 15 years; measures: Value`,
    working_age_females =
      `Sex: Female; Age: Aged 16 to 19 years; measures: Value` +
        `Sex: Female; Age: Aged 20 to 24 years; measures: Value` +
        `Sex: Female; Age: Aged 25 to 34 years; measures: Value` +
        `Sex: Female; Age: Aged 35 to 49 years; measures: Value` +
        `Sex: Female; Age: Aged 50 to 64 years; measures: Value`,
    older_females =
      `Sex: Female; Age: Aged 65 to 74 years; measures: Value` +
        `Sex: Female; Age: Aged 75 to 84 years; measures: Value` +
        `Sex: Female; Age: Aged 85 years and over; measures: Value`,
    younger_males =
      `Sex: Male; Age: Aged 4 years and under; measures: Value` +
        `Sex: Male; Age: Aged 5 to 9 years; measures: Value` +
        `Sex: Male; Age: Aged 10 to 15 years; measures: Value`,
    working_age_males =
      `Sex: Male; Age: Aged 16 to 19 years; measures: Value` +
        `Sex: Male; Age: Aged 20 to 24 years; measures: Value` +
        `Sex: Male; Age: Aged 25 to 34 years; measures: Value` +
        `Sex: Male; Age: Aged 35 to 49 years; measures: Value` +
        `Sex: Male; Age: Aged 50 to 64 years; measures: Value`,
    older_males =
      `Sex: Male; Age: Aged 65 to 74 years; measures: Value` +
        `Sex: Male; Age: Aged 75 to 84 years; measures: Value` +
        `Sex: Male; Age: Aged 85 years and over; measures: Value`
  ) |>
  # Keep the area code, the published totals and the derived groups only
  select(
    ltla21_code = `geography code`,
    total_population = `Sex: All persons; Age: Total; measures: Value`,
    total_females = `Sex: Female; Age: Total; measures: Value`,
    total_males = `Sex: Male; Age: Total; measures: Value`,
    younger_females, working_age_females, older_females,
    younger_males, working_age_males, older_males
  )


# ---- Save output to data/ folder ----
usethis::use_data(age_gender21_ltla21_ew, overwrite = TRUE)
30 changes: 30 additions & 0 deletions data-raw/disability21_lgd21_ni.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Builds `disability21_lgd21_ni` from the "LGD" sheet of the source workbook
# registered in `query_urls`.

# ---- Load libs ----
library(tidyverse)
library(geographr)
library(devtools)
library(rio)

# ---- Load internal sysdata.rda file with URLs ----
load_all(".")

# ---- Download data ----
# URL of the source workbook registered under this dataset's id
workbook_url <- pull(filter(query_urls, id == "disability21_lgd21_ni"), query)

sheet <- import(workbook_url, which = "LGD")

# Row 8 of the imported sheet supplies the column names
colnames(sheet) <- sheet[8,]

# ---- Clean data ----
# Rows 9 to 19 are the data rows that are kept
# (presumably one per Local Government District - confirm against the sheet)
lgd_rows <- slice(sheet, 9:19)

# Stack columns 4 to 6 into one row per area and disability category
disability_long <- pivot_longer(
  lgd_rows,
  cols = (4:6),
  names_to = "disability",
  values_to = "count"
)

# `prop` is the category count as a share of the "All households" column;
# `count` itself is kept exactly as imported
disability21_lgd21_ni <- disability_long |>
  mutate(prop = as.integer(count) / as.integer(`All households`)) |>
  select(
    lgd21_code = `Geography code`,
    disability,
    count,
    prop
  )

# ---- Save output to data/ folder ----
usethis::use_data(disability21_lgd21_ni, overwrite = TRUE)
41 changes: 41 additions & 0 deletions data-raw/disability21_lsoa21_ew.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Builds `disability21_lsoa21_ew` from the Census 2021 TS038 bulk download
# registered in `query_urls`.

# ---- Load libs ----
library(tidyverse)
library(geographr)
library(devtools)
library(httr2)

# ---- Load internal sysdata.rda file with URLs ----
load_all(".")

# ---- Download data ----
# URL of the zip archive registered under this dataset's id
zip_url <- pull(filter(query_urls, id == "disability21_lsoa21_ew"), query)

zip_path <- tempfile(fileext = ".zip")

# Write the archive to the temporary file, then extract it
req_perform(request(zip_url), path = zip_path)

unzip(zip_path, exdir = tempdir())

# Shows the extracted file names when the script is run interactively
list.files(tempdir())

ts038 <- read_csv(file.path(tempdir(), "census2021-ts038-lsoa.csv"))

# Strip the "Disability: " prefix from the column names
names(ts038) <- str_remove(names(ts038), "Disability: ")

# ---- Clean data ----
# Keep the area code, the total, and every remaining column whose name has no
# ":" in it, dropping the `date` and `geography` columns
disability_wide <- select(
  ts038,
  lsoa21_code = `geography code`,
  total_residents = `Total: All usual residents`,
  !contains(":"),
  -date,
  -geography
)

# One row per LSOA and disability category, with the share of total residents
disability21_lsoa21_ew <- disability_wide |>
  pivot_longer(
    cols = -c(lsoa21_code, total_residents),
    names_to = "disability",
    values_to = "n"
  ) |>
  mutate(prop = n / total_residents) |>
  relocate(total_residents, .before = prop)

# ---- Save output to data/ folder ----
usethis::use_data(disability21_lsoa21_ew, overwrite = TRUE)
14 changes: 13 additions & 1 deletion data-raw/query_urls.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@ query_urls <-
"ethnicity", "ethnicity21_msoa21", "29.11.22", "OGLv3", "https://www.nomisweb.co.uk/output/census/2021/census2021-ts022.zip", "https://www.nomisweb.co.uk/sources/census_2021_bulk",
"ethnicity", "ethnicity21_ltla21_ni", "22.09.22", "OGLv3", "https://www.nisra.gov.uk/system/files/statistics/census-2021-ms-b01.xlsx", "https://www.nisra.gov.uk/publications/census-2021-main-statistics-ethnicity-tables",

# - Disability -
"disability", "disability21_lsoa21_ew", "2021", "OGLv3", "https://www.nomisweb.co.uk/output/census/2021/census2021-ts038.zip", "https://www.nomisweb.co.uk/sources/census_2021_bulk",
"disability", "disability21_lgd21_ni", "2021", "OGLv3", "https://www.nisra.gov.uk/system/files/statistics/census-2021-ms-d03.xlsx", "https://www.nisra.gov.uk/publications/census-2021-main-statistics-health-disability-and-unpaid-care-tables",

# - Households -
"population", "age_gender_hsct20_ni", "06.07.23", "OGLv3", "https://www.ninis2.nisra.gov.uk/Download/Population/Population%20Estimates%20Broad%20Age%20Bands%20(administrative%20geographies).ods", "https://www.ninis2.nisra.gov.uk/public/AreaProfileReportViewer.aspx?tabchangeReportName=Investing%20for%20Health?",
"population", "age_gender_ltla21_ni", "31.05.23", "OGLv3", "https://www.nisra.gov.uk/system/files/statistics/census-2021-ms-a08.xlsx", "https://www.nisra.gov.uk/publications/census-2021-main-statistics-demography-tables-age-and-sex",
"population", "age_gender_ltla21_ew", "31.05.23", "OGLv3", "https://www.nomisweb.co.uk/output/census/2021/census2021-ts009.zip", "https://www.nomisweb.co.uk/sources/census_2021_bulk",
"population", "households_england_wales", "28.06.22", "OGLv3", "https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/populationandhouseholdestimatesenglandandwalescensus2021/census2021/census2021firstresultsenglandwales1.xlsx", "https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/populationandhouseholdestimatesenglandandwalescensus2021",
"population", "households_northern_ireland", "24.05.22", "OGLv3", "https://www.nisra.gov.uk/system/files/statistics/census-2021-population-and-household-estimates-for-northern-ireland-tables-24-may-2022.xlsx", "https://www.nisra.gov.uk/publications/census-2021-population-and-household-estimates-for-northern-ireland",

Expand Down Expand Up @@ -65,7 +70,14 @@ query_urls <-
"proficiency", "proficiency21_msoa21", "29.11.22", "OGLv3", "https://www.nomisweb.co.uk/output/census/2021/census2021-ts029.zip", "https://www.nomisweb.co.uk/sources/census_2021_bulk",

# Religion
"religion", "religion", "", "OGLv3", "https://www.nomisweb.co.uk/output/census/2021/census2021-ts030.zip", "https://www.nomisweb.co.uk/sources/census_2021_bulk"
"religion", "religion", "", "OGLv3", "https://www.nomisweb.co.uk/output/census/2021/census2021-ts030.zip", "https://www.nomisweb.co.uk/sources/census_2021_bulk",
"religion", "religion21_lgd21_ni", "", "OGLv3", "https://www.nisra.gov.uk/system/files/statistics/census-2021-ms-b19.xlsx", "https://www.nisra.gov.uk/publications/census-2021-main-statistics-religion-tables",

# Sexual orientation
"Sexual orientation", "sexualorientation21_msoa21_ew", "2021", "OGLv3", "https://www.nomisweb.co.uk/output/census/2021/census2021-ts077.zip", "https://www.nomisweb.co.uk/sources/census_2021_bulk",
"Sexual orientation", "sexualorientation21_lgd21_ni", "2021", "OGLv3", "https://www.nisra.gov.uk/system/files/statistics/census-2021-ms-c01.xlsx", "https://www.nisra.gov.uk/publications/census-2021-main-statistics-sexual-orientation-tables"


)

usethis::use_data(query_urls, internal = TRUE, overwrite = TRUE)
31 changes: 31 additions & 0 deletions data-raw/religion21_lgd21_ni.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Builds `religion21_lgd21_ni` from the "LGD" sheet of the source workbook
# registered in `query_urls`.

# ---- Load libs ----
library(tidyverse)
library(geographr)
library(devtools)
library(rio)

# ---- Load internal sysdata.rda file with URLs ----
load_all(".")

# ---- Download data ----
# URL of the source workbook registered under this dataset's id
workbook_url <- pull(filter(query_urls, id == "religion21_lgd21_ni"), query)

sheet <- import(workbook_url, which = "LGD")

# Row 8 of the imported sheet supplies the column names
colnames(sheet) <- sheet[8,]

# ---- Clean data ----
# Rows 9 to 19 are the data rows that are kept
# (presumably one per Local Government District - confirm against the sheet)
lgd_rows <- slice(sheet, 9:19)

# Stack columns 4 to 11 into one row per area and religion category
religion_long <- pivot_longer(
  lgd_rows,
  cols = (4:11),
  names_to = "religion",
  values_to = "count"
)

# `prop` is the category count as a share of "All usual residents"; the
# footnote marker carried in the Catholic column header is removed from the
# category label
religion21_lgd21_ni <- religion_long |>
  mutate(
    prop = as.integer(count) / as.integer(`All usual residents`),
    religion = str_replace(religion, "Catholic \\r\\n\\[note 2\\]", "Catholic")
  ) |>
  select(
    lgd21_code = `Geography code`,
    religion,
    count,
    prop
  )

# ---- Save output to data/ folder ----
usethis::use_data(religion21_lgd21_ni, overwrite = TRUE)
30 changes: 30 additions & 0 deletions data-raw/sexualorientation21_lgd21_ni.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Builds `sexualorientation21_lgd21_ni` from the "MS-C01" sheet of the source
# workbook registered in `query_urls`.

# ---- Load libs ----
library(tidyverse)
library(geographr)
library(devtools)
library(rio)

# ---- Load internal sysdata.rda file with URLs ----
load_all(".")

# ---- Download data ----
# URL of the source workbook registered under this dataset's id
workbook_url <- pull(
  filter(query_urls, id == "sexualorientation21_lgd21_ni"),
  query
)

sheet <- import(workbook_url, which = "MS-C01")

# Row 7 of the imported sheet supplies the column names
colnames(sheet) <- sheet[7,]

# ---- Clean data ----
# Rows 9 to 19 are the data rows that are kept
# (presumably one per Local Government District - confirm against the sheet)
lgd_rows <- slice(sheet, 9:19)

# Stack columns 4 to 9 into one row per area and sexual orientation category
orientation_long <- pivot_longer(
  lgd_rows,
  cols = (4:9),
  names_to = "sexual_orientation",
  values_to = "count"
)

# `prop` is the category count as a share of usual residents aged 16 and over;
# `count` itself is kept exactly as imported
sexualorientation21_lgd21_ni <- orientation_long |>
  mutate(
    prop = as.integer(count) /
      as.integer(`All usual residents aged 16 and over`)
  ) |>
  select(
    lgd21_code = `Geography code`,
    sexual_orientation,
    count,
    prop
  )

# ---- Save output to data/ folder ----
usethis::use_data(sexualorientation21_lgd21_ni, overwrite = TRUE)
Loading

0 comments on commit 93fef5d

Please sign in to comment.