Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
changes:
added:
- Student loan plan uprating in economic assumptions: re-labels Plan 1/2/5 each year based on cohort start year, writes off loans after 29 years, and samples new loan holders (including England-only Plan 5 entrants) among tertiary-educated people without a loan using a flat 40% take-up probability.
162 changes: 162 additions & 0 deletions policyengine_uk/data/economic_assumptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,31 @@
import numpy as np
import logging

# Base year for the FRS dataset - used to calculate age offsets
# (base-year age = age - (year - _FRS_BASE_YEAR)).
# NOTE(review): the dataset is FRS 2023-24, and this line was previously
# annotated "represents calendar year 2024", which disagrees with the value
# below. Confirm which calendar year is intended.
_FRS_BASE_YEAR = 2023

# Approximate take-up rate for assigning loans to tertiary-educated people
# whose plan is NONE.
# This represents P(has loan AND earning above threshold | tertiary educated).
# Derived from SLC forecasts (~4M Plan 2 above threshold) vs UK graduate
# population (~8-10M in relevant age bands), giving roughly 40-50%.
# We use a conservative 0.4 as many graduates have paid off loans or earn
# below threshold.
_GRADUATE_LOAN_TAKE_UP = 0.4

# Region labels treated as England. Immutable because it is a lookup
# constant that should never be modified at runtime.
_ENGLAND_REGIONS = frozenset(
    {
        "NORTH_EAST",
        "NORTH_WEST",
        "YORKSHIRE",
        "EAST_MIDLANDS",
        "WEST_MIDLANDS",
        "EAST_OF_ENGLAND",
        "LONDON",
        "SOUTH_EAST",
        "SOUTH_WEST",
    }
)

# Years after the assumed university start year at which a loan is treated
# as written off. Despite the name, uprate_student_loan_plans applies this
# cut-off to every existing loan holder, not only Plan 1.
_PLAN1_WRITEOFF_YEARS = 29


def extend_single_year_dataset(
dataset: UKSingleYearDataset,
Expand Down Expand Up @@ -90,6 +115,10 @@ def apply_single_year_uprating(

current_year = uprate_rent(current_year, previous_year, parameters)

current_year = uprate_student_loan_plans(
current_year, previous_year, parameters
)

current_year.validate()

return current_year
Expand Down Expand Up @@ -196,6 +225,139 @@ def uprate_rent(
return current_year


def uprate_student_loan_plans(
    current_year: UKSingleYearDataset,
    previous_year: UKSingleYearDataset,
    parameters: ParameterNode,
) -> UKSingleYearDataset:
    """Re-label student loan plans by cohort and sample new loan holders.

    Each person's university start year is approximated as
    ``year - age + 18``. Using that, the function:

    1. Clears the plan and repayments of existing holders whose start year
       is ``_PLAN1_WRITEOFF_YEARS`` or more years before ``year``, then
       re-labels the remaining holders: PLAN_1 (start before 2012),
       PLAN_2 (2012-2022) and, for 2023+ starts, PLAN_5 in England or
       PLAN_2 elsewhere.
    2. Samples new PLAN_1 holders among tertiary-educated people with no
       plan whose base-year age is above 40.
    3. Samples new PLAN_2 holders among tertiary-educated people with no
       plan whose base-year age is above 29.
    4. Samples new PLAN_5 holders among tertiary-educated people with no
       plan in England, aged 21+, with a 2023+ start year.

    Sampling uses the flat probability ``_GRADUATE_LOAN_TAKE_UP``; sampled
    people get ``student_loan_repayments`` set to 0. Draws come from a
    generator seeded with ``year``, so the same input table and year always
    give the same output.

    Args:
        current_year: Dataset being uprated. Its ``person`` table is
            replaced with an updated copy.
        previous_year: Unused; accepted so the signature matches the other
            uprating steps.
        parameters: Unused; accepted so the signature matches the other
            uprating steps.

    Returns:
        ``current_year``, with ``student_loan_plan`` and
        ``student_loan_repayments`` rewritten on its person table.
    """
    year = int(current_year.time_period)
    people = current_year.person

    # Look up each person's household region. Only the key column is merged,
    # so an existing "region" column on the person table cannot collide, and
    # validate= makes duplicated household ids fail loudly instead of
    # silently multiplying person rows.
    region_lookup = current_year.household[["household_id", "region"]].rename(
        columns={"household_id": "person_household_id"}
    )
    region = (
        people[["person_household_id"]]
        .merge(
            region_lookup,
            on="person_household_id",
            how="left",
            validate="many_to_one",
        )["region"]
        .values.astype(str)
    )

    age = people["age"].values.astype(int)
    highest_ed = people["highest_education"].values.astype(str)
    # Plan labels are kept in an object array: a fixed-width NumPy string
    # array would silently truncate any label longer than the longest one
    # already present (e.g. "PLAN_5" written into an all-"NONE" column).
    base_plan = np.asarray(
        people["student_loan_plan"].values.astype(str), dtype=object
    )

    # Age in the base year (used to identify "new" cohorts)
    base_year_age = age - (year - _FRS_BASE_YEAR)

    # Assumes everyone starts university at 18.
    uni_start_year = year - age + 18
    is_england = np.isin(region, list(_ENGLAND_REGIONS))
    is_tertiary = highest_ed == "TERTIARY"

    # Output arrays, edited in place by the steps below.
    new_plan = base_plan.copy()
    repayments = people["student_loan_repayments"].values.copy()

    # Deterministic RNG seeded by year for reproducibility. The order of the
    # sampling calls below determines which draws each step receives.
    rng = np.random.default_rng(seed=year)

    def assign_with_probability(mask, plan_value, prob=_GRADUATE_LOAN_TAKE_UP):
        """Assign plan_value to a random subset of masked people."""
        if not mask.any():
            return
        indices = np.where(mask)[0]
        chosen = indices[rng.random(len(indices)) < prob]
        new_plan[chosen] = plan_value
        repayments[chosen] = 0.0

    # === Step 1: Re-label existing loan holders ===
    has_loan = base_plan != "NONE"
    written_off = has_loan & (uni_start_year + _PLAN1_WRITEOFF_YEARS <= year)
    active = has_loan & ~written_off
    is_plan1_cohort = active & (uni_start_year < 2012)
    is_plan2_cohort = active & (uni_start_year >= 2012) & (uni_start_year < 2023)
    is_plan5_cohort = active & (uni_start_year >= 2023)

    new_plan[written_off] = "NONE"
    repayments[written_off] = 0.0
    new_plan[is_plan1_cohort] = "PLAN_1"
    new_plan[is_plan2_cohort] = "PLAN_2"
    new_plan[is_plan5_cohort & is_england] = "PLAN_5"
    # Post-2023 starters outside England are labelled PLAN_2.
    new_plan[is_plan5_cohort & ~is_england] = "PLAN_2"

    # === Step 2: Add Plan 1 holders in extended age bands ===
    # Target: NONE people who are tertiary-educated, older than the assumed
    # maximum Plan 1 age in the base year, with uni_start_year < 2012 and a
    # loan that is not yet written off.
    # NOTE(review): with _FRS_BASE_YEAR = 2023 these conditions reduce to
    # year - 1982 <= age <= 46, so the band narrows each year and selects
    # nobody from 2029 onwards - confirm this is intended.
    max_plan1_age_base = 40  # Approximate max age of Plan 1 in base year
    plan1_new_cohort = (
        (new_plan == "NONE")
        & is_tertiary
        & (base_year_age > max_plan1_age_base)
        & (uni_start_year < 2012)
        & (uni_start_year + _PLAN1_WRITEOFF_YEARS > year)
    )
    assign_with_probability(plan1_new_cohort, "PLAN_1")

    # === Step 3: Add Plan 2 holders in extended age bands ===
    # Target: NONE people who are tertiary-educated, older than the maximum
    # Plan 2 age in the base year, with uni_start_year in 2012-2022.
    # NOTE(review): with _FRS_BASE_YEAR = 2023, base_year_age > 29 means
    # age > year - 1994, while uni_start_year >= 2012 means
    # age <= year - 1994. The two cannot both hold, so this mask is always
    # empty and the step assigns nobody. Left unchanged pending a decision
    # on the intended age band.
    max_plan2_age_base = 29  # Max age of Plan 2 in base year
    plan2_new_cohort = (
        (new_plan == "NONE")
        & is_tertiary
        & (base_year_age > max_plan2_age_base)
        & (uni_start_year >= 2012)
        & (uni_start_year < 2023)
    )
    assign_with_probability(plan2_new_cohort, "PLAN_2")

    # === Step 4: Add Plan 5 holders (new plan from 2023) ===
    # Eligible: tertiary-educated NONE people in England who would have
    # started uni 2023+.
    # Age constraint: must be 21+ (finished 3-year degree) to be repaying.
    plan5_eligible = (
        (new_plan == "NONE")
        & is_tertiary
        & is_england
        & (uni_start_year >= 2023)
        & (age >= 21)
    )
    assign_with_probability(plan5_eligible, "PLAN_5")

    # Write the results onto a copy of the person table and swap it in.
    person_out = current_year.person.copy()
    person_out["student_loan_plan"] = new_plan
    person_out["student_loan_repayments"] = repayments
    current_year.person = person_out

    return current_year


def reset_uprating(
dataset: UKMultiYearDataset,
):
Expand Down