-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
290adb9
commit f320775
Showing
23 changed files
with
1,004 additions
and
280 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import pandas as pd | ||
import numpy as np | ||
|
||
|
||
def _time_between_date_series( | ||
later_date: pd.Series, | ||
earlier_date: pd.Series, | ||
years: bool = False, | ||
days: bool = False, | ||
) -> pd.Series: | ||
""" | ||
Returns the number of days between two date series. | ||
:param later_date: The later date. | ||
:param earlier_date: The earlier date. | ||
:param years: If True, returns the number of years between the two dates. The default is False. | ||
:param days: If True, returns the number of days between the two dates. The default is True. | ||
:returns: The number of days between the dates. | ||
""" | ||
time = later_date - earlier_date | ||
time = time.dt.days | ||
|
||
if days: | ||
time = time.astype("Int64") | ||
return time | ||
|
||
elif years: | ||
time = (time / 365).apply(np.floor) | ||
time = time.astype("Int64") | ||
return time | ||
|
||
|
||
def _filter_events(data: pd.DataFrame, day_column: str, max_days: int) -> pd.DataFrame: | ||
""" | ||
Filters the data to only include events that occur within the specified maximum days. | ||
:param data: The data to filter. | ||
:param day_column: The column containing the date. | ||
:param max_days: The maximum number of days to include. | ||
:returns: The filtered data. | ||
""" | ||
data = data[((data[day_column] <= max_days) & (data[day_column] >= 0))] | ||
return data |
File renamed without changes.
74 changes: 74 additions & 0 deletions
74
liiatools/cin_census_pipeline/reports/_reports_referrals.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import numpy as np | ||
import pandas as pd | ||
|
||
from liiatools.cin_census_pipeline.spec import load_reports | ||
from liiatools.cin_census_pipeline.reports import _time_between_date_series, _filter_events | ||
|
||
|
||
def referral_outcomes(data: pd.DataFrame) -> pd.DataFrame:
    """
    Add referral outcomes to the data based on assessment and S47 dates. These can be;
    NFA, S17, S47 or BOTH

    :param data: The data used to calculate referral outcomes. It must contain the columns
        LAchildID, CINreferralDate, PersonBirthDate, AssessmentActualStartDate and S47ActualStartDate.
    :returns: One row per child/referral/birth date (plus any matching assessment and S47 dates)
        with the columns referral_outcome and Age at referral attached.
    """
    reports_config = load_reports()
    referral_keys = ["LAchildID", "CINreferralDate"]

    s17_dates = data[data["AssessmentActualStartDate"].notna()][
        referral_keys + ["AssessmentActualStartDate"]
    ].drop_duplicates()

    # The assessment is the later event: a positive value means it started after the referral.
    s17_dates["days_to_s17"] = _time_between_date_series(
        s17_dates["AssessmentActualStartDate"], s17_dates["CINreferralDate"], days=True
    )

    # Only assessments within config-specified period following referral are valid
    s17_dates = _filter_events(
        s17_dates, "days_to_s17", max_days=reports_config["ref_assessment"]
    )

    s47_dates = data[data["S47ActualStartDate"].notna()][
        referral_keys + ["S47ActualStartDate"]
    ].drop_duplicates()

    # The S47 is the later event: a positive value means it started after the referral.
    s47_dates["days_to_s47"] = _time_between_date_series(
        s47_dates["S47ActualStartDate"], s47_dates["CINreferralDate"], days=True
    )

    # Only S47s within config-specified period following referral are valid
    # NOTE(review): this reuses the "ref_assessment" limit for S47s - confirm that is intended.
    s47_dates = _filter_events(
        s47_dates, "days_to_s47", max_days=reports_config["ref_assessment"]
    )

    merged = data[referral_keys + ["PersonBirthDate"]].drop_duplicates()
    merged = merged.merge(s17_dates, how="left", on=referral_keys)
    merged = merged.merge(s47_dates, how="left", on=referral_keys)

    # After the left merges a missing date means no valid event of that kind for the referral.
    has_s17 = merged["AssessmentActualStartDate"].notna()
    has_s47 = merged["S47ActualStartDate"].notna()

    merged["referral_outcome"] = np.select(
        [~has_s17 & ~has_s47, has_s17 & ~has_s47, ~has_s17 & has_s47, has_s17 & has_s47],
        ["NFA", "S17", "S47", "BOTH"],
        default=None,
    )

    merged["Age at referral"] = _time_between_date_series(
        merged["CINreferralDate"], merged["PersonBirthDate"], years=True
    )

    return merged
107 changes: 107 additions & 0 deletions
107
liiatools/cin_census_pipeline/reports/_reports_s47_journeys.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
import pandas as pd | ||
import numpy as np | ||
from datetime import datetime | ||
|
||
from liiatools.cin_census_pipeline.spec import load_reports | ||
from liiatools.cin_census_pipeline.reports import ( | ||
_time_between_date_series, | ||
) | ||
|
||
|
||
def _day_limit_to_timedelta(limit) -> pd.Timedelta:
    """Convert a configured day limit to a Timedelta, reading bare numbers as days."""
    # pd.Timedelta(60) means 60 nanoseconds, so plain numbers need an explicit unit.
    # NOTE(review): assumes numeric config limits are expressed in days - confirm against the config.
    if isinstance(limit, (int, float)) and not isinstance(limit, bool):
        return pd.Timedelta(days=limit)
    return pd.Timedelta(limit)


def s47_journeys(data: pd.DataFrame) -> pd.DataFrame:
    """
    Creates an output that can generate a Sankey diagram of outcomes from S47 events

    :param data: The data to calculate S47 event outcomes. It must contain the columns LAchildID,
        CINreferralDate, PersonBirthDate, DateOfInitialCPC, Year, S47ActualStartDate and CPPstartDate.
    :return: The data with S47 outcomes attached as Source / Destination pairs. Rows whose first
        destination is "ICPC" appear a second time with Source "ICPC" and their onward destination.
    """
    reports_config = load_reports()
    referral_keys = ["LAchildID", "CINreferralDate"]

    s47_dates = data[data["S47ActualStartDate"].notna()][
        referral_keys + ["S47ActualStartDate"]
    ].drop_duplicates()

    cpp_dates = data[data["CPPstartDate"].notna()][
        referral_keys + ["CPPstartDate"]
    ].drop_duplicates()

    merged = data[
        referral_keys + ["PersonBirthDate", "DateOfInitialCPC", "Year"]
    ].drop_duplicates()

    merged = merged.merge(s47_dates, how="left", on=referral_keys)
    merged = merged.merge(cpp_dates, how="left", on=referral_keys)

    merged["icpc_to_cpp"] = _time_between_date_series(
        merged["CPPstartDate"], merged["DateOfInitialCPC"], days=True
    )

    merged["s47_to_cpp"] = _time_between_date_series(
        merged["CPPstartDate"], merged["S47ActualStartDate"], days=True
    )

    # Only keep logically consistent events (as defined in config variables)
    icpc_consistent = (merged["icpc_to_cpp"] >= 0) & (
        merged["icpc_to_cpp"] <= reports_config["icpc_cpp_days"]
    )
    s47_consistent = (merged["s47_to_cpp"] >= 0) & (
        merged["s47_to_cpp"] <= reports_config["s47_cpp_days"]
    )
    # Copy so the column assignments below write to an independent frame, not a slice.
    merged = merged[icpc_consistent | s47_consistent].copy()

    # Dates used to define window for S47 events where outcome may not be known because CIN Census is too recent
    # The close date is 31 March of each row's own Year, built in one vectorised step.
    census_year = pd.to_numeric(merged["Year"]).astype("int64")
    merged["cin_census_close"] = pd.to_datetime(
        pd.DataFrame({"year": census_year, "month": 3, "day": 31})
    )

    merged["s47_max_date"] = merged["cin_census_close"] - _day_limit_to_timedelta(
        reports_config["s47_day_limit"]
    )
    merged["icpc_max_date"] = merged["cin_census_close"] - _day_limit_to_timedelta(
        reports_config["icpc_day_limit"]
    )

    merged["Source"] = "S47 strategy discussion"

    icpc = merged["DateOfInitialCPC"].notna()

    cpp_start = merged["DateOfInitialCPC"].isna() & merged["CPPstartDate"].notna()

    # TODO: Check if this (and the default=No ICPC or CPP) ever actually comes up
    # (I think they're removed when checking for logical events)
    tbd = merged["S47ActualStartDate"] >= merged["s47_max_date"]

    # np.select takes the first matching condition, so the order below sets the priority.
    merged["Destination"] = np.select(
        [icpc, cpp_start, tbd],
        ["ICPC", "CPP Start", "TBD - S47 too recent"],
        default="No ICPC or CPP",
    )

    # Second leg of the journey: what happened after the ICPC.
    icpc_destination = merged[merged["Destination"] == "ICPC"].copy()
    icpc_destination["Source"] = "ICPC"

    cpp_start_2 = icpc_destination["CPPstartDate"].notna()

    tbd_2 = icpc_destination["DateOfInitialCPC"] >= icpc_destination["icpc_max_date"]

    icpc_destination["Destination"] = np.select(
        [cpp_start_2, tbd_2],
        ["CPP Start", "TBD - ICPC too recent"],
        default="No CPP",
    )

    s47_journey = pd.concat([merged, icpc_destination])

    s47_journey["Age at S47"] = _time_between_date_series(
        s47_journey["S47ActualStartDate"], s47_journey["PersonBirthDate"], years=True
    )

    return s47_journey
6 changes: 6 additions & 0 deletions
6
liiatools/cin_census_pipeline/reports.py → ...ls/cin_census_pipeline/reports/reports.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,13 @@ | ||
from ._reports_assessment_factors import expanded_assessment_factors | ||
from ._reports_referrals import referral_outcomes | ||
from ._reports_s47_journeys import s47_journeys | ||
from liiatools.cin_census_pipeline.reports import _time_between_date_series, _filter_events | ||
|
||
# Public API of this module. Python only recognises the lowercase ``__all__``
# spelling; an uppercase ``__ALL__`` is just an ordinary, unused variable.
__all__ = [
    "expanded_assessment_factors",
    "referral_outcomes",
    "s47_journeys",
]
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.