Skip to content

Commit

Permalink
sen2 files copied from cin_census but not working
Browse files Browse the repository at this point in the history
  • Loading branch information
StephenCarterLIIA authored Sep 9, 2024
1 parent 9e51841 commit 5e27649
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 136 deletions.
2 changes: 1 addition & 1 deletion liiatools/datasets/sen2/lds_sen2_clean/file_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def add_la_name(data, la_name):


def la_prefix(data, la_code):
    """
    Suffix every Surname with the LA code so rows remain distinguishable
    when data from several local authorities is combined.
    :param data: DataFrame-like mapping with a "Surname" column/key
    :param la_code: LA code string to append
    :return: data with "Surname" suffixed by "_<la_code>"
    """
    # Temporarily using Surname as there is no Child ID in the return.
    # The diff residue applied this line twice, which would append the
    # code twice; it must run exactly once.
    data["Surname"] = data["Surname"] + "_" + la_code
    return data


Expand Down
142 changes: 43 additions & 99 deletions liiatools/datasets/sen2/lds_sen2_clean/sen2_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from sfdata_stream_parser.collectors import xml_collector


class PersonEvent(events.ParseEvent):
    """Stream event carrying one parsed <Person> record from the SEN2 return."""

    pass


# Backwards-compatible alias: earlier revisions of this module yielded
# SEN2Event; keep the old name resolving to the same class so isinstance
# checks against either name behave identically.
SEN2Event = PersonEvent

Expand All @@ -26,78 +26,60 @@ def _reduce_dict(dict_instance):

@xml_collector
def text_collector(stream):
    """
    Create a dictionary of text values for each element
    :param stream: An iterator of events from an XML parser
    :return: Dictionary containing element name and text values
    """
    data_dict = {}
    current_element = None
    for event in stream:
        if isinstance(event, events.StartElement):
            current_element = event.tag
        if isinstance(event, events.TextNode) and event.text:
            # Text seen before any StartElement is keyed under None; callers
            # only look up named keys, so such stray text is ignored.
            data_dict.setdefault(current_element, []).append(event.text)

    # Debug print statements from the WIP commit removed; the EndElement
    # branch existed only to print and is dropped with them.
    return _reduce_dict(data_dict)


@xml_collector
def sen2_collector(stream):
    # Flatten one subtree of the SEN2 return into a dict: known module
    # elements are delegated to text_collector, loose text nodes are
    # appended under the most recently seen tag.
    data_dict = {}
    stream = peekable(stream)
    last_tag = None
    while stream:
        event = stream.peek()
        # Track the tag of the latest tagged event so untagged text nodes
        # can be attributed to it (events without a tag keep last_tag).
        last_tag = event.get("tag", last_tag)
        if event.get("tag") in (
            "Requests",
            "Assessments",
            "NamedPlan",
            "ActivePlans",
        ):
            # text_collector consumes the whole sub-element from the shared
            # stream, so no next(stream) is needed on this branch.
            data_dict.setdefault(event.tag, []).append(text_collector(stream))
        else:
            if isinstance(event, events.TextNode) and event.text:
                data_dict.setdefault(last_tag, []).append(event.text)
            next(stream)

    return _reduce_dict(data_dict)


@xml_collector
def child_collector(stream):
    """
    Collect each <Person> block (and any <Requests> block) found under
    <Persons> into a single dictionary.
    :param stream: An iterator of events from an XML parser
    :return: Dictionary keyed by element name
    """
    data_dict = {}
    stream = peekable(stream)
    assert stream.peek().tag == "Persons"
    while stream:
        event = stream.peek()
        # Bug fix: the original tested `in ("Person")`, which is string
        # membership (substring test) because ("Person") is not a tuple,
        # and raises TypeError when the tag is None. Use equality instead.
        if event.get("tag") == "Person":
            data_dict.setdefault(event.tag, []).append(text_collector(stream))
        elif event.get("tag") == "Requests":
            data_dict.setdefault(event.tag, []).append(sen2_collector(stream))
        else:
            next(stream)

    return _reduce_dict(data_dict)


@xml_collector
def message_collector(stream):
    """
    Collect messages from XML elements and yield events
    :param stream: An iterator of events from an XML parser
    :yield: Events of type HeaderEvent or PersonEvent
    """
    stream = peekable(stream)
    assert stream.peek().tag == "Message", "Expected Message, got {}".format(
        stream.peek().tag
    )
    while stream:
        event = stream.peek()
        if event.get("tag") == "Header":
            header_record = text_collector(stream)
            if header_record:
                yield HeaderEvent(record=header_record)
        elif event.get("tag") == "Persons":
            # The diff residue invoked both sen2_collector and text_collector
            # on the same stream here, consuming it twice; exactly one
            # collector may consume the <Persons> subtree.
            person_record = text_collector(stream)
            if person_record:
                yield PersonEvent(record=person_record)
        else:
            next(stream)


__EXPORT_HEADERS = [
__EXPORT_HEADERS_PERSON = [
"Surname",
"Forename",
"PersonBirthDate",
Expand All @@ -118,62 +100,24 @@ def _maybe_list(value):
return value


def sen2_event(record, property, event_name=None):
    """Build a single export row when *record* holds a value for *property*.

    Returns a one-element tuple containing the row dict (restricted to
    __EXPORT_HEADERS), or an empty tuple when the property is absent or
    falsy, so callers can ``yield from`` the result directly.
    """
    event_name = property if event_name is None else event_name
    date_value = record.get(property)
    if not date_value:
        return ()
    enriched = {**record, "Date": date_value, "Type": event_name}
    return ({header: enriched.get(header) for header in __EXPORT_HEADERS},)


def event_to_records(event) -> Iterator[dict]:
    """
    Yield one flat row dict per record carried by *event*.

    The event's ``record`` may be a single dict or a list of dicts;
    _maybe_list normalises both shapes to an iterable.

    The diff residue contained two consecutive ``def`` lines (a syntax
    error) plus an old CIN-census body referencing undefined names
    (``assessment``, ``**sen2_event``); this keeps the new minimal body.
    """
    record = event.record
    for item in _maybe_list(record):
        yield item


def export_table(stream):
    """
    Collect PersonEvent records from the stream into a tablib Dataset.
    :param stream: An iterator of parse events
    :return: tablib.Dataset with one row per person record
    """
    data_person = tablib.Dataset(headers=__EXPORT_HEADERS_PERSON)
    for event in stream:
        if isinstance(event, PersonEvent):
            for record in event_to_records(event):
                # Missing keys export as empty strings so every row has the
                # full header width.
                data_person.append(
                    [record.get(k, "") for k in __EXPORT_HEADERS_PERSON]
                )
    # Bug fix: the diff residue returned from inside the loop after the
    # first event; return only once the whole stream has been consumed.
    # NOTE(review): LA-level export (LALevelEvent) is not implemented yet.
    return data_person
52 changes: 20 additions & 32 deletions liiatools/datasets/sen2/sen2_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,38 +156,24 @@ def cleanfile(input, la_code, la_log_dir, output):
# Clean stream
stream = converter.convert_true_false(stream)
tags = [
"LAchildID",
"UPN",
"FormerUPN",
"UPNunknown",
"PersonBirthDate",
"GenderCurrent",
"PersonDeathDate",
"Ethnicity",
"Disability",
"sen2referralDate",
"ReferralSource",
"PrimaryNeedCode",
"sen2closureDate",
"ReasonForClosure",
"DateOfInitialCPC",
"AssessmentActualStartDate",
"AssessmentInternalReviewDate",
"AssessmentAuthorisationDate",
"AssessmentFactors",
"sen2PlanStartDate",
"sen2PlanEndDate",
"S47ActualStartDate",
"InitialCPCtarget",
"DateOfInitialCPC",
"ICPCnotRequired",
"ReferralNFA",
"CPPstartDate",
"CPPendDate",
"InitialCategoryOfAbuse",
"LatestCategoryOfAbuse",
"NumberOfPreviousCPP",
"CPPreviewDate",
"Surname",
"Forename",
"PersonBirthDate",
"Sex",
"Ethnicity",
"Postcode",
"UPN",
"UniqueLearnerNumber",
"UPNunknown",
"ReceivedDate",
"RequestSource",
"RYA",
"RequestOutcomeDate",
"RequestOutcome",
"RequestMediation",
"RequestTribunal",
"Exported",
# etc.etc.
]
stream = validator.remove_invalid(stream, tag_list=tags)

Expand Down Expand Up @@ -359,3 +345,5 @@ def pan_agg(input, la_code, flat_output, analysis_output):
# icpc_day_limit = config["icpc_day_limit"]
# s47_journey = pan_process.s47_paths(s47_outs, s47_day_limit, icpc_day_limit)
# pan_process.export_journeyfile(analysis_output, s47_journey)

# poetry run python liiatools sen2 cleanfile --la_code "BAD" --la_log_dir "/workspaces/liia-tools/liiatools/spec/sen2/samples/" --o "/workspaces/liia-tools/liiatools/spec/sen2/samples/" --i "/workspaces/liia-tools/liiatools/spec/sen2/samples/SEN2-2024-mockup.xml"
8 changes: 4 additions & 4 deletions liiatools/spec/sen2/samples/SEN2-2024-mockup.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
<Forename>TestFirstname1</Forename>
<PersonBirthDate>2002-01-01</PersonBirthDate>
<Sex>M</Sex>
<Ethnicity>WBRI</Ethnicity>
<PostCode>AA11AA</PostCode>
<UPN>X123456789875</UPN>
<UniqueLearnerNumber>1234567890</UniqueLearnerNumber>
<UPNunknown></UPNunknown>
<Requests>
Expand Down Expand Up @@ -119,9 +119,9 @@
<Forename>TestFirstname2</Forename>
<PersonBirthDate>2003-11-11</PersonBirthDate>
<Sex>F</Sex>
<Ethnicity>BAFR</Ethnicity>
<PostCode>AB11CD</PostCode>
<UPN>X123456789832</UPN>
<UniqueLearnerNumber>1234567089</UniqueLearnerNumber>
<UPNunknown></UPNunknown>
<Requests>
Expand Down

0 comments on commit 5e27649

Please sign in to comment.