Skip to content

Commit

Permalink
feat: adds four new default FHIR resources to the mix
Browse files Browse the repository at this point in the history
New & default tasks:
- allergyintolerance
- device
- diagnosticreport
- immunization

I've also modified some of the existing unit testing to be more
specific about which tests to run, in the cases where it doesn't make
sense to update tests when you add a new task.
  • Loading branch information
mikix committed Oct 24, 2023
1 parent 494998f commit 14ae0f7
Show file tree
Hide file tree
Showing 29 changed files with 559 additions and 35 deletions.
112 changes: 111 additions & 1 deletion cumulus_etl/deid/ms-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
{"path": "nodesByType('ContactDetail')", "method": "redact"},
{"path": "nodesByType('ContactPoint')", "method": "redact"},
{"path": "nodesByType('HumanName')", "method": "redact"},
{"path": "nodesByType('Identifier')", "method": "redact"},
{"path": "nodesByType('Narrative')", "method": "redact"},


Expand All @@ -71,6 +70,27 @@

{"comment": "** Top-level resources **", "path": "xxx", "method": "redact"},

{"comment": "** https://www.hl7.org/fhir/R4/allergyintolerance.html **", "path": "xxx", "method": "redact"},
{"path": "AllergyIntolerance.clinicalStatus", "method": "keep"},
{"path": "AllergyIntolerance.verificationStatus", "method": "keep"},
{"path": "AllergyIntolerance.type", "method": "keep"},
{"path": "AllergyIntolerance.category", "method": "keep"},
{"path": "AllergyIntolerance.criticality", "method": "keep"},
{"path": "AllergyIntolerance.code", "method": "keep"},
{"path": "AllergyIntolerance.patient", "method": "keep"},
{"path": "AllergyIntolerance.encounter", "method": "keep"},
{"path": "AllergyIntolerance.onset.ofType(string)", "method": "redact", "comment": "would run philter on it, but it would just remove anything useful we could parse"},
{"path": "AllergyIntolerance.onset", "method": "keep"},
{"path": "AllergyIntolerance.recordedDate", "method": "keep"},
{"path": "AllergyIntolerance.recorder", "method": "keep"},
{"path": "AllergyIntolerance.asserter", "method": "keep"},
{"path": "AllergyIntolerance.lastOccurrence", "method": "keep"},
{"path": "AllergyIntolerance.reaction.substance", "method": "keep"},
{"path": "AllergyIntolerance.reaction.manifestation", "method": "keep"},
{"path": "AllergyIntolerance.reaction.onset", "method": "keep"},
{"path": "AllergyIntolerance.reaction.severity", "method": "keep"},
{"path": "AllergyIntolerance.reaction.exposureRoute", "method": "keep"},

{"comment": "** https://www.hl7.org/fhir/R4/condition.html **", "path": "xxx", "method": "redact"},
{"path": "Condition.clinicalStatus", "method": "keep"},
{"path": "Condition.verificationStatus", "method": "keep"},
Expand All @@ -93,6 +113,58 @@
{"path": "Condition.evidence.code", "method": "keep"},
{"path": "Condition.evidence.detail", "method": "keep"},

{"comment": "** https://www.hl7.org/fhir/R4/device.html **", "path": "xxx", "method": "redact"},
{"path": "Device.definition", "method": "keep"},
{"path": "Device.udiCarrier.deviceIdentifier", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Device.udiCarrier.issuer", "method": "keep"},
{"path": "Device.udiCarrier.jurisdiction", "method": "keep"},
{"path": "Device.udiCarrier.carrierAIDC", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Device.udiCarrier.carrierHRF", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Device.udiCarrier.entryType", "method": "keep"},
{"path": "Device.status", "method": "keep"},
{"path": "Device.statusReason", "method": "keep"},
{"path": "Device.distinctIdentifier", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Device.manufacturer", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Device.manufactureDate", "method": "keep"},
{"path": "Device.expirationDate", "method": "keep"},
{"path": "Device.lotNumber", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Device.serialNumber", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Device.deviceName.name", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Device.deviceName.type", "method": "keep"},
{"path": "Device.modelNumber", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Device.partNumber", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Device.type", "method": "keep"},
{"path": "Device.specialization.systemType", "method": "keep"},
{"path": "Device.specialization.version", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Device.version.type", "method": "keep"},
{"path": "Device.version.component", "method": "keep", "comment": "caution: non-PHI identifier"},
{"path": "Device.version.value", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Device.property.type", "method": "keep"},
{"path": "Device.property.valueQuantity", "method": "keep"},
{"path": "Device.property.valueCode", "method": "keep"},
{"path": "Device.patient", "method": "keep"},
{"path": "Device.owner", "method": "keep"},
{"path": "Device.location", "method": "keep"},
{"path": "Device.safety", "method": "keep"},
{"path": "Device.parent", "method": "keep"},

{"comment": "** https://www.hl7.org/fhir/R4/diagnosticreport.html **", "path": "xxx", "method": "redact"},
{"path": "DiagnosticReport.basedOn", "method": "keep"},
{"path": "DiagnosticReport.status", "method": "keep"},
{"path": "DiagnosticReport.category", "method": "keep"},
{"path": "DiagnosticReport.code", "method": "keep"},
{"path": "DiagnosticReport.subject", "method": "keep"},
{"path": "DiagnosticReport.encounter", "method": "keep"},
{"path": "DiagnosticReport.effective", "method": "keep"},
{"path": "DiagnosticReport.issued", "method": "keep"},
{"path": "DiagnosticReport.performer", "method": "keep"},
{"path": "DiagnosticReport.resultsInterpreter", "method": "keep"},
{"path": "DiagnosticReport.specimen", "method": "keep"},
{"path": "DiagnosticReport.result", "method": "keep"},
{"path": "DiagnosticReport.imagingStudy", "method": "keep"},
{"path": "DiagnosticReport.media.link", "method": "keep"},
{"path": "DiagnosticReport.conclusionCode", "method": "keep"},

{"comment": "** https://www.hl7.org/fhir/R4/documentreference.html **", "path": "xxx", "method": "redact"},
{"path": "DocumentReference.status", "method": "keep"},
{"path": "DocumentReference.docStatus", "method": "keep"},
Expand Down Expand Up @@ -156,6 +228,44 @@
{"path": "Encounter.serviceProvider", "method": "keep"},
{"path": "Encounter.partOf", "method": "keep"},

{"comment": "** https://www.hl7.org/fhir/R4/immunization.html **", "path": "xxx", "method": "redact"},
{"path": "Immunization.status", "method": "keep"},
{"path": "Immunization.statusReason", "method": "keep"},
{"path": "Immunization.vaccineCode", "method": "keep"},
{"path": "Immunization.patient", "method": "keep"},
{"path": "Immunization.encounter", "method": "keep"},
{"path": "Immunization.occurrence.ofType(string)", "method": "redact", "comment": "would run philter on it, but it would just remove anything useful we could parse"},
{"path": "Immunization.occurrence", "method": "keep"},
{"path": "Immunization.recorded", "method": "keep"},
{"path": "Immunization.primarySource", "method": "keep"},
{"path": "Immunization.reportOrigin", "method": "keep"},
{"path": "Immunization.location", "method": "keep"},
{"path": "Immunization.manufacturer", "method": "keep"},
{"path": "Immunization.lotNumber", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Immunization.expirationDate", "method": "keep"},
{"path": "Immunization.site", "method": "keep"},
{"path": "Immunization.route", "method": "keep"},
{"path": "Immunization.doseQuantity", "method": "keep"},
{"path": "Immunization.performer.function", "method": "keep"},
{"path": "Immunization.performer.actor", "method": "keep"},
{"path": "Immunization.reasonCode", "method": "keep"},
{"path": "Immunization.reasonReference", "method": "keep"},
{"path": "Immunization.isSubpotent", "method": "keep"},
{"path": "Immunization.subpotentReason", "method": "keep"},
{"path": "Immunization.education.documentType", "method": "keep"},
{"path": "Immunization.education.publicationDate", "method": "keep", "comment": "we aren't keeping uri for fear of any (institutional) PHI slipping in, so maybe these two dates aren't very useful, but better more data than less"},
{"path": "Immunization.education.presentationDate", "method": "keep"},
{"path": "Immunization.programEligibility", "method": "keep"},
{"path": "Immunization.fundingSource", "method": "keep"},
{"path": "Immunization.reaction.date", "method": "keep"},
{"path": "Immunization.reaction.detail", "method": "keep"},
{"path": "Immunization.reaction.reported", "method": "keep"},
{"path": "Immunization.protocolApplied.series", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Immunization.protocolApplied.authority", "method": "keep"},
{"path": "Immunization.protocolApplied.targetDisease", "method": "keep"},
{"path": "Immunization.protocolApplied.doseNumber", "method": "keep", "comment": "caution: non-PHI freeform string"},
{"path": "Immunization.protocolApplied.seriesDoses", "method": "keep", "comment": "caution: non-PHI freeform string"},

{"comment": "** https://www.hl7.org/fhir/R4/medication.html **", "path": "xxx", "method": "redact"},
{"path": "Medication.code", "method": "keep"},
{"path": "Medication.status", "method": "keep"},
Expand Down
24 changes: 24 additions & 0 deletions cumulus_etl/etl/tasks/basic_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,30 @@
from cumulus_etl.etl import tasks


class AllergyIntoleranceTask(tasks.EtlTask):
"""ETL task for the FHIR AllergyIntolerance resource, registered under the task name 'allergyintolerance'."""
name = "allergyintolerance"
resource = "AllergyIntolerance"
tags = {"cpu"}


class ConditionTask(tasks.EtlTask):
"""ETL task for the FHIR Condition resource, registered under the task name 'condition'."""
name = "condition"
resource = "Condition"
tags = {"cpu"}


class DeviceTask(tasks.EtlTask):
"""ETL task for the FHIR Device resource, registered under the task name 'device'."""
name = "device"
resource = "Device"
tags = {"cpu"}


class DiagnosticReportTask(tasks.EtlTask):
"""ETL task for the FHIR DiagnosticReport resource, registered under the task name 'diagnosticreport'."""
name = "diagnosticreport"
resource = "DiagnosticReport"
tags = {"cpu"}


class DocumentReferenceTask(tasks.EtlTask):
name = "documentreference"
resource = "DocumentReference"
Expand All @@ -28,6 +46,12 @@ class EncounterTask(tasks.EtlTask):
tags = {"cpu"}


class ImmunizationTask(tasks.EtlTask):
"""ETL task for the FHIR Immunization resource, registered under the task name 'immunization'."""
name = "immunization"
resource = "Immunization"
tags = {"cpu"}


class MedicationRequestTask(tasks.EtlTask):
"""Write MedicationRequest resources and associated Medication resources"""

Expand Down
36 changes: 16 additions & 20 deletions cumulus_etl/etl/tasks/task_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,7 @@

from cumulus_etl import errors
from cumulus_etl.etl.studies import covid_symptom, hftest
from cumulus_etl.etl.tasks.basic_tasks import (
ConditionTask,
DocumentReferenceTask,
EncounterTask,
MedicationRequestTask,
ObservationTask,
PatientTask,
ProcedureTask,
ServiceRequestTask,
)
from cumulus_etl.etl.tasks import basic_tasks

AnyTask = TypeVar("AnyTask", bound="EtlTask")

Expand Down Expand Up @@ -43,17 +34,22 @@ def get_default_tasks() -> list[type[AnyTask]]:
"""
# Note: tasks will be run in the order listed here.
return [
# Run encounter & patient first, to reduce churn on the codebook (the cached mappings would mostly be written
# out during the encounter task and wouldn't need to be re-written later, one would hope)
EncounterTask,
PatientTask,
# Run encounter & patient first, to reduce churn on the codebook (we keep cached ID mappings for those two
# resources and write out those mappings every time a batch has a new encounter/patient - so doing them all
# upfront reduces the number of times we re-write those mappings later)
basic_tasks.EncounterTask,
basic_tasks.PatientTask,
# The rest of the tasks in alphabetical order, why not:
ConditionTask,
DocumentReferenceTask,
MedicationRequestTask,
ObservationTask,
ProcedureTask,
ServiceRequestTask,
basic_tasks.AllergyIntoleranceTask,
basic_tasks.ConditionTask,
basic_tasks.DeviceTask,
basic_tasks.DiagnosticReportTask,
basic_tasks.DocumentReferenceTask,
basic_tasks.ImmunizationTask,
basic_tasks.MedicationRequestTask,
basic_tasks.ObservationTask,
basic_tasks.ProcedureTask,
basic_tasks.ServiceRequestTask,
]


Expand Down
12 changes: 11 additions & 1 deletion docs/setup/cumulus-aws-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -176,18 +176,28 @@ Resources:
ScheduleExpression: "cron(0 8 1 * ? *)" # 8am on the 1st of the month
Targets:
DeltaTargets:
# Unfortunately, we can't seem to use wildcards to define tables.
# And each DeltaTables entry can only hold ten tables (AWS-side limitation).
# But we can just have multiple groupings of ten!
- DeltaTables:
# Unfortunately, we can't seem to use wildcards to define tables
- !Sub "s3://${S3Bucket}/${EtlSubdir}/allergyintolerance"
- !Sub "s3://${S3Bucket}/${EtlSubdir}/condition"
- !Sub "s3://${S3Bucket}/${EtlSubdir}/device"
- !Sub "s3://${S3Bucket}/${EtlSubdir}/diagnosticreport"
- !Sub "s3://${S3Bucket}/${EtlSubdir}/documentreference"
- !Sub "s3://${S3Bucket}/${EtlSubdir}/encounter"
- !Sub "s3://${S3Bucket}/${EtlSubdir}/immunization"
- !Sub "s3://${S3Bucket}/${EtlSubdir}/medication"
- !Sub "s3://${S3Bucket}/${EtlSubdir}/medicationrequest"
- !Sub "s3://${S3Bucket}/${EtlSubdir}/observation"
CreateNativeDeltaTable: True
WriteManifest: False
- DeltaTables:
- !Sub "s3://${S3Bucket}/${EtlSubdir}/patient"
- !Sub "s3://${S3Bucket}/${EtlSubdir}/procedure"
- !Sub "s3://${S3Bucket}/${EtlSubdir}/servicerequest"
- !Sub "s3://${S3Bucket}/${EtlSubdir}/covid_symptom__nlp_results"
- !Sub "s3://${S3Bucket}/${EtlSubdir}/covid_symptom__nlp_results_term_exists"
CreateNativeDeltaTable: True
WriteManifest: False

Expand Down
7 changes: 2 additions & 5 deletions tests/convert/test_convert_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import ddt

from cumulus_etl import cli, common, errors
from cumulus_etl.etl.tasks import task_factory

from tests import utils


Expand Down Expand Up @@ -73,9 +71,8 @@ async def test_happy_path(self):
await self.run_convert()

# Test first conversion results
expected_tables = {output.get_name(t) for t in task_factory.get_default_tasks() for output in t.outputs}
expected_tables.add("covid_symptom__nlp_results_term_exists") # this was our non-default added table
self.assertEqual(expected_tables | {"JobConfig"}, set(os.listdir(self.target_path)))
expected_tables = set(os.listdir(self.original_path)) - {"ignored"}
self.assertEqual(expected_tables, set(os.listdir(self.target_path)))
self.assertEqual(
{"test": True}, common.read_json(f"{self.target_path}/JobConfig/{job_timestamp}/job_config.json")
)
Expand Down
Empty file.
Empty file.
27 changes: 27 additions & 0 deletions tests/data/mstool/input/AllergyIntolerance.ndjson
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"resourceType" : "AllergyIntolerance",
"identifier" : [{ "value": "dropped" }],
"clinicalStatus" : { "text": "kept" },
"verificationStatus" : { "text": "kept" },
"type" : "allergy",
"category" : ["food"],
"criticality" : "low",
"code" : { "text": "kept" },
"patient" : { "reference": "Patient/x" },
"encounter" : { "reference": "Encounter/x" },
"onsetDateTime" : "2023",
"recordedDate" : "2023",
"recorder" : { "reference": "Practitioner/x" },
"asserter" : { "reference": "Patient/x" },
"lastOccurrence" : "2023",
"note" : [{ "text": "dropped" }],
"reaction" : [{
"substance" : { "text": "kept" },
"manifestation" : [{ "text": "kept" }],
"description" : "dropped",
"onset" : "2023",
"severity" : "mild",
"exposureRoute" : { "text": "kept" },
"note" : [{ "text": "dropped" }]
}]
}
5 changes: 5 additions & 0 deletions tests/data/mstool/input/AllergyIntolerance.onsetString.ndjson
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"resourceType" : "AllergyIntolerance",
"patient" : { "reference": "Patient/x" },
"onsetString" : "dropped"
}
50 changes: 50 additions & 0 deletions tests/data/mstool/input/Device.ndjson
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{
"resourceType" : "Device",
"identifier" : [{ "value": "dropped" }],
"definition" : { "reference": "DeviceDefinition/x" },
"udiCarrier" : [{
"deviceIdentifier" : "kept",
"issuer" : "urn:kept",
"jurisdiction" : "urn:kept",
"carrierAIDC" : "kept",
"carrierHRF" : "kept",
"entryType" : "barcode"
}],
"status" : "active",
"statusReason" : [{ "text": "online" }],
"distinctIdentifier" : "kept",
"manufacturer" : "kept",
"manufactureDate" : "2023",
"expirationDate" : "2023",
"lotNumber" : "kept",
"serialNumber" : "kept",
"deviceName" : [{
"name" : "kept",
"type" : "other"
}],
"modelNumber" : "kept",
"partNumber" : "kept",
"type" : { "text": "kept" },
"specialization" : [{
"systemType" : { "text": "kept" },
"version" : "kept"
}],
"version" : [{
"type" : { "text": "kept" },
"component" : { "value": "kept" },
"value" : "kept"
}],
"property" : [{
"type" : { "text": "kept" },
"valueQuantity" : [{ "unit": "kept" }],
"valueCode" : [{ "text": "kept" }]
}],
"patient" : { "reference": "Patient/x" },
"owner" : { "reference": "Organization/x" },
"contact" : [{ "value": "dropped" }],
"location" : { "reference": "Location/x" },
"url" : "urn:dropped",
"note" : [{ "text": "dropped" }],
"safety" : [{ "text": "kept" }],
"parent" : { "reference": "Device/x" }
}
24 changes: 24 additions & 0 deletions tests/data/mstool/input/DiagnosticReport.ndjson
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"resourceType" : "DiagnosticReport",
"identifier" : [{ "value": "dropped" }],
"basedOn" : [{ "reference": "CarePlan/x" }],
"status" : "final",
"category" : [{ "text": "kept" }],
"code" : { "text": "kept" },
"subject" : { "reference": "Patient/x" },
"encounter" : { "reference": "Encounter/x" },
"effectiveDateTime" : "2023",
"issued" : "2023",
"performer" : [{ "reference": "Practitioner/x" }],
"resultsInterpreter" : [{ "reference": "Practitioner/x" }],
"specimen" : [{ "reference": "Specimen/x" }],
"result" : [{ "reference": "Observation/x" }],
"imagingStudy" : [{ "reference": "ImagingStudy/x" }],
"media" : [{
"comment" : "dropped",
"link" : { "reference": "Media/x" }
}],
"conclusion" : "dropped",
"conclusionCode" : [{ "text": "kept" }],
"presentedForm" : [{ "title": "dropped" }]
}
Loading

0 comments on commit 14ae0f7

Please sign in to comment.