Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 54 additions & 25 deletions gen3_tracker/meta/dataframer.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,27 +477,38 @@ def select_coding(self, resource):

def flattened_research_subjects(self) -> Generator[dict, None, None]:

# get all observations with a Observation.subject=Patient, mapped from patient ID to observation
# setup
resource_type = "ResearchSubject"
conditions_by_patient_id = get_conditions_by_subject(self, "Patient")
patient_type = "Patient"
cursor = self.connect()

# grab associated conditions + observations via patient ID at once
conditions_by_patient_id = get_conditions_by_subject(self, patient_type)
observations_by_patient_id = get_observations_by_focus(self, patient_type)

# get all ResearchSubjects
cursor = self.connect()
cursor.execute(
"SELECT * FROM resources where resource_type = ?", (resource_type,)
)

# get research subject and associated .subject patient
# add in new fields to existing research subject
for _, _, raw_research_subject in cursor.fetchall():
research_subject = json.loads(raw_research_subject)
flat_research_subject = SimplifiedResource.build(
resource=research_subject
).simplified

# return with .subject (ie Patient) fields
patient = get_subject(self, research_subject)
_, patient = get_subject(self, research_subject)
flat_research_subject.update(patient)

# add patient observation values
flat_research_subject = update_with_observations(
flat_research_subject,
patient["patient_id"],
observations_by_patient_id,
)

# get condition code, eg enrollment diagnosis
if patient["patient_id"] in conditions_by_patient_id:
conditions = conditions_by_patient_id[patient["patient_id"]]
Expand Down Expand Up @@ -528,7 +539,7 @@ def flattened_medication_administrations(self) -> Generator[dict, None, None]:
resource=medication_administration
).simplified

patient = get_subject(self, medication_administration)
_, patient = get_subject(self, medication_administration)
flat_medication_administration.update(patient)

yield flat_medication_administration
Expand Down Expand Up @@ -561,16 +572,17 @@ def flattened_document_reference(
flat_doc_ref = SimplifiedResource.build(resource=doc_ref).simplified

# extract the corresponding .subject and append its fields
flat_doc_ref.update(get_subject(self, doc_ref))

# populate observation data associated with the document reference document
if doc_ref["id"] in observation_by_focus_id:
associated_observations = observation_by_focus_id[doc_ref["id"]]
raw_subject, simplified_subject = get_subject(self, doc_ref)
flat_doc_ref.update(simplified_subject)

# TODO: assumes there are no duplicate column names in each observation
for observation in associated_observations:
flat_observation = SimplifiedResource.build(resource=observation).values
flat_doc_ref.update(flat_observation)
# extract the subject of the .subject and append its fields
# eg: a specimen is associated with a patients
_, simplified_subject_of_subject = get_subject(self, raw_subject)
flat_doc_ref.update(simplified_subject_of_subject)

# populate observation data associated with the document reference document
update_with_observations(flat_doc_ref, doc_ref["id"], observation_by_focus_id)

# TODO: test this based on fhir-gdc
if "basedOn" in doc_ref:
Expand Down Expand Up @@ -606,16 +618,11 @@ def flattened_specimen(self, specimen: dict, observation_by_id: dict) -> dict:
flat_specimen = SimplifiedResource.build(resource=specimen).simplified

# extract its .subject and append its fields (including id)
flat_specimen.update(get_subject(self, specimen))
_, simplified_subject = get_subject(self, specimen)
flat_specimen.update(simplified_subject)

# populate observation codes for each associated observation
if specimen["id"] in observation_by_id:
observations = observation_by_id[specimen["id"]]

# TODO: assumes there are no duplicate column names in each observation
for observation in observations:
flat_observation = SimplifiedResource.build(resource=observation).values
flat_specimen.update(flat_observation)
update_with_observations(flat_specimen, specimen["id"], observation_by_id)

return flat_specimen

Expand Down Expand Up @@ -725,13 +732,21 @@ def is_number(s):
return False


####################
# MACROS / HELPERS #
####################


def get_subject(db: LocalFHIRDatabase, resource: dict) -> dict:
"""get the resource's subject field if it exists"""
"""
get the resource's subject if it exists
Return both the raw subject and its simplified version
"""

# ensure resource has subject field
subject_key = get_nested_value(resource, ["subject", "reference"])
if subject_key is None:
return {}
return {}, {}

# traverse the resource of the subject and return its values
cursor = db.connect()
Expand All @@ -740,7 +755,8 @@ def get_subject(db: LocalFHIRDatabase, resource: dict) -> dict:
assert row, f"{subject_key} not found in database"
_, _, raw_subject = row
subject = json.loads(raw_subject)
return traverse(subject)

return subject, traverse(subject)


def get_resources_by_reference(
Expand Down Expand Up @@ -804,3 +820,16 @@ def get_conditions_by_subject(
) -> dict[str, list]:
"""get all Conditions that have a subject of resource type subject_type"""
return get_resources_by_reference(db, "Condition", "subject", subject_type)


def update_with_observations(resource, id, observations_by_id):
"""update a resource with the observations associated with the provided ID"""
if id in observations_by_id:
associated_observations = observations_by_id[id]

# TODO: assumes there are no duplicate column names in each observation
for observation in associated_observations:
flat_observation = SimplifiedResource.build(resource=observation).values
resource.update(flat_observation)

return resource
1 change: 1 addition & 0 deletions gen3_tracker/meta/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,7 @@ def values(self) -> dict:
for parent_dict in self.resource["parent"]
]
)

return _values


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

setup(
name='gen3_tracker',
version='0.0.7rc24',
version='0.0.7rc27',
description='A CLI for adding version control to Gen3 data submission projects.',
long_description=long_description,
long_description_content_type='text/markdown',
Expand Down
1 change: 0 additions & 1 deletion tests/integration/test_end_to_end_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None:
# check the files exist in the cloned directory
run_command("ls -l")


assert Path(
"my-project-data/hello.txt"
).exists(), "hello.txt does not exist in the cloned directory."
Expand Down
Loading