Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 1 addition & 34 deletions gen3_tracker/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@
import typing
import uuid
from collections import OrderedDict
from typing import Union, Optional
from typing import Optional

import click
import pydantic
from click import Context, Command
from pydantic import BaseModel, field_validator


ACED_NAMESPACE = uuid.uuid3(uuid.NAMESPACE_DNS, b'aced-idp.org')
ENV_VARIABLE_PREFIX = 'G3T_'

Expand All @@ -24,28 +22,6 @@
}


def monkey_patch_url_validate():
    """Patch ``fhir.resources``' Url validator so ``file:`` URLs are accepted.

    Works around a regression introduced with ``fhir.resources``==7.0.1 where
    ``file:`` scheme URLs fail validation.
    See https://github.com/pydantic/pydantic/issues/1983
    """
    # monkey patch to allow file: urls
    # Imported lazily so the patch is only wired up when this function is called.
    import fhir.resources.fhirtypes
    from pydantic import FileUrl

    # Keep a reference to the stock validator so non-file URLs still go
    # through the original validation path.
    original_url_validate = fhir.resources.fhirtypes.Url.validate

    @classmethod
    def better_url_validate(cls, value: str, field: "ModelField", config: "BaseConfig") -> Union["AnyUrl", str]: # noqa
        """Allow file: urls. see https://github.com/pydantic/pydantic/issues/1983
        bugfix: addresses issue introduced with `fhir.resources`==7.0.1
        """
        if value.startswith("file:"):
            # Validate as a FileUrl (raises on malformed input) but return the
            # original string unchanged.
            _ = FileUrl(value)
            return value
        # return FileUrl.validate(value, field, config)
        # Delegate everything else to the saved original validator.
        value = original_url_validate(value, field, config)
        return value

    # Replace the classmethod on the type; assigning the @classmethod-decorated
    # function preserves the classmethod descriptor behavior.
    fhir.resources.fhirtypes.Url.validate = better_url_validate


class LogConfig(BaseModel):
format: str
"""https://docs.python.org/3/library/logging.html#logging.Formatter"""
Expand Down Expand Up @@ -177,12 +153,3 @@ def resolve_command(
# os._exit(1) # noqa

raise e


# main
monkey_patch_url_validate()

# default initializers for path
pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.PosixPath] = str
pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.WindowsPath] = str
pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.Path] = str
8 changes: 4 additions & 4 deletions gen3_tracker/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ def create_resource_id(resource, project_id) -> str:
assert resource, "resource required"
assert project_id, "project_id required"
identifier_string = identifier_to_string(resource.identifier)
return str(uuid.uuid5(ACED_NAMESPACE, f"{project_id}/{resource.resource_type}/{identifier_string}"))
return str(uuid.uuid5(ACED_NAMESPACE, f"{project_id}/{resource.get_resource_type()}/{identifier_string}"))


def create_object_id(path: str, project_id: str) -> str:
Expand All @@ -344,7 +344,7 @@ def assert_valid_id(resource, project_id):
"""Ensure that the id is correct."""
assert resource, "resource required"
assert project_id, "project_id required"
if resource.resource_type == "DocumentReference":
if resource.get_resource_type() == "DocumentReference":
document_reference: DocumentReference = resource
official_identifier = document_reference.content[0].attachment.url
recreate_id = create_object_id(official_identifier, project_id)
Expand All @@ -354,7 +354,7 @@ def assert_valid_id(resource, project_id):
recreate_id = create_resource_id(resource, project_id)
if resource.id == recreate_id:
return
msg = f"The current {resource.resource_type}.id {resource.id} does not equal the calculated one {recreate_id}, has the project id changed? current:{project_id} {resource.resource_type}:{official_identifier}"
msg = f"The current {resource.get_resource_type()}.id {resource.id} does not equal the calculated one {recreate_id}, has the project id changed? current:{project_id} {resource.get_resource_type()}:{official_identifier}"
raise Exception(msg)


Expand Down Expand Up @@ -523,7 +523,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
elif hasattr(self.output.obj, 'model_dump'):
_.update(self.output.obj.model_dump())
else:
_.update(self.output.obj.dict())
_.update(self.output.obj.model_dump())
rc = self.output.exit_code
if exc_type is not None:
if isinstance(self.output.obj, dict):
Expand Down
12 changes: 6 additions & 6 deletions gen3_tracker/meta/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ def parse_obj(resource: dict, validate=True) -> ParseResult:
try:
assert 'resourceType' in resource, "Dict missing `resourceType`, is it a FHIR dict?"
klass = FHIR_CLASSES.get_fhir_model_class(resource['resourceType'])
_ = klass.parse_obj(resource)
_ = klass.model_validate(resource)
if validate:
# trigger object traversal, see monkey patch below, at bottom of file
_.dict()
_.model_dump()
return ParseResult(resource=_, exception=None, path=None, resource_id=_.id)
except (ValidationError, AssertionError) as e:
return ParseResult(resource=None, exception=e, path=None, resource_id=resource.get('id', None))
Expand All @@ -68,11 +68,11 @@ def _entry_iterator(parse_result: ParseResult) -> Iterator[ParseResult]:
if _ is None:
break
if hasattr(_, 'resource') and _.resource: # BundleEntry
yield ParseResult(path=_path, resource=_.resource, offset=offset, exception=None, json_obj=_.resource.dict())
yield ParseResult(path=_path, resource=_.resource, offset=offset, exception=None, json_obj=_.resource.model_dump())
elif hasattr(_, 'item'): # ListEntry
yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.dict())
yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.model_dump())
else:
yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.dict())
yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.model_dump())
offset += 1
pass

Expand All @@ -81,7 +81,7 @@ def _has_entries(_: ParseResult):
"""FHIR types Bundles List have entries"""
if _.resource is None:
return False
return _.resource.resource_type in ["List"] and _.resource.entry is not None
return _.resource.get_resource_type() in ["List"] and _.resource.entry is not None


def directory_reader(directory_path: str,
Expand Down
2 changes: 1 addition & 1 deletion gen3_tracker/meta/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def render_graph(config: Config, directory_path: str, output_path: str, browser:
@meta.command("dataframe")
@click.argument('data_type',
required=True,
type=click.Choice(['Specimen', 'DocumentReference', 'ResearchSubject']),
type=click.Choice(['Specimen', 'DocumentReference', 'ResearchSubject', "MedicationAdministration"]),
default=None)
@click.argument("directory_path",
type=click.Path(exists=True, file_okay=False),
Expand Down
24 changes: 24 additions & 0 deletions gen3_tracker/meta/dataframer.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,28 @@ def flattened_research_subjects(self) -> Generator[dict, None, None]:

yield flat_research_subject


def flattened_medication_administrations(self) -> Generator[dict, None, None]:
    """Yield one flat dict per MedicationAdministration, merged with the fields
    of its ``.subject`` Patient."""

    # Fetch every MedicationAdministration row from the local resource store.
    cursor = self.connect()
    cursor.execute(
        "SELECT * FROM resources where resource_type = ?", ("MedicationAdministration",)
    )

    for _key, _resource_type, raw_medication_administration in cursor.fetchall():
        medication_administration = json.loads(raw_medication_administration)

        # Flatten the resource itself ...
        flat = SimplifiedResource.build(
            resource=medication_administration
        ).simplified

        # ... then fold in the associated .subject patient's fields.
        flat.update(get_subject(self, medication_administration))

        yield flat


def flattened_document_references(self) -> Generator[dict, None, None]:
"""generator that yields document references populated
with DocumentReference.subject fields and Observation codes through Observation.focus
Expand Down Expand Up @@ -607,6 +629,8 @@ def create_dataframe(
df = pd.DataFrame(db.flattened_document_references())
elif data_type == "ResearchSubject":
df = pd.DataFrame(db.flattened_research_subjects())
elif data_type == "MedicationAdministration":
df = pd.DataFrame(db.flattened_medication_administrations())
elif data_type == "Specimen":
df = pd.DataFrame(db.flattened_specimens())
else:
Expand Down
50 changes: 42 additions & 8 deletions gen3_tracker/meta/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,10 @@ def _populate_simplified_extension(extension: dict):
resource = self.resource

for _ in resource.get("extension", [resource]):
if "extension" not in _.keys():
# special case data looks like this skip it, no extension to extract
if set(_.keys()) == {"url", "size", "hash", "title"}:
continue
elif "extension" not in _.keys():
if "resourceType" not in _.keys():
_populate_simplified_extension(_)
continue
Expand Down Expand Up @@ -286,13 +289,13 @@ def identifiers(self) -> dict:
elif identifiers_len == 1:
return {"identifier": identifiers[0].get("value")}
else:
base_identifier = {"identifier": identifiers[0].get("value")}
base_identifier.update(
{
identifier.get("system").split("/")[-1]: identifier.get("value")
for identifier in identifiers[1:]
}
)
# TODO: Raise an exception if there are multiple identifiers with a "-" in them
base_identifier = {
"identifier" if "-" in identifier.get("system", "").split("/")[-1]
else identifier.get("system").split("/")[-1]: identifier.get("value")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how do you know that - is the identifier that we should use?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like most things in the data-framer, it's pattern following from looking at the data.

for identifier in identifiers
}

return base_identifier

@computed_field
Expand Down Expand Up @@ -405,6 +408,35 @@ def values(self) -> dict:
return _values


class SimplifiedMedicationAdministration(SimplifiedFHIR):
    """Flattens MedicationAdministration-specific fields that the generic
    SimplifiedFHIR traversal does not pick up."""

    @computed_field
    @property
    def values(self) -> dict:
        """Return a dictionary of 'value':value.

        Extends the base simplification with dosage, occurrence bounds,
        concatenated notes, and the regimen identifier.
        """
        _values = super().values

        # Plucking out fields that didn't get picked up by the default class simplifier.
        # dosage.dose.value -> total_dosage
        dose_value = self.resource.get("dosage", {}).get("dose", {}).get("value", None)
        if dose_value:
            _values["total_dosage"] = dose_value

        # NOTE: "occurenceTiming" (single 'r') is the FHIR R5 spelling of this
        # element, not a typo in this code.
        occurence_timing = self.resource.get("occurenceTiming", {}).get("repeat", {}).get("boundsRange")
        if occurence_timing:
            low = occurence_timing.get("low", {}).get("value")
            _values["index_date_start_days"] = low if low else None
            high = occurence_timing.get("high", {}).get("value")
            _values["index_date_end_days"] = high if high else None

        # Concatenate all notes into a single "; "-separated string.
        # NOTE(review): the FHIR Annotation datatype carries its text in "text";
        # this reads "value" — confirm against the actual data.
        for note_entry in self.resource.get("note", []):
            note = note_entry.get("value", None)
            if note:
                # bugfix: previously read _values["notes"] unconditionally,
                # which raised KeyError on the first note when the base class
                # had not already populated a "notes" key.
                existing = _values.get("notes")
                _values["notes"] = f"{existing}; {note}" if existing else note

        # identifier whose system ends in /regimen -> regimen_id
        for identifier in self.resource.get("identifier", []):
            system = identifier.get("system", None)
            if system and system.split("/")[-1] == "regimen":
                _values["regimen_id"] = identifier["value"]

        return _values


class SimplifiedCondition(SimplifiedFHIR):
@computed_field
@property
Expand Down Expand Up @@ -440,4 +472,6 @@ def build(resource: dict) -> SimplifiedFHIR:
return SimplifiedDocumentReference(resource=resource)
if resource_type == "Condition":
return SimplifiedCondition(resource=resource)
if resource_type == "MedicationAdministration":
return SimplifiedMedicationAdministration(resource=resource)
return SimplifiedFHIR(resource=resource)
17 changes: 8 additions & 9 deletions gen3_tracker/meta/skeleton.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
import orjson
from fhir.resources.attachment import Attachment
from fhir.resources.bundle import Bundle, BundleEntry, BundleEntryRequest
from fhir.resources.documentreference import DocumentReference
from fhir.resources.fhirtypes import DocumentReferenceContentType
from fhir.resources.documentreference import DocumentReference, DocumentReferenceContent
from fhir.resources.identifier import Identifier
from fhir.resources.observation import Observation
from fhir.resources.operationoutcome import OperationOutcome
Expand Down Expand Up @@ -74,7 +73,7 @@ def get_data_from_meta() -> Generator[int, None, None]:

def update_document_reference(document_reference: DocumentReference, dvc_data: DVC):
"""Update document reference with index record."""
assert document_reference.resource_type == 'DocumentReference'
assert document_reference.get_resource_type() == 'DocumentReference'
assert dvc_data.out.object_id == document_reference.id, f"{dvc_data['did']} != {document_reference.id}"
assert dvc_data.out.modified, f"dvc_data missing modified: {dvc_data}"
document_reference.docStatus = 'final'
Expand Down Expand Up @@ -108,7 +107,7 @@ def update_document_reference(document_reference: DocumentReference, dvc_data: D
attachment.title = pathlib.Path(dvc_data.out.path).name
attachment.creation = dvc_data.out.modified

content = DocumentReferenceContentType(attachment=attachment)
content = DocumentReferenceContent(attachment=attachment)

document_reference.content = [content]

Expand Down Expand Up @@ -289,10 +288,10 @@ def update_meta_files(dry_run=False, project_id=None) -> list[str]:
for _ in dvc_data(dvc_files):
resources = create_skeleton(_, project_id, meta_index())
for resource in resources:
key = f"{resource.resource_type}/{resource.id}"
key = f"{resource.get_resource_type()}/{resource.id}"
if key not in emitted_already:
emitter.emit(resource.resource_type).write(
resource.json(option=orjson.OPT_APPEND_NEWLINE)
emitter.emit(resource.get_resource_type()).write(
resource.model_dump_json() + '\n'
)
emitted_already.append(key)

Expand All @@ -317,8 +316,8 @@ def update_meta_files(dry_run=False, project_id=None) -> list[str]:
bundle.entry.append(bundle_entry)

with EmitterContextManager('META') as emitter:
emitter.emit(bundle.resource_type, file_mode='a').write(
bundle.json(option=orjson.OPT_APPEND_NEWLINE)
emitter.emit(bundle.get_resource_type(), file_mode='a').write(
bundle.model_dump_json() + '\n'
)

after_meta_files = [_ for _ in pathlib.Path('META').glob('*.ndjson')]
Expand Down
4 changes: 2 additions & 2 deletions gen3_tracker/meta/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,14 @@ def validate(directory_path: pathlib.Path, project_id=None) -> ValidateDirectory
continue

_ = parse_result.resource
ids.append(f"{_.resource_type}/{_.id}")
ids.append(f"{_.get_resource_type()}/{_.id}")
nested_references = nested_lookup('reference', parse_result.json_obj)
# https://www.hl7.org/fhir/medicationrequest-definitions.html#MedicationRequest.medication
# is a reference to a Medication resource https://www.hl7.org/fhir/references.html#CodeableReference
# so it has a reference.reference form, strip it out
nested_references = [_ for _ in nested_references if isinstance(_, str)]
references.extend(nested_references)
resources[parse_result.resource.resource_type] += 1
resources[parse_result.resource.get_resource_type()] += 1

# assert references exist
references = set(references)
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ halo
tqdm
deepdiff

fhir.resources==7.1.0 # FHIR Model
fhir.resources==7.1.0 # FHIR Model Pre-release
orjson
nested_lookup

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

setup(
name='gen3_tracker',
version='0.0.7rc2',
version='0.0.7rc6',
description='A CLI for adding version control to Gen3 data submission projects.',
long_description=long_description,
long_description_content_type='text/markdown',
Expand Down
1 change: 1 addition & 0 deletions tests/integration/test_end_to_end_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date(
with open(log_file_path, "r") as log_file:
lines = log_file.readlines()
str_lines = str(lines)

for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]:
assert (
keyword in str_lines
Expand Down
Loading
Loading