diff --git a/gen3_tracker/__init__.py b/gen3_tracker/__init__.py index 4e860e2f..f3470b98 100644 --- a/gen3_tracker/__init__.py +++ b/gen3_tracker/__init__.py @@ -5,14 +5,12 @@ import typing import uuid from collections import OrderedDict -from typing import Union, Optional +from typing import Optional import click -import pydantic from click import Context, Command from pydantic import BaseModel, field_validator - ACED_NAMESPACE = uuid.uuid3(uuid.NAMESPACE_DNS, b'aced-idp.org') ENV_VARIABLE_PREFIX = 'G3T_' @@ -24,28 +22,6 @@ } -def monkey_patch_url_validate(): - # monkey patch to allow file: urls - import fhir.resources.fhirtypes - from pydantic import FileUrl - - original_url_validate = fhir.resources.fhirtypes.Url.validate - - @classmethod - def better_url_validate(cls, value: str, field: "ModelField", config: "BaseConfig") -> Union["AnyUrl", str]: # noqa - """Allow file: urls. see https://github.com/pydantic/pydantic/issues/1983 - bugfix: addresses issue introduced with `fhir.resources`==7.0.1 - """ - if value.startswith("file:"): - _ = FileUrl(value) - return value - # return FileUrl.validate(value, field, config) - value = original_url_validate(value, field, config) - return value - - fhir.resources.fhirtypes.Url.validate = better_url_validate - - class LogConfig(BaseModel): format: str """https://docs.python.org/3/library/logging.html#logging.Formatter""" @@ -177,12 +153,3 @@ def resolve_command( # os._exit(1) # noqa raise e - - -# main -monkey_patch_url_validate() - -# default initializers for path -pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.PosixPath] = str -pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.WindowsPath] = str -pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.Path] = str diff --git a/gen3_tracker/common/__init__.py b/gen3_tracker/common/__init__.py index 5aec35a7..35bbf3db 100644 --- a/gen3_tracker/common/__init__.py +++ b/gen3_tracker/common/__init__.py @@ -321,7 +321,7 @@ def create_resource_id(resource, project_id) -> str: assert resource, "resource 
required" assert project_id, "project_id required" identifier_string = identifier_to_string(resource.identifier) - return str(uuid.uuid5(ACED_NAMESPACE, f"{project_id}/{resource.resource_type}/{identifier_string}")) + return str(uuid.uuid5(ACED_NAMESPACE, f"{project_id}/{resource.get_resource_type()}/{identifier_string}")) def create_object_id(path: str, project_id: str) -> str: @@ -344,7 +344,7 @@ def assert_valid_id(resource, project_id): """Ensure that the id is correct.""" assert resource, "resource required" assert project_id, "project_id required" - if resource.resource_type == "DocumentReference": + if resource.get_resource_type() == "DocumentReference": document_reference: DocumentReference = resource official_identifier = document_reference.content[0].attachment.url recreate_id = create_object_id(official_identifier, project_id) @@ -354,7 +354,7 @@ def assert_valid_id(resource, project_id): recreate_id = create_resource_id(resource, project_id) if resource.id == recreate_id: return - msg = f"The current {resource.resource_type}.id {resource.id} does not equal the calculated one {recreate_id}, has the project id changed? current:{project_id} {resource.resource_type}:{official_identifier}" + msg = f"The current {resource.get_resource_type()}.id {resource.id} does not equal the calculated one {recreate_id}, has the project id changed? 
current:{project_id} {resource.get_resource_type()}:{official_identifier}" raise Exception(msg) @@ -523,7 +523,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): elif hasattr(self.output.obj, 'model_dump'): _.update(self.output.obj.model_dump()) else: - _.update(self.output.obj.dict()) + _.update(self.output.obj.model_dump()) rc = self.output.exit_code if exc_type is not None: if isinstance(self.output.obj, dict): diff --git a/gen3_tracker/meta/__init__.py b/gen3_tracker/meta/__init__.py index 963d5abf..256dfcc5 100644 --- a/gen3_tracker/meta/__init__.py +++ b/gen3_tracker/meta/__init__.py @@ -47,10 +47,10 @@ def parse_obj(resource: dict, validate=True) -> ParseResult: try: assert 'resourceType' in resource, "Dict missing `resourceType`, is it a FHIR dict?" klass = FHIR_CLASSES.get_fhir_model_class(resource['resourceType']) - _ = klass.parse_obj(resource) + _ = klass.model_validate(resource) if validate: # trigger object traversal, see monkey patch below, at bottom of file - _.dict() + _.model_dump() return ParseResult(resource=_, exception=None, path=None, resource_id=_.id) except (ValidationError, AssertionError) as e: return ParseResult(resource=None, exception=e, path=None, resource_id=resource.get('id', None)) @@ -68,11 +68,11 @@ def _entry_iterator(parse_result: ParseResult) -> Iterator[ParseResult]: if _ is None: break if hasattr(_, 'resource') and _.resource: # BundleEntry - yield ParseResult(path=_path, resource=_.resource, offset=offset, exception=None, json_obj=_.resource.dict()) + yield ParseResult(path=_path, resource=_.resource, offset=offset, exception=None, json_obj=_.resource.model_dump()) elif hasattr(_, 'item'): # ListEntry - yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.dict()) + yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.model_dump()) else: - yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.dict()) + 
yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.model_dump()) offset += 1 pass @@ -81,7 +81,7 @@ def _has_entries(_: ParseResult): """FHIR types Bundles List have entries""" if _.resource is None: return False - return _.resource.resource_type in ["List"] and _.resource.entry is not None + return _.resource.get_resource_type() in ["List"] and _.resource.entry is not None def directory_reader(directory_path: str, diff --git a/gen3_tracker/meta/cli.py b/gen3_tracker/meta/cli.py index 3dd37bfd..9462c7c3 100644 --- a/gen3_tracker/meta/cli.py +++ b/gen3_tracker/meta/cli.py @@ -103,7 +103,7 @@ def render_graph(config: Config, directory_path: str, output_path: str, browser: @meta.command("dataframe") @click.argument('data_type', required=True, - type=click.Choice(['Specimen', 'DocumentReference', 'ResearchSubject']), + type=click.Choice(['Specimen', 'DocumentReference', 'ResearchSubject', "MedicationAdministration"]), default=None) @click.argument("directory_path", type=click.Path(exists=True, file_okay=False), diff --git a/gen3_tracker/meta/dataframer.py b/gen3_tracker/meta/dataframer.py index c7d1cd3f..1cd2129d 100644 --- a/gen3_tracker/meta/dataframer.py +++ b/gen3_tracker/meta/dataframer.py @@ -504,6 +504,28 @@ def flattened_research_subjects(self) -> Generator[dict, None, None]: yield flat_research_subject + + def flattened_medication_administrations(self) -> Generator[dict, None, None]: + + # get all MedicationAdministrations + cursor = self.connect() + cursor.execute( + "SELECT * FROM resources where resource_type = ?", ("MedicationAdministration",) + ) + + # get research subject and associated .subject patient + for _, _, raw_medication_administration in cursor.fetchall(): + medication_administration = json.loads(raw_medication_administration) + flat_medication_administration = SimplifiedResource.build( + resource=medication_administration + ).simplified + + patient = get_subject(self, medication_administration) + 
flat_medication_administration.update(patient) + + yield flat_medication_administration + + def flattened_document_references(self) -> Generator[dict, None, None]: """generator that yields document references populated with DocumentReference.subject fields and Observation codes through Observation.focus @@ -607,6 +629,8 @@ def create_dataframe( df = pd.DataFrame(db.flattened_document_references()) elif data_type == "ResearchSubject": df = pd.DataFrame(db.flattened_research_subjects()) + elif data_type == "MedicationAdministration": + df = pd.DataFrame(db.flattened_medication_administrations()) elif data_type == "Specimen": df = pd.DataFrame(db.flattened_specimens()) else: diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index c6beda14..9c1ed456 100644 --- a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -219,7 +219,10 @@ def _populate_simplified_extension(extension: dict): resource = self.resource for _ in resource.get("extension", [resource]): - if "extension" not in _.keys(): + # special case data looks like this skip it, no extension to extract + if set(_.keys()) == {"url", "size", "hash", "title"}: + continue + elif "extension" not in _.keys(): if "resourceType" not in _.keys(): _populate_simplified_extension(_) continue @@ -286,13 +289,13 @@ def identifiers(self) -> dict: elif identifiers_len == 1: return {"identifier": identifiers[0].get("value")} else: - base_identifier = {"identifier": identifiers[0].get("value")} - base_identifier.update( - { - identifier.get("system").split("/")[-1]: identifier.get("value") - for identifier in identifiers[1:] - } - ) + # Todo: Raise an exception if there are multiple identifiers with a "-" in them + base_identifier = { + "identifier" if "-" in identifier.get("system", "").split("/")[-1] + else identifier.get("system").split("/")[-1]: identifier.get("value") + for identifier in identifiers + } + + return base_identifier @computed_field @@ -405,6 +408,35 @@ def values(self) -> 
dict: return _values +class SimplifiedMedicationAdministration(SimplifiedFHIR): + @computed_field + @property + def values(self) -> dict: + """Return a dictionary of 'value':value.""" + _values = super().values + # Plucking out fields that didn't get picked up by default class simplifier.' + dose_value = self.resource.get("dosage", {}).get("dose", {}).get("value", None) + if dose_value: + _values["total_dosage"] = dose_value + occurenceTiming = self.resource.get("occurenceTiming", {}).get("repeat", {}).get("boundsRange") + if occurenceTiming: + low = occurenceTiming.get("low", {}).get("value") + _values["index_date_start_days"] = low if low else None + high = occurenceTiming.get("high", {}).get("value") + _values["index_date_end_days"] = high if high else None + for notes in self.resource.get("note", []): + note = notes.get("value", None) + if note: + # Probably best to concat notes together + _values["notes"] = _values["notes"] + "; " + note + for identifier in self.resource.get("identifier", []): + system = identifier.get("system", None) + if system: + if system.split("/")[-1] == "regimen": + _values["regimen_id"] = identifier["value"] + return _values + + class SimplifiedCondition(SimplifiedFHIR): @computed_field @property @@ -440,4 +472,6 @@ def build(resource: dict) -> SimplifiedFHIR: return SimplifiedDocumentReference(resource=resource) if resource_type == "Condition": return SimplifiedCondition(resource=resource) + if resource_type == "MedicationAdministration": + return SimplifiedMedicationAdministration(resource=resource) return SimplifiedFHIR(resource=resource) diff --git a/gen3_tracker/meta/skeleton.py b/gen3_tracker/meta/skeleton.py index 2e5df9ff..e3531885 100644 --- a/gen3_tracker/meta/skeleton.py +++ b/gen3_tracker/meta/skeleton.py @@ -7,8 +7,7 @@ import orjson from fhir.resources.attachment import Attachment from fhir.resources.bundle import Bundle, BundleEntry, BundleEntryRequest -from fhir.resources.documentreference import DocumentReference -from 
fhir.resources.fhirtypes import DocumentReferenceContentType +from fhir.resources.documentreference import DocumentReference, DocumentReferenceContent from fhir.resources.identifier import Identifier from fhir.resources.observation import Observation from fhir.resources.operationoutcome import OperationOutcome @@ -74,7 +73,7 @@ def get_data_from_meta() -> Generator[int, None, None]: def update_document_reference(document_reference: DocumentReference, dvc_data: DVC): """Update document reference with index record.""" - assert document_reference.resource_type == 'DocumentReference' + assert document_reference.get_resource_type() == 'DocumentReference' assert dvc_data.out.object_id == document_reference.id, f"{dvc_data['did']} != {document_reference.id}" assert dvc_data.out.modified, f"dvc_data missing modified: {dvc_data}" document_reference.docStatus = 'final' @@ -108,7 +107,7 @@ def update_document_reference(document_reference: DocumentReference, dvc_data: D attachment.title = pathlib.Path(dvc_data.out.path).name attachment.creation = dvc_data.out.modified - content = DocumentReferenceContentType(attachment=attachment) + content = DocumentReferenceContent(attachment=attachment) document_reference.content = [content] @@ -289,10 +288,10 @@ def update_meta_files(dry_run=False, project_id=None) -> list[str]: for _ in dvc_data(dvc_files): resources = create_skeleton(_, project_id, meta_index()) for resource in resources: - key = f"{resource.resource_type}/{resource.id}" + key = f"{resource.get_resource_type()}/{resource.id}" if key not in emitted_already: - emitter.emit(resource.resource_type).write( - resource.json(option=orjson.OPT_APPEND_NEWLINE) + emitter.emit(resource.get_resource_type()).write( + resource.model_dump_json() + '\n' ) emitted_already.append(key) @@ -317,8 +316,8 @@ def update_meta_files(dry_run=False, project_id=None) -> list[str]: bundle.entry.append(bundle_entry) with EmitterContextManager('META') as emitter: - emitter.emit(bundle.resource_type, 
file_mode='a').write( - bundle.json(option=orjson.OPT_APPEND_NEWLINE) + emitter.emit(bundle.get_resource_type(), file_mode='a').write( + bundle.model_dump_json() + '\n' ) after_meta_files = [_ for _ in pathlib.Path('META').glob('*.ndjson')] diff --git a/gen3_tracker/meta/validator.py b/gen3_tracker/meta/validator.py index 6630c196..941e2d3f 100644 --- a/gen3_tracker/meta/validator.py +++ b/gen3_tracker/meta/validator.py @@ -98,14 +98,14 @@ def validate(directory_path: pathlib.Path, project_id=None) -> ValidateDirectory continue _ = parse_result.resource - ids.append(f"{_.resource_type}/{_.id}") + ids.append(f"{_.get_resource_type()}/{_.id}") nested_references = nested_lookup('reference', parse_result.json_obj) # https://www.hl7.org/fhir/medicationrequest-definitions.html#MedicationRequest.medication # is a reference to a Medication resource https://www.hl7.org/fhir/references.html#CodeableReference # so it has a reference.reference form, strip it out nested_references = [_ for _ in nested_references if isinstance(_, str)] references.extend(nested_references) - resources[parse_result.resource.resource_type] += 1 + resources[parse_result.resource.get_resource_type()] += 1 # assert references exist references = set(references) diff --git a/requirements.txt b/requirements.txt index 7005d471..ef36c0f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ halo tqdm deepdiff -fhir.resources==7.1.0 # FHIR Model +fhir.resources==7.1.0 # FHIR Model Pre-release orjson nested_lookup diff --git a/setup.py b/setup.py index 1daa321f..47573991 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='gen3_tracker', - version='0.0.7rc2', + version='0.0.7rc6', description='A CLI for adding version control to Gen3 data submission projects.', long_description=long_description, long_description_content_type='text/markdown', diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 5fdca0c0..6237edab 100644 
--- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -316,6 +316,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date( with open(log_file_path, "r") as log_file: lines = log_file.readlines() str_lines = str(lines) + for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: assert ( keyword in str_lines diff --git a/tests/unit/test_deprecated_pydantic_v1_monkey_patches.py b/tests/unit/test_deprecated_pydantic_v1_monkey_patches.py new file mode 100644 index 00000000..895786b5 --- /dev/null +++ b/tests/unit/test_deprecated_pydantic_v1_monkey_patches.py @@ -0,0 +1,52 @@ +import pathlib +from typing import Annotated + +import pydantic +from fhir.resources.attachment import Attachment +from pydantic import UrlConstraints, AnyUrl + + +def test_validate_any_url(): + + class MyModel(pydantic.BaseModel): + url: Annotated[AnyUrl, UrlConstraints(host_required=False)] + + _ = MyModel(url='file:///foo/bar') + assert _, "file:///foo/bar is a valid file url" + assert _.url.host is None, "file:///foo/bar has no host" + + _ = MyModel(url='xxx:///XXXX') + assert _, "file:///foo/bar is a valid file url" + assert _.url.host is None, "file:///foo/bar has no host" + + +def test_fhir_url(): + """Previously a monkey patch was used to enable file urls. Any xs:anyURI is now allowed. See https://w3.org/TR/xmlschema-2/#anyURI + From https://hl7.org/fhir/datatypes.html#url (This regex is very permissive, but URIs must be valid. 
Implementers are welcome to use more specific regex statements for a URI in specific contexts)""" + attachment: Attachment = Attachment(url='file:///foo/bar') + assert attachment.validate_after_model_construction() + + attachment: Attachment = Attachment.model_validate({'url': 'file:///foo/bar'}) + assert attachment + + attachment: Attachment = Attachment.model_validate({'url': 'xxx:///XXXX'}) + assert attachment + + attachment: Attachment = Attachment.model_validate({'url': 'FOO BAR'}) + assert attachment + + +def test_path_encoders(): + """Previously a monkey patch was used to enable correct serialization of path objects""" + # eg + # # default initializers for path + # pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.PosixPath] = str + # pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.WindowsPath] = str + # pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.Path] = str + + class MyModel(pydantic.BaseModel): + path: pathlib.Path + + _ = MyModel(path=pathlib.Path('/foo/bar')) + assert _, "/foo/bar is a valid path" + _.model_dump()['path'] == '/foo/bar' diff --git a/tests/unit/test_flatten_fhir_example.py b/tests/unit/test_flatten_fhir_example.py index 9b923d88..d1bbb868 100644 --- a/tests/unit/test_flatten_fhir_example.py +++ b/tests/unit/test_flatten_fhir_example.py @@ -203,6 +203,7 @@ def flatten_scalars(self: DomainResource) -> dict: def flatten_references(self: DomainResource) -> dict: """Convert the DomainResource instance to a dictionary.""" + fields = [_ for _ in self.__fields__.keys() if not _.endswith("__ext")] _ = {} # if any top level field in this resource is a Reference, use the Reference.reference https://build.fhir.org/references-definitions.html#Reference.reference @@ -327,6 +328,7 @@ def patched_scalars_references_identifiers_observation() -> bool: def test_patient_without_flatten(patient_dict: dict): """This patient object should NOT have a 'flatten' method.""" # without path dependency, just have a plain patient object with no flatten method + patient = 
Patient.parse_obj(patient_dict) assert not hasattr( patient, "flatten" @@ -335,6 +337,7 @@ def test_patient_without_flatten(patient_dict: dict): def test_patient_with_simple(patched_domain_resource_simple: bool, patient_dict: dict): """This patient object should have a 'flatten' method.""" + patient = Patient.parse_obj(patient_dict) assert hasattr( patient, "flatten" @@ -397,6 +400,7 @@ def test_specimen_with_scalars_references_identifiers( patched_scalars_references_identifiers: bool, specimen_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" + specimen = Specimen.parse_obj(specimen_dict) assert hasattr( specimen, "flatten" @@ -416,6 +420,7 @@ def test_eye_color_observation( observation_eye_color_dict: dict, ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" + observation = Observation.parse_obj(observation_eye_color_dict) assert hasattr( observation, "flatten" @@ -429,11 +434,11 @@ def test_eye_color_observation( "subject": "Patient/example", } - def test_bmi_observation( patched_scalars_references_identifiers_observation: bool, observation_bmi_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" + observation = Observation.parse_obj(observation_bmi_dict) assert hasattr( observation, "flatten" diff --git a/tests/unit/test_validate_float_to_int.py b/tests/unit/test_validate_float_to_int.py new file mode 100644 index 00000000..d10fc83c --- /dev/null +++ b/tests/unit/test_validate_float_to_int.py @@ -0,0 +1,123 @@ +import pytest +from fhir.resources.observation import Observation +from fhir.resources.patient import Patient +from pydantic import ValidationError + + +def test_validate_observation(): + """Test validate observation.""" + false = False + observation_dict = { + "resourceType": "Observation", + "id": "9d11e26b-0307-5573-aee8-d145bdc259f3", + "status": 
"final", + "category": [ + { + "coding": [ + { + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "code": "laboratory", + "display": "Laboratory" + } + ] + } + ], + "code": { + "coding": [ + { + "system": "http://loinc.org", + "code": "81247-9", + "display": "Master HL7 genetic variant reporting panel" + } + ] + }, + "subject": { + "reference": "Patient/16244c6a-028a-5d8b-ac80-22e7b870544b" + }, + "specimen": { + "reference": "Specimen/f7f2ceb6-53f3-561a-960d-0c47700c14a2" + }, + "focus": [ + { + "reference": "Specimen/f7f2ceb6-53f3-561a-960d-0c47700c14a2" + } + ], + "effectiveDateTime": "2024-06-03T08:00:00+00:00", + "valueString": "Sequencing parameters", + "component": [ + { + "code": { + "coding": [ + { + "system": "https://cadsr.cancer.gov/sample_laboratory_observation", + "code": "weight", + "display": "weight" + } + ], + "text": "weight" + }, + "valueInteger": 32.9 + }, + { + "code": { + "coding": [ + { + "system": "https://cadsr.cancer.gov/sample_laboratory_observation", + "code": "is_ffpe", + "display": "is_ffpe" + } + ], + "text": "is_ffpe" + }, + "valueBoolean": false + }, + { + "code": { + "coding": [ + { + "system": "https://cadsr.cancer.gov/sample_laboratory_observation", + "code": "sample_type", + "display": "sample_type" + } + ], + "text": "sample_type" + }, + "valueString": "Solid Tissue Normal" + }, + { + "code": { + "coding": [ + { + "system": "https://cadsr.cancer.gov/sample_laboratory_observation", + "code": "updated_datetime", + "display": "updated_datetime" + } + ], + "text": "updated_datetime" + }, + "valueDateTime": "2018-09-06T17:41:51.247648-05:00" + } + ] + } + observation_dict['component'][0]['valueInteger'] = 32.0 + observation = Observation.model_validate(observation_dict) + + assert observation, "Should have accepted valueInteger: 32.0" + + observation_dict['component'][0]['valueInteger'] = 32.9 + + with pytest.raises(ValidationError): + Observation.model_validate(observation_dict) + + +def test_patient(): + 
with pytest.raises(ValidationError): + patient_dict = {"multipleBirthInteger": 32.9} + patient = Patient.model_validate(patient_dict) + assert patient.multipleBirthInteger == 32.9, "Should not have accepted multipleBirthInteger: 32.9" + + patient_dict = {"multipleBirthInteger": 32.0} + Patient.model_validate(patient_dict) + + patient_dict = {"multipleBirthInteger": 32} + Patient.model_validate(patient_dict)