Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 1 addition & 34 deletions gen3_tracker/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@
import typing
import uuid
from collections import OrderedDict
from typing import Union, Optional
from typing import Optional

import click
import pydantic
from click import Context, Command
from pydantic import BaseModel, field_validator


ACED_NAMESPACE = uuid.uuid3(uuid.NAMESPACE_DNS, b'aced-idp.org')
ENV_VARIABLE_PREFIX = 'G3T_'

Expand All @@ -24,28 +22,6 @@
}


def monkey_patch_url_validate():
    """Patch ``fhir.resources``' Url validator so ``file:`` URLs are accepted.

    Works around a regression introduced with ``fhir.resources``==7.0.1 where
    ``file:`` scheme URLs fail validation.
    See https://github.com/pydantic/pydantic/issues/1983
    """
    # monkey patch to allow file: urls
    # Imported lazily so the patch is only wired up when this function is called.
    import fhir.resources.fhirtypes
    from pydantic import FileUrl

    # Keep a reference to the stock validator so non-file URLs still go
    # through the original validation path.
    original_url_validate = fhir.resources.fhirtypes.Url.validate

    @classmethod
    def better_url_validate(cls, value: str, field: "ModelField", config: "BaseConfig") -> Union["AnyUrl", str]: # noqa
        """Allow file: urls. see https://github.com/pydantic/pydantic/issues/1983
        bugfix: addresses issue introduced with `fhir.resources`==7.0.1
        """
        if value.startswith("file:"):
            # Validate as a FileUrl (raises on malformed input) but return the
            # original string unchanged.
            _ = FileUrl(value)
            return value
        # return FileUrl.validate(value, field, config)
        # Delegate everything else to the saved original validator.
        value = original_url_validate(value, field, config)
        return value

    # Replace the classmethod on the type; assigning the @classmethod-decorated
    # function preserves the classmethod descriptor behavior.
    fhir.resources.fhirtypes.Url.validate = better_url_validate


class LogConfig(BaseModel):
format: str
"""https://docs.python.org/3/library/logging.html#logging.Formatter"""
Expand Down Expand Up @@ -177,12 +153,3 @@ def resolve_command(
# os._exit(1) # noqa

raise e


# main
monkey_patch_url_validate()

# default initializers for path
pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.PosixPath] = str
pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.WindowsPath] = str
pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.Path] = str
8 changes: 4 additions & 4 deletions gen3_tracker/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ def create_resource_id(resource, project_id) -> str:
assert resource, "resource required"
assert project_id, "project_id required"
identifier_string = identifier_to_string(resource.identifier)
return str(uuid.uuid5(ACED_NAMESPACE, f"{project_id}/{resource.resource_type}/{identifier_string}"))
return str(uuid.uuid5(ACED_NAMESPACE, f"{project_id}/{resource.get_resource_type()}/{identifier_string}"))


def create_object_id(path: str, project_id: str) -> str:
Expand All @@ -344,7 +344,7 @@ def assert_valid_id(resource, project_id):
"""Ensure that the id is correct."""
assert resource, "resource required"
assert project_id, "project_id required"
if resource.resource_type == "DocumentReference":
if resource.get_resource_type() == "DocumentReference":
document_reference: DocumentReference = resource
official_identifier = document_reference.content[0].attachment.url
recreate_id = create_object_id(official_identifier, project_id)
Expand All @@ -354,7 +354,7 @@ def assert_valid_id(resource, project_id):
recreate_id = create_resource_id(resource, project_id)
if resource.id == recreate_id:
return
msg = f"The current {resource.resource_type}.id {resource.id} does not equal the calculated one {recreate_id}, has the project id changed? current:{project_id} {resource.resource_type}:{official_identifier}"
msg = f"The current {resource.get_resource_type()}.id {resource.id} does not equal the calculated one {recreate_id}, has the project id changed? current:{project_id} {resource.get_resource_type()}:{official_identifier}"
raise Exception(msg)


Expand Down Expand Up @@ -523,7 +523,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
elif hasattr(self.output.obj, 'model_dump'):
_.update(self.output.obj.model_dump())
else:
_.update(self.output.obj.dict())
_.update(self.output.obj.model_dump())
rc = self.output.exit_code
if exc_type is not None:
if isinstance(self.output.obj, dict):
Expand Down
12 changes: 6 additions & 6 deletions gen3_tracker/meta/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ def parse_obj(resource: dict, validate=True) -> ParseResult:
try:
assert 'resourceType' in resource, "Dict missing `resourceType`, is it a FHIR dict?"
klass = FHIR_CLASSES.get_fhir_model_class(resource['resourceType'])
_ = klass.parse_obj(resource)
_ = klass.model_validate(resource)
if validate:
# trigger object traversal, see monkey patch below, at bottom of file
_.dict()
_.model_dump()
return ParseResult(resource=_, exception=None, path=None, resource_id=_.id)
except (ValidationError, AssertionError) as e:
return ParseResult(resource=None, exception=e, path=None, resource_id=resource.get('id', None))
Expand All @@ -68,11 +68,11 @@ def _entry_iterator(parse_result: ParseResult) -> Iterator[ParseResult]:
if _ is None:
break
if hasattr(_, 'resource') and _.resource: # BundleEntry
yield ParseResult(path=_path, resource=_.resource, offset=offset, exception=None, json_obj=_.resource.dict())
yield ParseResult(path=_path, resource=_.resource, offset=offset, exception=None, json_obj=_.resource.model_dump())
elif hasattr(_, 'item'): # ListEntry
yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.dict())
yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.model_dump())
else:
yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.dict())
yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.model_dump())
offset += 1
pass

Expand All @@ -81,7 +81,7 @@ def _has_entries(_: ParseResult):
"""FHIR types Bundles List have entries"""
if _.resource is None:
return False
return _.resource.resource_type in ["List"] and _.resource.entry is not None
return _.resource.get_resource_type() in ["List"] and _.resource.entry is not None


def directory_reader(directory_path: str,
Expand Down
2 changes: 1 addition & 1 deletion gen3_tracker/meta/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def render_graph(config: Config, directory_path: str, output_path: str, browser:
@meta.command("dataframe")
@click.argument('data_type',
required=True,
type=click.Choice(['Specimen', 'DocumentReference', 'ResearchSubject']),
type=click.Choice(['Specimen', 'DocumentReference', 'ResearchSubject', "MedicationAdministration"]),
default=None)
@click.argument("directory_path",
type=click.Path(exists=True, file_okay=False),
Expand Down
24 changes: 24 additions & 0 deletions gen3_tracker/meta/dataframer.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,28 @@ def flattened_research_subjects(self) -> Generator[dict, None, None]:

yield flat_research_subject


def flattened_medication_administrations(self) -> Generator[dict, None, None]:
    """Yield one flat dict per MedicationAdministration, merged with the fields
    of its ``.subject`` Patient."""

    # Fetch every MedicationAdministration row from the local resource store.
    cursor = self.connect()
    cursor.execute(
        "SELECT * FROM resources where resource_type = ?", ("MedicationAdministration",)
    )

    for _key, _resource_type, raw_medication_administration in cursor.fetchall():
        medication_administration = json.loads(raw_medication_administration)

        # Flatten the resource itself ...
        flat = SimplifiedResource.build(
            resource=medication_administration
        ).simplified

        # ... then fold in the associated .subject patient's fields.
        flat.update(get_subject(self, medication_administration))

        yield flat


def flattened_document_references(self) -> Generator[dict, None, None]:
"""generator that yields document references populated
with DocumentReference.subject fields and Observation codes through Observation.focus
Expand Down Expand Up @@ -607,6 +629,8 @@ def create_dataframe(
df = pd.DataFrame(db.flattened_document_references())
elif data_type == "ResearchSubject":
df = pd.DataFrame(db.flattened_research_subjects())
elif data_type == "MedicationAdministration":
df = pd.DataFrame(db.flattened_medication_administrations())
elif data_type == "Specimen":
df = pd.DataFrame(db.flattened_specimens())
else:
Expand Down
50 changes: 42 additions & 8 deletions gen3_tracker/meta/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,10 @@ def _populate_simplified_extension(extension: dict):
resource = self.resource

for _ in resource.get("extension", [resource]):
if "extension" not in _.keys():
# special case data looks like this skip it, no extension to extract
if set(_.keys()) == {"url", "size", "hash", "title"}:
continue
elif "extension" not in _.keys():
if "resourceType" not in _.keys():
_populate_simplified_extension(_)
continue
Expand Down Expand Up @@ -286,13 +289,13 @@ def identifiers(self) -> dict:
elif identifiers_len == 1:
return {"identifier": identifiers[0].get("value")}
else:
base_identifier = {"identifier": identifiers[0].get("value")}
base_identifier.update(
{
identifier.get("system").split("/")[-1]: identifier.get("value")
for identifier in identifiers[1:]
}
)
# TODO: Raise an exception if there are multiple identifiers with a "-" in them
base_identifier = {
"identifier" if "-" in identifier.get("system", "").split("/")[-1]
else identifier.get("system").split("/")[-1]: identifier.get("value")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how do you know that - is the identifier that we should use?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like most things in the data-framer, it's pattern following from looking at the data.

for identifier in identifiers
}

return base_identifier

@computed_field
Expand Down Expand Up @@ -405,6 +408,35 @@ def values(self) -> dict:
return _values


class SimplifiedMedicationAdministration(SimplifiedFHIR):
    """Flattens MedicationAdministration-specific fields that the generic
    SimplifiedFHIR traversal does not pick up."""

    @computed_field
    @property
    def values(self) -> dict:
        """Return a dictionary of 'value':value.

        Extends the base simplification with dosage, occurrence bounds,
        concatenated notes, and the regimen identifier.
        """
        _values = super().values

        # Plucking out fields that didn't get picked up by the default class simplifier.
        # dosage.dose.value -> total_dosage
        dose_value = self.resource.get("dosage", {}).get("dose", {}).get("value", None)
        if dose_value:
            _values["total_dosage"] = dose_value

        # NOTE: "occurenceTiming" (single 'r') is the FHIR R5 spelling of this
        # element, not a typo in this code.
        occurence_timing = self.resource.get("occurenceTiming", {}).get("repeat", {}).get("boundsRange")
        if occurence_timing:
            low = occurence_timing.get("low", {}).get("value")
            _values["index_date_start_days"] = low if low else None
            high = occurence_timing.get("high", {}).get("value")
            _values["index_date_end_days"] = high if high else None

        # Concatenate all notes into a single "; "-separated string.
        # NOTE(review): the FHIR Annotation datatype carries its text in "text";
        # this reads "value" — confirm against the actual data.
        for note_entry in self.resource.get("note", []):
            note = note_entry.get("value", None)
            if note:
                # bugfix: previously read _values["notes"] unconditionally,
                # which raised KeyError on the first note when the base class
                # had not already populated a "notes" key.
                existing = _values.get("notes")
                _values["notes"] = f"{existing}; {note}" if existing else note

        # identifier whose system ends in /regimen -> regimen_id
        for identifier in self.resource.get("identifier", []):
            system = identifier.get("system", None)
            if system and system.split("/")[-1] == "regimen":
                _values["regimen_id"] = identifier["value"]

        return _values


class SimplifiedCondition(SimplifiedFHIR):
@computed_field
@property
Expand Down Expand Up @@ -440,4 +472,6 @@ def build(resource: dict) -> SimplifiedFHIR:
return SimplifiedDocumentReference(resource=resource)
if resource_type == "Condition":
return SimplifiedCondition(resource=resource)
if resource_type == "MedicationAdministration":
return SimplifiedMedicationAdministration(resource=resource)
return SimplifiedFHIR(resource=resource)
17 changes: 8 additions & 9 deletions gen3_tracker/meta/skeleton.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
import orjson
from fhir.resources.attachment import Attachment
from fhir.resources.bundle import Bundle, BundleEntry, BundleEntryRequest
from fhir.resources.documentreference import DocumentReference
from fhir.resources.fhirtypes import DocumentReferenceContentType
from fhir.resources.documentreference import DocumentReference, DocumentReferenceContent
from fhir.resources.identifier import Identifier
from fhir.resources.observation import Observation
from fhir.resources.operationoutcome import OperationOutcome
Expand Down Expand Up @@ -74,7 +73,7 @@ def get_data_from_meta() -> Generator[int, None, None]:

def update_document_reference(document_reference: DocumentReference, dvc_data: DVC):
"""Update document reference with index record."""
assert document_reference.resource_type == 'DocumentReference'
assert document_reference.get_resource_type() == 'DocumentReference'
assert dvc_data.out.object_id == document_reference.id, f"{dvc_data['did']} != {document_reference.id}"
assert dvc_data.out.modified, f"dvc_data missing modified: {dvc_data}"
document_reference.docStatus = 'final'
Expand Down Expand Up @@ -108,7 +107,7 @@ def update_document_reference(document_reference: DocumentReference, dvc_data: D
attachment.title = pathlib.Path(dvc_data.out.path).name
attachment.creation = dvc_data.out.modified

content = DocumentReferenceContentType(attachment=attachment)
content = DocumentReferenceContent(attachment=attachment)

document_reference.content = [content]

Expand Down Expand Up @@ -289,10 +288,10 @@ def update_meta_files(dry_run=False, project_id=None) -> list[str]:
for _ in dvc_data(dvc_files):
resources = create_skeleton(_, project_id, meta_index())
for resource in resources:
key = f"{resource.resource_type}/{resource.id}"
key = f"{resource.get_resource_type()}/{resource.id}"
if key not in emitted_already:
emitter.emit(resource.resource_type).write(
resource.json(option=orjson.OPT_APPEND_NEWLINE)
emitter.emit(resource.get_resource_type()).write(
resource.model_dump_json() + '\n'
)
emitted_already.append(key)

Expand All @@ -317,8 +316,8 @@ def update_meta_files(dry_run=False, project_id=None) -> list[str]:
bundle.entry.append(bundle_entry)

with EmitterContextManager('META') as emitter:
emitter.emit(bundle.resource_type, file_mode='a').write(
bundle.json(option=orjson.OPT_APPEND_NEWLINE)
emitter.emit(bundle.get_resource_type(), file_mode='a').write(
bundle.model_dump_json() + '\n'
)

after_meta_files = [_ for _ in pathlib.Path('META').glob('*.ndjson')]
Expand Down
4 changes: 2 additions & 2 deletions gen3_tracker/meta/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,14 @@ def validate(directory_path: pathlib.Path, project_id=None) -> ValidateDirectory
continue

_ = parse_result.resource
ids.append(f"{_.resource_type}/{_.id}")
ids.append(f"{_.get_resource_type()}/{_.id}")
nested_references = nested_lookup('reference', parse_result.json_obj)
# https://www.hl7.org/fhir/medicationrequest-definitions.html#MedicationRequest.medication
# is a reference to a Medication resource https://www.hl7.org/fhir/references.html#CodeableReference
# so it has a reference.reference form, strip it out
nested_references = [_ for _ in nested_references if isinstance(_, str)]
references.extend(nested_references)
resources[parse_result.resource.resource_type] += 1
resources[parse_result.resource.get_resource_type()] += 1

# assert references exist
references = set(references)
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ halo
tqdm
deepdiff

fhir.resources==7.1.0 # FHIR Model
fhir.resources==7.1.0 # FHIR Model Pre-release
orjson
nested_lookup

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

setup(
name='gen3_tracker',
version='0.0.7rc2',
version='0.0.7rc6',
description='A CLI for adding version control to Gen3 data submission projects.',
long_description=long_description,
long_description_content_type='text/markdown',
Expand Down
1 change: 1 addition & 0 deletions tests/integration/test_end_to_end_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date(
with open(log_file_path, "r") as log_file:
lines = log_file.readlines()
str_lines = str(lines)

for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]:
assert (
keyword in str_lines
Expand Down
Loading
Loading