From 8155ed1eeb3782c8ba59d96d826991b4869035f4 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 2 Dec 2024 11:32:27 -0800 Subject: [PATCH 01/18] update pytest and version number --- setup.py | 2 +- tests/integration/test_end_to_end_workflow.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index ab0c30b5..1daa321f 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='gen3_tracker', - version='0.0.7rc1', + version='0.0.7rc2', description='A CLI for adding version control to Gen3 data submission projects.', long_description=long_description, long_description_content_type='text/markdown', diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 429ae5fb..86b03fb0 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -58,7 +58,7 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: run(runner, ["--debug", "meta", "validate"]) # update the file - test_file = pathlib.Path("my-project-data/hello.txt") + test_file = Path("my-project-data/hello.txt") test_file.parent.mkdir(parents=True, exist_ok=True) test_file.write_text('hello UPDATE\n') # re-add the file @@ -213,7 +213,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec run(runner, ["init", project_id, "--approve"]) result = run(runner, ["push", "--skip_validate", "--overwrite"], - expected_exit_code=0, + expected_exit_code=1, expected_files=[log_file_path] ) @@ -225,9 +225,8 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec lines = log_file.readlines() str_lines = str(lines) - assert "/content/0/attachment/creation" in str_lines, f"expected errors to describe to /content/0/attachment/creation, instead got: \n{str_lines}" - assert "jsonschema" in str_lines, f"expected errors to mention jsonschema, instead got: \n{str_lines}" - assert invalid_date in 
str_lines, f"expected invalid date {invalid_date} to be logged, instead got: \n{str_lines} " + for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: + assert keyword in str_lines, f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' def test_push_fails_with_no_write_permissions(runner: CliRunner, project_id: str, tmp_path: Path): From ec045a3cf49d42c8ab604188e546d796da77e0b4 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 2 Dec 2024 11:36:29 -0800 Subject: [PATCH 02/18] linting --- gen3_tracker/gen3/jobs.py | 128 ++- gen3_tracker/git/cli.py | 853 +++++++++++++----- gen3_tracker/meta/entities.py | 21 +- tests/__init__.py | 18 +- tests/integration/__init__.py | 33 +- tests/integration/conftest.py | 2 +- tests/integration/test_bucket_import.py | 17 +- tests/integration/test_bundle.py | 32 +- tests/integration/test_end_to_end_workflow.py | 195 +++- tests/unit/meta/conftest.py | 2 +- tests/unit/meta/test_meta.py | 136 ++- tests/unit/test_coding_conventions.py | 5 +- tests/unit/test_flatten_fhir_example.py | 277 ++++-- tests/unit/test_hash_types.py | 18 +- tests/unit/test_read_dvc.py | 18 +- 15 files changed, 1238 insertions(+), 517 deletions(-) diff --git a/gen3_tracker/gen3/jobs.py b/gen3_tracker/gen3/jobs.py index f544d4a6..24f6b3e7 100644 --- a/gen3_tracker/gen3/jobs.py +++ b/gen3_tracker/gen3/jobs.py @@ -14,26 +14,36 @@ from gen3_tracker import Config from gen3_tracker.common import Push, Commit from gen3_tracker.gen3.indexd import write_indexd -from gen3_tracker.git import calculate_hash, DVC, run_command, DVCMeta, DVCItem, modified_date +from gen3_tracker.git import ( + calculate_hash, + DVC, + run_command, + DVCMeta, + DVCItem, + modified_date, +) def _validate_parameters(from_: str) -> pathlib.Path: - assert len(urlparse(from_).scheme) == 0, f"{from_} appears to be an url. url to url cp not supported" + assert ( + len(urlparse(from_).scheme) == 0 + ), f"{from_} appears to be an url. 
url to url cp not supported" return from_ -def cp(config: Config, - from_: str, - project_id: str, - ignore_state: bool, - auth=None, - user=None, - object_name=None, - bucket_name=None, - metadata: dict = {}, - ): +def cp( + config: Config, + from_: str, + project_id: str, + ignore_state: bool, + auth=None, + user=None, + object_name=None, + bucket_name=None, + metadata: dict = {}, +): """Copy meta to bucket, used by etl_pod job""" from_ = _validate_parameters(str(from_)) if not isinstance(from_, pathlib.Path): @@ -41,13 +51,15 @@ def cp(config: Config, assert auth, "auth is required" - metadata = dict({'submitter': None, 'metadata_version': '0.0.1', 'is_metadata': True} | metadata) - if not metadata['submitter']: + metadata = dict( + {"submitter": None, "metadata_version": "0.0.1", "is_metadata": True} | metadata + ) + if not metadata["submitter"]: if not user: - user = auth.curl('/user/user').json() - metadata['submitter'] = user['name'] + user = auth.curl("/user/user").json() + metadata["submitter"] = user["name"] - program, project = project_id.split('-') + program, project = project_id.split("-") assert bucket_name, f"could not find bucket for {program}" @@ -57,27 +69,26 @@ def cp(config: Config, if not object_name: now = datetime.now().strftime("%Y%m%d-%H%M%S") - object_name = f'_{project_id}-{now}_meta.zip' + object_name = f"_{project_id}-{now}_meta.zip" zipfile_path = temp_dir / object_name - with ZipFile(zipfile_path, 'w') as zip_object: + with ZipFile(zipfile_path, "w") as zip_object: for _ in from_.glob("*.ndjson"): zip_object.write(_) stat = zipfile_path.stat() - md5_sum = calculate_hash('md5', zipfile_path) + md5_sum = calculate_hash("md5", zipfile_path) my_dvc = DVC( meta=DVCMeta(), outs=[ DVCItem( path=object_name, md5=md5_sum, - hash='md5', + hash="md5", modified=modified_date(zipfile_path), size=stat.st_size, - ) - ] + ], ) metadata = write_indexd( @@ -92,56 +103,81 @@ def cp(config: Config, # document = file_client.upload_file_to_guid(guid=id_, 
file_name=object_name, bucket=bucket_name) # print(document, file=sys.stderr) - run_command(f"gen3-client upload-single --bucket {bucket_name} --guid {my_dvc.object_id} --file {zipfile_path} --profile {config.gen3.profile}", no_capture=False) + run_command( + f"gen3-client upload-single --bucket {bucket_name} --guid {my_dvc.object_id} --file {zipfile_path} --profile {config.gen3.profile}", + no_capture=False, + ) - return {'msg': f"Uploaded {zipfile_path} to {bucket_name}", "object_id": my_dvc.object_id, "object_name": object_name} + return { + "msg": f"Uploaded {zipfile_path} to {bucket_name}", + "object_id": my_dvc.object_id, + "object_name": object_name, + } -def publish_commits(config: Config, wait: bool, auth: Gen3Auth, bucket_name: str, spinner=None) -> dict: +def publish_commits( + config: Config, wait: bool, auth: Gen3Auth, bucket_name: str, spinner=None +) -> dict: """Publish commits to the portal.""" # TODO legacy fhir-import-export job: copies meta to bucket and triggers job, # meta information is already in git REPO, # we should consider changing the fhir_import_export job to use the git REPO - user = auth.curl('/user/user').json() + user = auth.curl("/user/user").json() # copy meta to bucket upload_result = cp( config=config, - from_='META', + from_="META", project_id=config.gen3.project_id, ignore_state=True, auth=auth, user=user, - bucket_name=bucket_name + bucket_name=bucket_name, ) - object_id = upload_result['object_id'] + object_id = upload_result["object_id"] push = Push(config=config) jobs_client = Gen3Jobs(auth_provider=auth) # create "legacy" commit object, read by fhir-import-export job - push.commits.append(Commit(object_id=object_id, message='From g3t-git', meta_path=upload_result['object_name'], commit_id=object_id)) - args = {'push': push.model_dump(), 'project_id': config.gen3.project_id, 'method': 'put'} + push.commits.append( + Commit( + object_id=object_id, + message="From g3t-git", + meta_path=upload_result["object_name"], + 
commit_id=object_id, + ) + ) + args = { + "push": push.model_dump(), + "project_id": config.gen3.project_id, + "method": "put", + } # capture logging from gen3.jobs from cdislogging import get_logger # noqa + cdis_logging = get_logger("__name__") cdis_logging.setLevel(logging.WARN) - + if wait: # async_run_job_and_wait monkeypatched below - _ = asyncio.run(jobs_client.async_run_job_and_wait(job_name='fhir_import_export', job_input=args, spinner=spinner)) + _ = asyncio.run( + jobs_client.async_run_job_and_wait( + job_name="fhir_import_export", job_input=args, spinner=spinner + ) + ) else: - _ = jobs_client.create_job('fhir_import_export', args) + _ = jobs_client.create_job("fhir_import_export", args) if not isinstance(_, dict): - _ = {'output': _} - if isinstance(_['output'], str): + _ = {"output": _} + if isinstance(_["output"], str): try: - _['output'] = json.loads(_['output']) + _["output"] = json.loads(_["output"]) except json.JSONDecodeError: pass return _ @@ -149,7 +185,9 @@ def publish_commits(config: Config, wait: bool, auth: Gen3Auth, bucket_name: str # monkey patch for gen3.jobs.Gen3Jobs.async_run_job_and_wait # make it less noisy and sleep less (max of 30 seconds) -async def async_run_job_and_wait(self, job_name, job_input, spinner=None, _ssl=None, **kwargs): +async def async_run_job_and_wait( + self, job_name, job_input, spinner=None, _ssl=None, **kwargs +): """ Asynchronous function to create a job, wait for output, and return. Will sleep in a linear delay until the job is done, starting with 1 second. 
@@ -188,12 +226,12 @@ async def async_run_job_and_wait(self, job_name, job_input, spinner=None, _ssl=N if status.get("status") != "Completed": # write failed output to log file before raising exception response = await self.async_get_output(job_create_response.get("uid")) - with open("logs/publish.log", 'a') as f: - log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat()} - log_msg.update(response) - f.write(json.dumps(log_msg, separators=(',', ':'))) - f.write('\n') - + with open("logs/publish.log", "a") as f: + log_msg = {"timestamp": datetime.now(pytz.UTC).isoformat()} + log_msg.update(response) + f.write(json.dumps(log_msg, separators=(",", ":"))) + f.write("\n") + raise Exception(f"Job status not complete: {status.get('status')}") response = await self.async_get_output(job_create_response.get("uid")) diff --git a/gen3_tracker/git/cli.py b/gen3_tracker/git/cli.py index e7c4cc40..c0222bd8 100644 --- a/gen3_tracker/git/cli.py +++ b/gen3_tracker/git/cli.py @@ -34,20 +34,36 @@ import gen3_tracker from gen3_tracker import Config -from gen3_tracker.common import CLIOutput, INFO_COLOR, ERROR_COLOR, is_url, filter_dicts, SUCCESS_COLOR, \ - read_ndjson_file +from gen3_tracker.common import ( + CLIOutput, + INFO_COLOR, + ERROR_COLOR, + is_url, + filter_dicts, + SUCCESS_COLOR, + read_ndjson_file, +) from gen3_tracker.config import init as config_init, ensure_auth from gen3_tracker.gen3.buckets import get_buckets -from gen3_tracker.git import git_files, to_indexd, to_remote, dvc_data, \ - data_file_changes, modified_date, git_status, DVC, MISSING_G3T_MESSAGE -from gen3_tracker.git import run_command, \ - MISSING_GIT_MESSAGE, git_repository_exists +from gen3_tracker.git import ( + git_files, + to_indexd, + to_remote, + dvc_data, + data_file_changes, + modified_date, + git_status, + DVC, + MISSING_G3T_MESSAGE, +) +from gen3_tracker.git import run_command, MISSING_GIT_MESSAGE, git_repository_exists from gen3_tracker.git.adder import url_path, write_dvc_file from 
gen3_tracker.git.cloner import ls from gen3_tracker.git.initializer import initialize_project_server_side from gen3_tracker.git.snapshotter import push_snapshot from gen3_tracker.meta.skeleton import meta_index, get_data_from_meta from gen3_tracker.common import _default_json_serializer + # logging.basicConfig(level=logging.INFO) _logger = logging.getLogger(__package__) @@ -66,14 +82,20 @@ # if debug: # _logger.setLevel(logging.DEBUG) + def _check_parameters(config, project_id): """Common parameter checks.""" if not project_id: raise AssertionError("project_id is required") - if not project_id.count('-') == 1: - raise AssertionError(f"project_id must be of the form program-project {project_id}") + if not project_id.count("-") == 1: + raise AssertionError( + f"project_id must be of the form program-project {project_id}" + ) if not config.gen3.profile: - click.secho("No profile set. Continuing in disconnected mode. Use `set profile `", fg='yellow') + click.secho( + "No profile set. Continuing in disconnected mode. Use `set profile `", + fg="yellow", + ) @click.group(cls=gen3_tracker.NaturalOrderGroup) @@ -84,10 +106,34 @@ def cli(): @cli.command(context_settings=dict(ignore_unknown_options=True)) # @click.option('--force', '-f', is_flag=True, help='Force the init.') -@click.argument('project_id', default=None, required=False, envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID") -@click.option('--approve', '-a', help='Approve the addition (privileged)', is_flag=True, default=False, show_default=True) -@click.option('--no-server', help='Skip server setup (testing)', is_flag=True, default=False, show_default=True, hidden=True) -@click.option('--debug', is_flag=True, envvar='G3T_DEBUG', help='Enable debug mode. 
G3T_DEBUG environment variable can also be used.') +@click.argument( + "project_id", + default=None, + required=False, + envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID", +) +@click.option( + "--approve", + "-a", + help="Approve the addition (privileged)", + is_flag=True, + default=False, + show_default=True, +) +@click.option( + "--no-server", + help="Skip server setup (testing)", + is_flag=True, + default=False, + show_default=True, + hidden=True, +) +@click.option( + "--debug", + is_flag=True, + envvar="G3T_DEBUG", + help="Enable debug mode. G3T_DEBUG environment variable can also be used.", +) @click.pass_obj def init(config: Config, project_id: str, approve: bool, no_server: bool, debug: bool): """Initialize a new repository.""" @@ -113,15 +159,29 @@ def init(config: Config, project_id: str, approve: bool, no_server: bool, debug: ensure_git_repo(config) if not no_server: - init_logs, approval_needed = initialize_project_server_side(config, project_id) + init_logs, approval_needed = initialize_project_server_side( + config, project_id + ) logs.extend(init_logs) if approve and approval_needed: - run_command('g3t collaborator approve --all', dry_run=config.dry_run, no_capture=True) + run_command( + "g3t collaborator approve --all", + dry_run=config.dry_run, + no_capture=True, + ) elif approval_needed and not approve: - click.secho("Approval needed. to approve the project, a privileged user must run `g3t collaborator approve --all`", fg=INFO_COLOR, file=sys.stderr) + click.secho( + "Approval needed. to approve the project, a privileged user must run `g3t collaborator approve --all`", + fg=INFO_COLOR, + file=sys.stderr, + ) else: - click.secho(f"Approval not needed. Project {project_id} has approved read/write", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Approval not needed. 
Project {project_id} has approved read/write", + fg=INFO_COLOR, + file=sys.stderr, + ) if config.debug: for _ in logs: @@ -135,26 +195,34 @@ def init(config: Config, project_id: str, approve: bool, no_server: bool, debug: def ensure_git_repo(config): # ensure a git repo - if pathlib.Path('.git').exists(): + if pathlib.Path(".git").exists(): return - if not pathlib.Path('.git').exists(): - command = 'git init' + if not pathlib.Path(".git").exists(): + command = "git init" run_command(command, dry_run=config.dry_run, no_capture=True) else: - click.secho('Git repository already exists.', fg=INFO_COLOR, file=sys.stderr) - pathlib.Path('MANIFEST').mkdir(exist_ok=True) - pathlib.Path('META').mkdir(exist_ok=True) - pathlib.Path('LOGS').mkdir(exist_ok=True) - with open('.gitignore', 'w') as f: - f.write('LOGS/\n') - f.write('.g3t/state/\n') # legacy - with open('META/README.md', 'w') as f: - f.write('This directory contains metadata files for the data files in the MANIFEST directory.\n') - with open('MANIFEST/README.md', 'w') as f: - f.write('This directory contains dvc files that reference the data files.\n') - run_command('git add MANIFEST META .gitignore .g3t', dry_run=config.dry_run, no_capture=True) - run_command('git commit -m "initialized" MANIFEST META .gitignore .g3t', dry_run=config.dry_run, no_capture=True) + click.secho("Git repository already exists.", fg=INFO_COLOR, file=sys.stderr) + pathlib.Path("MANIFEST").mkdir(exist_ok=True) + pathlib.Path("META").mkdir(exist_ok=True) + pathlib.Path("LOGS").mkdir(exist_ok=True) + with open(".gitignore", "w") as f: + f.write("LOGS/\n") + f.write(".g3t/state/\n") # legacy + with open("META/README.md", "w") as f: + f.write( + "This directory contains metadata files for the data files in the MANIFEST directory.\n" + ) + with open("MANIFEST/README.md", "w") as f: + f.write("This directory contains dvc files that reference the data files.\n") + run_command( + "git add MANIFEST META .gitignore .g3t", dry_run=config.dry_run, 
no_capture=True + ) + run_command( + 'git commit -m "initialized" MANIFEST META .gitignore .g3t', + dry_run=config.dry_run, + no_capture=True, + ) # Note: The commented code below is an example of how to use context settings to allow extra arguments. @@ -165,8 +233,8 @@ def ensure_git_repo(config): @cli.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True)) -@click.argument('target') -@click.option('--no-git-add', default=False, is_flag=True, hidden=True) +@click.argument("target") +@click.option("--no-git-add", default=False, is_flag=True, hidden=True) @click.pass_context def add(ctx, target, no_git_add: bool): """ @@ -211,10 +279,10 @@ def add(ctx, target, no_git_add: bool): assert not config.no_config_found, MISSING_G3T_MESSAGE # needs to have a target - assert target, 'No targets specified.' + assert target, "No targets specified." # Expand wildcard paths - if is_url(target) and not target.startswith('file://'): + if is_url(target) and not target.startswith("file://"): all_changed_files, updates = add_url(ctx, target) else: all_changed_files, updates = add_file(ctx, target) @@ -224,8 +292,12 @@ def add(ctx, target, no_git_add: bool): # adds = [str(_) for _ in all_changed_files if _ not in updates] if adds and not no_git_add: - adds.append('.gitignore') - run_command(f'git add {" ".join([str(_) for _ in adds])}', dry_run=config.dry_run, no_capture=True) + adds.append(".gitignore") + run_command( + f'git add {" ".join([str(_) for _ in adds])}', + dry_run=config.dry_run, + no_capture=True, + ) except Exception as e: click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr) @@ -234,9 +306,15 @@ def add(ctx, target, no_git_add: bool): @cli.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True)) -@click.argument('targets', nargs=-1) -@click.option('--message', '-m', help='The commit message.') -@click.option('--all', '-a', is_flag=True, default=False, help='Automatically stage files that have been modified and 
deleted.') +@click.argument("targets", nargs=-1) +@click.option("--message", "-m", help="The commit message.") +@click.option( + "--all", + "-a", + is_flag=True, + default=False, + help="Automatically stage files that have been modified and deleted.", +) @click.pass_context def commit(ctx, targets, message, all): """Commit the changes @@ -269,11 +347,11 @@ def status(config): """Show changed files.""" soft_error = False try: - with Halo(text='Scanning', spinner='line', placement='right', color='white'): - manifest_path = pathlib.Path('MANIFEST') + with Halo(text="Scanning", spinner="line", placement="right", color="white"): + manifest_path = pathlib.Path("MANIFEST") changes = data_file_changes(manifest_path) # Get a list of all files in the MANIFEST directory and its subdirectories - files = glob.glob('MANIFEST/**/*.dvc', recursive=True) + files = glob.glob("MANIFEST/**/*.dvc", recursive=True) # Filter out directories, keep only files files = [f for f in files if os.path.isfile(f)] if not files: @@ -284,28 +362,46 @@ def status(config): document_reference_mtime = 0 - if pathlib.Path('META/DocumentReference.ndjson').exists(): + if pathlib.Path("META/DocumentReference.ndjson").exists(): # Get the modification time - document_reference_mtime = os.path.getmtime('META/DocumentReference.ndjson') + document_reference_mtime = os.path.getmtime( + "META/DocumentReference.ndjson" + ) latest_file_mtime = os.path.getmtime(latest_file) if document_reference_mtime < latest_file_mtime: - document_reference_mtime = datetime.fromtimestamp(document_reference_mtime).isoformat() - latest_file_mtime = datetime.fromtimestamp(latest_file_mtime).isoformat() - click.secho(f"WARNING: DocumentReference.ndjson is out of date {document_reference_mtime}. The most recently changed file is {latest_file} {latest_file_mtime}. 
Please check DocumentReferences.ndjson", fg=INFO_COLOR, file=sys.stderr) + document_reference_mtime = datetime.fromtimestamp( + document_reference_mtime + ).isoformat() + latest_file_mtime = datetime.fromtimestamp( + latest_file_mtime + ).isoformat() + click.secho( + f"WARNING: DocumentReference.ndjson is out of date {document_reference_mtime}. The most recently changed file is {latest_file} {latest_file_mtime}. Please check DocumentReferences.ndjson", + fg=INFO_COLOR, + file=sys.stderr, + ) soft_error = True if changes: - click.secho(f"# There are {len(changes)} data files that you need to update via `g3t add`:", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"# There are {len(changes)} data files that you need to update via `g3t add`:", + fg=INFO_COLOR, + file=sys.stderr, + ) cwd = pathlib.Path.cwd() for _ in changes: - data_path = str(_.data_path).replace(str(cwd) + '/', "") - click.secho(f' g3t add {data_path} # changed: {modified_date(_.data_path)}, last added: {modified_date(_.dvc_path)}', fg=INFO_COLOR, file=sys.stderr) + data_path = str(_.data_path).replace(str(cwd) + "/", "") + click.secho( + f" g3t add {data_path} # changed: {modified_date(_.data_path)}, last added: {modified_date(_.dvc_path)}", + fg=INFO_COLOR, + file=sys.stderr, + ) soft_error = True else: click.secho("No data file changes.", fg=INFO_COLOR, file=sys.stderr) - _ = run_command('git status') + _ = run_command("git status") print(_.stdout) if soft_error: exit(1) @@ -316,27 +412,66 @@ def status(config): @cli.command() -@click.option('--step', - type=click.Choice(['index', 'upload', 'publish', 'all', 'fhir']), - default='all', - show_default=True, - help='The step to run ' - ) -@click.option('--transfer-method', - type=click.Choice(gen3_tracker.FILE_TRANSFER_METHODS.keys()), - default='gen3', - show_default=True, - help='The upload method.' 
- ) -@click.option('--overwrite', is_flag=True, help='(index): Overwrite previously submitted files.') -@click.option('--wait', default=True, is_flag=True, show_default=True, help="(publish): Wait for metadata completion.") -@click.option('--dry-run', show_default=True, default=False, is_flag=True, help='Print the commands that would be executed, but do not execute them.') -@click.option('--re-run', show_default=True, default=False, is_flag=True, help='Re-run the last publish step') -@click.option('--fhir-server', show_default=True, default=False, is_flag=True, help='Push data in META directory to FHIR Server. Whatever FHIR data that exists in META dir will be upserted into the fhir server') -@click.option('--debug', is_flag=True) -@click.option('--skip_validate', is_flag=True, help='Skip validation of the metadata') +@click.option( + "--step", + type=click.Choice(["index", "upload", "publish", "all", "fhir"]), + default="all", + show_default=True, + help="The step to run ", +) +@click.option( + "--transfer-method", + type=click.Choice(gen3_tracker.FILE_TRANSFER_METHODS.keys()), + default="gen3", + show_default=True, + help="The upload method.", +) +@click.option( + "--overwrite", is_flag=True, help="(index): Overwrite previously submitted files." +) +@click.option( + "--wait", + default=True, + is_flag=True, + show_default=True, + help="(publish): Wait for metadata completion.", +) +@click.option( + "--dry-run", + show_default=True, + default=False, + is_flag=True, + help="Print the commands that would be executed, but do not execute them.", +) +@click.option( + "--re-run", + show_default=True, + default=False, + is_flag=True, + help="Re-run the last publish step", +) +@click.option( + "--fhir-server", + show_default=True, + default=False, + is_flag=True, + help="Push data in META directory to FHIR Server. 
Whatever FHIR data that exists in META dir will be upserted into the fhir server", +) +@click.option("--debug", is_flag=True) +@click.option("--skip_validate", is_flag=True, help="Skip validation of the metadata") @click.pass_context -def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wait: bool, dry_run: bool, fhir_server: bool, debug: bool, skip_validate: bool): +def push( + ctx, + step: str, + transfer_method: str, + overwrite: bool, + re_run: bool, + wait: bool, + dry_run: bool, + fhir_server: bool, + debug: bool, + skip_validate: bool, +): """Push changes to the remote repository. \b steps: @@ -362,27 +497,35 @@ def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wa raise NotImplementedError("Re-run not implemented") try: - with Halo(text='Checking', spinner='line', placement='right', color='white'): + with Halo( + text="Checking", spinner="line", placement="right", color="white" + ): run_command("g3t status") if not skip_validate: run_command("g3t meta validate", no_capture=True) except Exception as e: - click.secho("Please correct issues before pushing.", fg=ERROR_COLOR, file=sys.stderr) + click.secho( + "Please correct issues before pushing.", fg=ERROR_COLOR, file=sys.stderr + ) click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr) if config.debug: raise exit(1) - with Halo(text='Scanning', spinner='line', placement='right', color='white'): + with Halo(text="Scanning", spinner="line", placement="right", color="white"): # check git status branch, uncommitted = git_status() - assert not uncommitted, "Uncommitted changes found. Please commit or stash them first." + assert ( + not uncommitted + ), "Uncommitted changes found. Please commit or stash them first." # check dvc vs external files - changes = data_file_changes(pathlib.Path('MANIFEST')) - assert not changes, f"# There are {len(changes)} data files that you need to update. 
See `g3t status`" + changes = data_file_changes(pathlib.Path("MANIFEST")) + assert ( + not changes + ), f"# There are {len(changes)} data files that you need to update. See `g3t status`" # initialize dvc objects with this project_id committed_files, dvc_objects = manifest(config.gen3.project_id) @@ -392,133 +535,232 @@ def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wa bucket_name = get_program_bucket(config=config, auth=auth) # check for new files - records = ls(config, metadata={'project_id': config.gen3.project_id}, auth=auth)['records'] - dids = {_['did']: _['updated_date'] for _ in records} + records = ls( + config, metadata={"project_id": config.gen3.project_id}, auth=auth + )["records"] + dids = {_["did"]: _["updated_date"] for _ in records} new_dvc_objects = [_ for _ in dvc_objects if _.object_id not in dids] - updated_dvc_objects = [_ for _ in dvc_objects if _.object_id in dids and _.out.modified > dids[_.object_id]] + updated_dvc_objects = [ + _ + for _ in dvc_objects + if _.object_id in dids and _.out.modified > dids[_.object_id] + ] if step not in ["publish", "fhir"]: if not overwrite: dvc_objects = new_dvc_objects + updated_dvc_objects - assert dvc_objects, "No new files to index. Use --overwrite to force" - - click.secho(f'Scanned new: {len(new_dvc_objects)}, updated: {len(updated_dvc_objects)} files', fg=INFO_COLOR, file=sys.stderr) + assert ( + dvc_objects + ), "No new files to index. Use --overwrite to force" + + click.secho( + f"Scanned new: {len(new_dvc_objects)}, updated: {len(updated_dvc_objects)} files", + fg=INFO_COLOR, + file=sys.stderr, + ) if updated_dvc_objects: - click.secho(f'Found {len(updated_dvc_objects)} updated files. overwriting', fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Found {len(updated_dvc_objects)} updated files. 
overwriting", + fg=INFO_COLOR, + file=sys.stderr, + ) overwrite = True - if step in ['index', 'all']: + if step in ["index", "all"]: # send to index if dry_run: - click.secho("Dry run: not indexing files", fg=INFO_COLOR, file=sys.stderr) + click.secho( + "Dry run: not indexing files", fg=INFO_COLOR, file=sys.stderr + ) yaml.dump( { - 'new': [_.model_dump() for _ in new_dvc_objects], - 'updated': [_.model_dump() for _ in updated_dvc_objects], + "new": [_.model_dump() for _ in new_dvc_objects], + "updated": [_.model_dump() for _ in updated_dvc_objects], }, - sys.stdout + sys.stdout, ) return for _ in tqdm( - to_indexd( - dvc_objects=dvc_objects, - auth=auth, - project_id=config.gen3.project_id, - bucket_name=bucket_name, - overwrite=overwrite, - restricted_project_id=None - - ), - desc='Indexing', unit='file', leave=False, total=len(committed_files)): + to_indexd( + dvc_objects=dvc_objects, + auth=auth, + project_id=config.gen3.project_id, + bucket_name=bucket_name, + overwrite=overwrite, + restricted_project_id=None, + ), + desc="Indexing", + unit="file", + leave=False, + total=len(committed_files), + ): pass - click.secho(f'Indexed {len(committed_files)} files.', fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Indexed {len(committed_files)} files.", fg=INFO_COLOR, file=sys.stderr + ) - if step in ['upload', 'all']: - click.secho(f'Checking {len(dvc_objects)} files for upload via {transfer_method}', fg=INFO_COLOR, file=sys.stderr) + if step in ["upload", "all"]: + click.secho( + f"Checking {len(dvc_objects)} files for upload via {transfer_method}", + fg=INFO_COLOR, + file=sys.stderr, + ) to_remote( upload_method=transfer_method, dvc_objects=dvc_objects, bucket_name=bucket_name, profile=config.gen3.profile, dry_run=config.dry_run, - work_dir=config.work_dir + work_dir=config.work_dir, ) - if fhir_server or step in ['fhir']: + if fhir_server or step in ["fhir"]: """Either there exists a Bundle.ndjson file in META signifying a revision to the data, or there is no 
bundle.json, - signifying that the data in the META directory should be upserted into gen34""" - meta_dir = pathlib.Path('META') + signifying that the data in the META directory should be upserted into gen34 + """ + meta_dir = pathlib.Path("META") bundle_file = meta_dir / "Bundle.ndjson" if os.path.isfile(bundle_file): - with Halo(text='Sending to FHIR Server', spinner='line', placement='right', color='white'): + with Halo( + text="Sending to FHIR Server", + spinner="line", + placement="right", + color="white", + ): with open(bundle_file, "r") as file: json_string = file.read() bundle_data = orjson.loads(json_string) headers = {"Authorization": f"{auth._access_token}"} - result = requests.delete(url=f'{auth.endpoint}/Bundle', data=orjson.dumps(bundle_data, default=_default_json_serializer, - option=orjson.OPT_APPEND_NEWLINE).decode(), headers=headers) - - with open("logs/publish.log", 'a') as f: - log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat(), "result": f"{result}"} - click.secho('Published project. See logs/publish.log', fg=SUCCESS_COLOR, file=sys.stderr) - f.write(json.dumps(log_msg, separators=(',', ':'))) - f.write('\n') + result = requests.delete( + url=f"{auth.endpoint}/Bundle", + data=orjson.dumps( + bundle_data, + default=_default_json_serializer, + option=orjson.OPT_APPEND_NEWLINE, + ).decode(), + headers=headers, + ) + + with open("logs/publish.log", "a") as f: + log_msg = { + "timestamp": datetime.now(pytz.UTC).isoformat(), + "result": f"{result}", + } + click.secho( + "Published project. 
See logs/publish.log", + fg=SUCCESS_COLOR, + file=sys.stderr, + ) + f.write(json.dumps(log_msg, separators=(",", ":"))) + f.write("\n") return project_id = config.gen3.project_id now = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ") - bundle = Bundle(type='transaction', timestamp=now) - bundle.identifier = Identifier(value=project_id, system="https://aced-idp.org/project_id") + bundle = Bundle(type="transaction", timestamp=now) + bundle.identifier = Identifier( + value=project_id, system="https://aced-idp.org/project_id" + ) from gen3_tracker import ACED_NAMESPACE + bundle.id = str(uuid.uuid5(ACED_NAMESPACE, f"Bundle/{project_id}/{now}")) bundle.entry = [] for _ in get_data_from_meta(): bundle_entry = BundleEntry() # See https://build.fhir.org/bundle-definitions.html#Bundle.entry.request.url - bundle_entry.request = BundleEntryRequest(url=f"{_['resourceType']}/{_['id']}", method='PUT') + bundle_entry.request = BundleEntryRequest( + url=f"{_['resourceType']}/{_['id']}", method="PUT" + ) bundle_entry.resource = _ bundle.entry.append(bundle_entry) headers = {"Authorization": f"{auth._access_token}"} bundle_dict = bundle.dict() - with Halo(text='Sending to FHIR Server', spinner='line', placement='right', color='white'): - result = requests.put(url=f'{auth.endpoint}/Bundle', data=orjson.dumps(bundle_dict, default=_default_json_serializer, - option=orjson.OPT_APPEND_NEWLINE).decode(), headers=headers) - - with open("logs/publish.log", 'a') as f: - log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat(), "result": f"{result}"} - click.secho('Published project. 
See logs/publish.log', fg=SUCCESS_COLOR, file=sys.stderr) - f.write(json.dumps(log_msg, separators=(',', ':'))) - f.write('\n') + with Halo( + text="Sending to FHIR Server", + spinner="line", + placement="right", + color="white", + ): + result = requests.put( + url=f"{auth.endpoint}/Bundle", + data=orjson.dumps( + bundle_dict, + default=_default_json_serializer, + option=orjson.OPT_APPEND_NEWLINE, + ).decode(), + headers=headers, + ) + + with open("logs/publish.log", "a") as f: + log_msg = { + "timestamp": datetime.now(pytz.UTC).isoformat(), + "result": f"{result}", + } + click.secho( + "Published project. See logs/publish.log", + fg=SUCCESS_COLOR, + file=sys.stderr, + ) + f.write(json.dumps(log_msg, separators=(",", ":"))) + f.write("\n") return - if step in ['publish', 'all'] and not fhir_server: + if step in ["publish", "all"] and not fhir_server: log_path = "logs/publish.log" - with Halo(text='Uploading snapshot', spinner='line', placement='right', color='white'): + with Halo( + text="Uploading snapshot", + spinner="line", + placement="right", + color="white", + ): # push the snapshot of the `.git` sub-directory in the current directory push_snapshot(config, auth=auth) - if transfer_method == 'gen3': + if transfer_method == "gen3": try: # legacy, "old" fhir_import_export use publish_commits to publish the META - with Halo(text='Publishing', spinner='line', placement='right', color='white') as spinner: - _ = publish_commits(config, wait=wait, auth=auth, bucket_name=bucket_name, spinner=spinner) + with Halo( + text="Publishing", + spinner="line", + placement="right", + color="white", + ) as spinner: + _ = publish_commits( + config, + wait=wait, + auth=auth, + bucket_name=bucket_name, + spinner=spinner, + ) except Exception as e: - click.secho(f'Unable to publish project. See {log_path} for more info', fg=ERROR_COLOR, file=sys.stderr) + click.secho( + f"Unable to publish project. 
See {log_path} for more info", + fg=ERROR_COLOR, + file=sys.stderr, + ) raise e # print success message and save logs - with open(log_path, 'a') as f: - log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat()} + with open(log_path, "a") as f: + log_msg = {"timestamp": datetime.now(pytz.UTC).isoformat()} log_msg.update(_) - f.write(json.dumps(log_msg, separators=(',', ':'))) - f.write('\n') - click.secho(f'Published project. Logs found at {log_path}', fg=SUCCESS_COLOR, file=sys.stderr) + f.write(json.dumps(log_msg, separators=(",", ":"))) + f.write("\n") + click.secho( + f"Published project. Logs found at {log_path}", + fg=SUCCESS_COLOR, + file=sys.stderr, + ) else: - click.secho(f'Auto-publishing not supported for {transfer_method}. Please use --step publish after uploading', fg=ERROR_COLOR, file=sys.stderr) + click.secho( + f"Auto-publishing not supported for {transfer_method}. Please use --step publish after uploading", + fg=ERROR_COLOR, + file=sys.stderr, + ) except Exception as e: click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr) @@ -529,7 +771,7 @@ def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wa def manifest(project_id) -> tuple[list[str], list[DVC]]: """Get the committed files and their dvc objects. Initialize dvc objects with this project_id""" - committed_files = [_ for _ in git_files() if _.endswith('.dvc')] + committed_files = [_ for _ in git_files() if _.endswith(".dvc")] dvc_objects = [_ for _ in dvc_data(committed_files)] for _ in dvc_objects: _.project_id = project_id @@ -537,26 +779,43 @@ def manifest(project_id) -> tuple[list[str], list[DVC]]: @cli.command() -@click.option('--remote', - type=click.Choice(['gen3', 's3', 'ln', 'scp']), - default='gen3', - show_default=True, - help='Specify the remote storage type. 
gen3:download, s3:s3 cp, ln: symbolic link, scp: scp copy' - ) -@click.option('--worker_count', '-w', default=(multiprocessing.cpu_count() - 1), show_default=True, - type=int, - help='Number of workers to use.') -@click.option('--data-only', help='Ignore git snapshot', is_flag=True, default=False, show_default=True) +@click.option( + "--remote", + type=click.Choice(["gen3", "s3", "ln", "scp"]), + default="gen3", + show_default=True, + help="Specify the remote storage type. gen3:download, s3:s3 cp, ln: symbolic link, scp: scp copy", +) +@click.option( + "--worker_count", + "-w", + default=(multiprocessing.cpu_count() - 1), + show_default=True, + type=int, + help="Number of workers to use.", +) +@click.option( + "--data-only", + help="Ignore git snapshot", + is_flag=True, + default=False, + show_default=True, +) @click.pass_obj def pull(config: Config, remote: str, worker_count: int, data_only: bool): - """ Fetch from and integrate with a remote repository.""" + """Fetch from and integrate with a remote repository.""" try: - with Halo(text='Authorizing', spinner='line', placement='right', color='white'): + with Halo(text="Authorizing", spinner="line", placement="right", color="white"): auth = gen3_tracker.config.ensure_auth(config=config) if not data_only: - with Halo(text='Pulling git snapshot', spinner='line', placement='right', color='white'): + with Halo( + text="Pulling git snapshot", + spinner="line", + placement="right", + color="white", + ): if not auth: auth = gen3_tracker.config.ensure_auth(config=config) snapshot, zip_filepath = download_snapshot(auth, config) @@ -567,35 +826,52 @@ def pull(config: Config, remote: str, worker_count: int, data_only: bool): # Rename the directory shutil.move(".git", new_dir_name) # unzip the snapshot - with zipfile.ZipFile(zip_filepath, 'r') as zip_ref: - zip_ref.extractall('.') - click.secho(f"Pulled {snapshot['file_name']}", fg=INFO_COLOR, file=sys.stderr) + with zipfile.ZipFile(zip_filepath, "r") as zip_ref: + 
zip_ref.extractall(".") + click.secho( + f"Pulled {snapshot['file_name']}", fg=INFO_COLOR, file=sys.stderr + ) manifest_files, dvc_objects = manifest(config.gen3.project_id) - if remote == 'gen3': + if remote == "gen3": # download the files - with Halo(text='Pulling from gen3', spinner='line', placement='right', color='white'): - object_ids = [{'object_id': _.object_id} for _ in dvc_objects] # if not _.out.source_url - current_time = datetime.now().strftime("%Y%m%d%H%M%S") # Format datetime as you need - manifest_file = pathlib.Path(config.work_dir) / f'manifest-{current_time}.json' - with open(manifest_file, 'w') as fp: + with Halo( + text="Pulling from gen3", + spinner="line", + placement="right", + color="white", + ): + object_ids = [ + {"object_id": _.object_id} for _ in dvc_objects + ] # if not _.out.source_url + current_time = datetime.now().strftime( + "%Y%m%d%H%M%S" + ) # Format datetime as you need + manifest_file = ( + pathlib.Path(config.work_dir) / f"manifest-{current_time}.json" + ) + with open(manifest_file, "w") as fp: json.dump(object_ids, fp) - cmd = f'gen3-client download-multiple --no-prompt --profile {config.gen3.profile} --manifest {manifest_file} --numparallel {worker_count}' + cmd = f"gen3-client download-multiple --no-prompt --profile {config.gen3.profile} --manifest {manifest_file} --numparallel {worker_count}" print(cmd) run_command(cmd, no_capture=True) - elif remote == 's3': - with Halo(text='Pulling from s3', spinner='line', placement='right', color='white'): + elif remote == "s3": + with Halo( + text="Pulling from s3", spinner="line", placement="right", color="white" + ): if not auth: auth = gen3_tracker.config.ensure_auth(config=config) - results = ls(config, metadata={'project_id': config.gen3.project_id}, auth=auth) + results = ls( + config, metadata={"project_id": config.gen3.project_id}, auth=auth + ) object_ids = [_.object_id for _ in dvc_objects] - for _ in results['records']: - if _['did'] in object_ids: - print('aws s3 cp ', 
_['urls'][0], _['file_name']) - elif remote == 'ln': + for _ in results["records"]: + if _["did"] in object_ids: + print("aws s3 cp ", _["urls"][0], _["file_name"]) + elif remote == "ln": for _ in dvc_objects: print(f"ln -s {_.out.realpath} {_.out.path}") - elif remote == 'scp': + elif remote == "scp": for _ in dvc_objects: print(f"scp USER@HOST:{_.out.realpath} {_.out.path}") @@ -609,56 +885,79 @@ def pull(config: Config, remote: str, worker_count: int, data_only: bool): @cli.command() -@click.argument('project_id', default=None, required=False, envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID", metavar='PROJECT_ID') +@click.argument( + "project_id", + default=None, + required=False, + envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID", + metavar="PROJECT_ID", +) @click.pass_obj def clone(config, project_id): """Clone a repository into a new directory""" try: config.gen3.project_id = project_id - assert not pathlib.Path(project_id).exists(), f"{project_id} already exists. Please remove it first." + assert not pathlib.Path( + project_id + ).exists(), f"{project_id} already exists. Please remove it first." os.mkdir(project_id) os.chdir(project_id) - with Halo(text='Cloning', spinner='line', placement='right', color='white'): + with Halo(text="Cloning", spinner="line", placement="right", color="white"): auth = gen3_tracker.config.ensure_auth(config=config) snapshot, zip_filepath = download_snapshot(auth, config) - assert not pathlib.Path('.git').exists(), "A git repository already exists. Please remove it, or move to another directory first." + assert not pathlib.Path( + ".git" + ).exists(), "A git repository already exists. Please remove it, or move to another directory first." 
# unzip - with zipfile.ZipFile(zip_filepath, 'r') as zip_ref: - zip_ref.extractall('.') + with zipfile.ZipFile(zip_filepath, "r") as zip_ref: + zip_ref.extractall(".") # if we just unzipped a .git these directories will exist - expected_dirs = ['.git', 'META', 'MANIFEST'] + expected_dirs = [".git", "META", "MANIFEST"] if not all([pathlib.Path(_).exists() for _ in expected_dirs]): # if not, we have downloaded a legacy SNAPSHOT.zip, so lets migrate the data to the expected drirectories - click.secho(f"{expected_dirs} not found after downloading {snapshot['file_name']} processing legacy snapshot", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"{expected_dirs} not found after downloading {snapshot['file_name']} processing legacy snapshot", + fg=INFO_COLOR, + file=sys.stderr, + ) # legacy - was this a *SNAPSHOT.zip? - meta_files = (pathlib.Path('studies') / config.gen3.project) + meta_files = pathlib.Path("studies") / config.gen3.project # legacy - was this a *meta.zip? if not meta_files.exists(): - meta_files = pathlib.Path('.') + meta_files = pathlib.Path(".") # create local directories and git [_ for _ in config_init(config, project_id)] ensure_git_repo(config=config) # move ndjson from studies to META - for _ in meta_files.glob('*.ndjson'): - shutil.move(_, 'META/') + for _ in meta_files.glob("*.ndjson"): + shutil.move(_, "META/") # add to git - run_command('git add META/*.*') + run_command("git add META/*.*") # migrate DocumentReferences to MANIFEST references = meta_index() manifest_files = [] - for _ in read_ndjson_file('META/DocumentReference.ndjson'): + for _ in read_ndjson_file("META/DocumentReference.ndjson"): document_reference = DocumentReference.parse_obj(_) - dvc_object = DVC.from_document_reference(config, document_reference, references) - manifest_files.append(write_dvc_file(yaml_data=dvc_object.model_dump(), target=dvc_object.out.path)) + dvc_object = DVC.from_document_reference( + config, document_reference, references + ) + manifest_files.append( 
+ write_dvc_file( + yaml_data=dvc_object.model_dump(), + target=dvc_object.out.path, + ) + ) # Get the current time in seconds since the epoch current_time = time.time() # Update the access and modification times of the file - os.utime('META/DocumentReference.ndjson', (current_time, current_time)) + os.utime("META/DocumentReference.ndjson", (current_time, current_time)) - run_command('git add MANIFEST/') - run_command('git commit -m "migrated from legacy" MANIFEST/ META/ .gitignore') + run_command("git add MANIFEST/") + run_command( + 'git commit -m "migrated from legacy" MANIFEST/ META/ .gitignore' + ) shutil.move(zip_filepath, config.work_dir / zip_filepath.name) click.secho(f"Cloned {snapshot['file_name']}", fg=INFO_COLOR, file=sys.stderr) @@ -673,27 +972,30 @@ def clone(config, project_id): def download_snapshot(auth, config): """Download the latest snapshot.""" from gen3_tracker.git.cloner import find_latest_snapshot + snapshot = find_latest_snapshot(auth, config) gen3_file = Gen3File(auth) - pathlib.Path(snapshot['file_name']).parent.mkdir(exist_ok=True, parents=True) - ok = gen3_file.download_single(snapshot['did'], '.') + pathlib.Path(snapshot["file_name"]).parent.mkdir(exist_ok=True, parents=True) + ok = gen3_file.download_single(snapshot["did"], ".") assert ok, f"Failed to download {snapshot['did']}" - zip_filepath = pathlib.Path(snapshot['file_name']) + zip_filepath = pathlib.Path(snapshot["file_name"]) assert zip_filepath.exists(), f"Failed to download {snapshot['did']}" return snapshot, zip_filepath def file_name_or_guid(config, object_id) -> (str, pathlib.Path): """Check if the object_id is a file name or a GUID.""" - guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$') + guid_pattern = re.compile( + r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" + ) path = None if not guid_pattern.match(object_id): if not is_url(object_id): - path = pathlib.Path('MANIFEST') / (object_id + ".dvc") + path = 
pathlib.Path("MANIFEST") / (object_id + ".dvc") else: - path = pathlib.Path('MANIFEST') / (url_path(object_id) + ".dvc") + path = pathlib.Path("MANIFEST") / (url_path(object_id) + ".dvc") if path.exists(): dvc_object = next(iter(dvc_data([str(path)])), None) @@ -702,20 +1004,33 @@ def file_name_or_guid(config, object_id) -> (str, pathlib.Path): object_id = dvc_object.object_id else: raise ValueError( - f"{object_id} was not found in the MANIFEST and does not appear to be an object identifier (GUID).") + f"{object_id} was not found in the MANIFEST and does not appear to be an object identifier (GUID)." + ) else: committed_files, dvc_objects = manifest(config.gen3.project_id) - dvc_objects = [dvc_object for dvc_object in dvc_objects if dvc_object.object_id == object_id] + dvc_objects = [ + dvc_object + for dvc_object in dvc_objects + if dvc_object.object_id == object_id + ] assert dvc_objects, f"{object_id} not found in MANIFEST." - path = pathlib.Path('MANIFEST') / (dvc_objects[0].out.path + ".dvc") + path = pathlib.Path("MANIFEST") / (dvc_objects[0].out.path + ".dvc") assert guid_pattern.match(object_id), f"{object_id} was not found in MANIFEST." return object_id, path @cli.command("ls") -@click.option('--long', '-l', 'long_flag', default=False, is_flag=True, help='Long listing format.', show_default=True) -@click.argument('target', default=None, required=False) +@click.option( + "--long", + "-l", + "long_flag", + default=False, + is_flag=True, + help="Long listing format.", + show_default=True, +) +@click.argument("target", default=None, required=False) @click.pass_obj def ls_cli(config: Config, long_flag: bool, target: str): """List files in the repository. 
@@ -724,10 +1039,14 @@ def ls_cli(config: Config, long_flag: bool, target: str): """ try: - with Halo(text='Pulling file list', spinner='line', placement='right', color='white'): + with Halo( + text="Pulling file list", spinner="line", placement="right", color="white" + ): auth = gen3_tracker.config.ensure_auth(config=config) - results = ls(config, metadata={'project_id': config.gen3.project_id}, auth=auth) - indexd_records = results['records'] + results = ls( + config, metadata={"project_id": config.gen3.project_id}, auth=auth + ) + indexd_records = results["records"] committed_files, dvc_objects = manifest(config.gen3.project_id) # list all data files dvc_objects = {_.object_id: _ for _ in dvc_objects} @@ -742,44 +1061,49 @@ def _dvc_meta(dvc_object, full=False) -> dict: _[k] = v else: _ = dvc_object.model_dump(exclude_none=True) - _['object_id'] = dvc_object.object_id + _["object_id"] = dvc_object.object_id return _ if not long_flag: indexd_records = [ { - 'did': _['did'], - 'file_name': _['file_name'], - 'indexd_created_date': _['created_date'], - 'meta': _dvc_meta(dvc_objects.get(_['did'], None)), - 'urls': _['urls'] - } for _ in indexd_records + "did": _["did"], + "file_name": _["file_name"], + "indexd_created_date": _["created_date"], + "meta": _dvc_meta(dvc_objects.get(_["did"], None)), + "urls": _["urls"], + } + for _ in indexd_records ] - bucket_ids = {_['did'] for _ in indexd_records} + bucket_ids = {_["did"] for _ in indexd_records} - uncommitted = pathlib.Path('MANIFEST').glob('**/*.dvc') + uncommitted = pathlib.Path("MANIFEST").glob("**/*.dvc") uncommitted = [str(_) for _ in uncommitted] uncommitted = [str(_) for _ in uncommitted if _ not in committed_files] uncommitted = [_.model_dump(exclude_none=True) for _ in dvc_data(uncommitted)] _ = { - 'bucket': indexd_records, - 'committed': [_dvc_meta(v, full=True) for k, v in dvc_objects.items() if k not in bucket_ids], - 'uncommitted': uncommitted + "bucket": indexd_records, + "committed": [ + _dvc_meta(v, 
full=True) + for k, v in dvc_objects.items() + if k not in bucket_ids + ], + "uncommitted": uncommitted, } if target: # Escape special characters and replace wildcard '*' with '.*' for regex pattern pattern = re.escape(target).replace("\\*", ".*") filtered = { - 'bucket': filter_dicts(_.get('bucket', []), pattern), - 'committed': filter_dicts(_.get('committed', []), pattern), - 'uncommitted': filter_dicts(_.get('uncommitted', []), pattern) + "bucket": filter_dicts(_.get("bucket", []), pattern), + "committed": filter_dicts(_.get("committed", []), pattern), + "uncommitted": filter_dicts(_.get("uncommitted", []), pattern), } _ = filtered - if config.output.format == 'json': + if config.output.format == "json": print(json.dumps(_, indent=2)) else: yaml.dump(_, sys.stdout, default_flow_style=False) @@ -791,7 +1115,7 @@ def _dvc_meta(dvc_object, full=False) -> dict: @cli.command() -@click.argument('object_id', metavar='') +@click.argument("object_id", metavar="") @click.pass_obj def rm(config: Config, object_id: str): """Remove a single file from the server index, and MANIFEST. Does not alter META. @@ -800,29 +1124,50 @@ def rm(config: Config, object_id: str): """ try: - with Halo(text='Searching', spinner='line', placement='right', color='white'): + with Halo(text="Searching", spinner="line", placement="right", color="white"): object_id, path = file_name_or_guid(config, object_id) - with Halo(text='Deleting from server', spinner='line', placement='right', color='white'): + with Halo( + text="Deleting from server", + spinner="line", + placement="right", + color="white", + ): auth = gen3_tracker.config.ensure_auth(config=config) index = Gen3Index(auth) result = index.delete_record(object_id) if not result: if not path: - path = '' - click.secho(f"Failed to delete {object_id} from server. {path}", fg=ERROR_COLOR, file=sys.stderr) + path = "" + click.secho( + f"Failed to delete {object_id} from server. 
{path}", + fg=ERROR_COLOR, + file=sys.stderr, + ) else: - click.secho(f"Deleted {object_id} from server. {path}", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Deleted {object_id} from server. {path}", + fg=INFO_COLOR, + file=sys.stderr, + ) - with Halo(text='Scanning', spinner='line', placement='right', color='white'): + with Halo(text="Scanning", spinner="line", placement="right", color="white"): committed_files, dvc_objects = manifest(config.gen3.project_id) - dvc_objects = [dvc_object for dvc_object in dvc_objects if dvc_object.object_id == object_id] + dvc_objects = [ + dvc_object + for dvc_object in dvc_objects + if dvc_object.object_id == object_id + ] assert dvc_objects, f"{object_id} not found in MANIFEST." dvc_object = dvc_objects[0] - path = pathlib.Path('MANIFEST') / (dvc_object.out.path + ".dvc") + path = pathlib.Path("MANIFEST") / (dvc_object.out.path + ".dvc") assert path.exists(), f"{path} not found" path.unlink() - click.secho(f"Deleted {path} from MANIFEST. Please adjust META resources", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Deleted {path} from MANIFEST. Please adjust META resources", + fg=INFO_COLOR, + file=sys.stderr, + ) except Exception as e: click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr) @@ -858,10 +1203,10 @@ def ping(config: Config): msgs.append(str(e)) ok = False except Gen3AuthError as e: - msg = str(e).split(':')[0] + msg = str(e).split(":")[0] msgs.append(msg) msg2 = str(e).split('

')[-1] - msg2 = msg2.split('

')[0] + msg2 = msg2.split("

")[0] msgs.append(msg2) ok = False @@ -871,34 +1216,42 @@ def ping(config: Config): _ = "Configuration ERROR: " output.exit_code = 1 - _ = {'msg': _ + ', '.join(msgs)} + _ = {"msg": _ + ", ".join(msgs)} if auth: - _['endpoint'] = auth.endpoint - user_info = auth.curl('/user/user').json() - _['username'] = user_info['username'] + _["endpoint"] = auth.endpoint + user_info = auth.curl("/user/user").json() + _["username"] = user_info["username"] buckets = get_buckets(config=config) bucket_info = {} program_info = defaultdict(list) - for k, v in buckets['S3_BUCKETS'].items(): + for k, v in buckets["S3_BUCKETS"].items(): bucket_info[k] = {} - if 'programs' not in v: + if "programs" not in v: bucket_info[k] = "No `programs` found" - click.secho(f"WARNING: No `programs` found for bucket {k}", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"WARNING: No `programs` found for bucket {k}", + fg=INFO_COLOR, + file=sys.stderr, + ) continue - bucket_info[k] = ",".join(v['programs']) - for program in v['programs']: + bucket_info[k] = ",".join(v["programs"]) + for program in v["programs"]: program_info[program].append(k) - _['bucket_programs'] = bucket_info + _["bucket_programs"] = bucket_info for k, v in program_info.items(): if len(v) > 1: - click.secho(f"WARNING: {k} is in multiple buckets: {', '.join(v)}", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"WARNING: {k} is in multiple buckets: {', '.join(v)}", + fg=INFO_COLOR, + file=sys.stderr, + ) - assert 'authz' in user_info, "No authz found" + assert "authz" in user_info, "No authz found" authz_info = defaultdict(dict) - for k, v in user_info['authz'].items(): - authz_info[k] = ",".join(set([_['method'] for _ in v])) - _['your_access'] = dict(authz_info) + for k, v in user_info["authz"].items(): + authz_info[k] = ",".join(set([_["method"] for _ in v])) + _["your_access"] = dict(authz_info) output.update(_) diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index 888cf903..c6beda14 100644 --- 
a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -246,7 +246,6 @@ def scalars(self) -> dict: if (not isinstance(v, list) and not isinstance(v, dict)) } - @computed_field @property def codings(self) -> dict: @@ -261,8 +260,12 @@ def codings(self) -> dict: if isinstance(elem, dict): # TODO: implement hierarchy of codes rather than just taking last code? for value, source in normalize_coding(elem): - if len(v) > 1 and get_nested_value(elem, [source, 0, 'system']): - _codings[elem[source][0]["system"].split("/")[-1]] = value + if len(v) > 1 and get_nested_value( + elem, [source, 0, "system"] + ): + _codings[elem[source][0]["system"].split("/")[-1]] = ( + value + ) else: _codings[k] = value elif isinstance(v, dict): @@ -281,10 +284,15 @@ def identifiers(self) -> dict: if not identifiers_len: return {"identifier": None} elif identifiers_len == 1: - return {"identifier": identifiers[0].get('value')} + return {"identifier": identifiers[0].get("value")} else: - base_identifier = {"identifier": identifiers[0].get('value')} - base_identifier.update({identifier.get("system").split("/")[-1]: identifier.get("value") for identifier in identifiers[1:]}) + base_identifier = {"identifier": identifiers[0].get("value")} + base_identifier.update( + { + identifier.get("system").split("/")[-1]: identifier.get("value") + for identifier in identifiers[1:] + } + ) return base_identifier @computed_field @@ -375,7 +383,6 @@ def values(self) -> dict: if "code" in self.resource and "text" in self.resource["code"]: _values["observation_code"] = self.resource["code"]["text"] - assert len(_values) > 0, f"no values found in Observation: {self.resource}" return _values diff --git a/tests/__init__.py b/tests/__init__.py index 204633dd..7ff71bef 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -5,7 +5,13 @@ from gen3_tracker.cli import cli -def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], expected_exit_code: int = 0, expected_files: 
list[pathlib.Path] = []) -> Result: +def run( + runner: CliRunner, + args: list[str], + expected_output: list[str] = [], + expected_exit_code: int = 0, + expected_files: list[pathlib.Path] = [], +) -> Result: """Run a command and check the output, exit code and expected files.""" if isinstance(args, str): args = args.split() @@ -15,16 +21,20 @@ def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], exp expected_files = [expected_files] expected_files = [pathlib.Path(_) for _ in expected_files] - print('------------------------------------------------------------') + print("------------------------------------------------------------") print("g3t " + " ".join(args)) result = runner.invoke(cli, args) print("result.stdout", result.stdout) print("result.output", result.output) print("result.exception", result.exception) print("CWD", pathlib.Path.cwd()) - assert result.exit_code == expected_exit_code, f"g3t {' '.join(args)} exit_code: {result.exit_code}, expected: {expected_exit_code}" + assert ( + result.exit_code == expected_exit_code + ), f"g3t {' '.join(args)} exit_code: {result.exit_code}, expected: {expected_exit_code}" for line in expected_output: - assert line in result.output, f"output: {result.output}, expected: {expected_output}" + assert ( + line in result.output + ), f"output: {result.output}, expected: {expected_output}" print(f"{line} found in output.") for file in expected_files: assert file.exists(), f"{file} does not exist." 
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index fac2d49a..68c1c087 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -7,7 +7,13 @@ from gen3.query import Gen3Query -def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], expected_exit_code: int = 0, expected_files: list[pathlib.Path] = []) -> Result: +def run( + runner: CliRunner, + args: list[str], + expected_output: list[str] = [], + expected_exit_code: int = 0, + expected_files: list[pathlib.Path] = [], +) -> Result: """Run a command and check the output, exit code and expected files.""" if isinstance(args, str): args = args.split() @@ -17,13 +23,17 @@ def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], exp expected_files = [expected_files] expected_files = [pathlib.Path(_) for _ in expected_files] - print('------------------------------------------------------------') + print("------------------------------------------------------------") print("g3t " + " ".join(args)) result = runner.invoke(cli, args) print(result.stdout) - assert result.exit_code == expected_exit_code, f"exit_code: {result.exit_code}, expected: {expected_exit_code}" + assert ( + result.exit_code == expected_exit_code + ), f"exit_code: {result.exit_code}, expected: {expected_exit_code}" for line in expected_output: - assert line in result.output, f"output: {result.output}, expected: {expected_output}" + assert ( + line in result.output + ), f"output: {result.output}, expected: {expected_output}" print(f"{line} found in output.") for file in expected_files: assert file.exists(), f"{file} does not exist." 
@@ -37,11 +47,12 @@ def validate_document_in_grip(did: str, auth=None, project_id=None): if not auth: auth = ensure_auth(config=default()) token = auth.get_access_token() - result = requests.get(f"{auth.endpoint}/grip/writer/graphql/CALIPER/get-vertex/{did}/{project_id}", - headers={"Authorization": f"bearer {token}"} - ).json() - assert 'data' in result, f"Failed to query grip for {did} {result}" - assert result['data']['gid'] == did + result = requests.get( + f"{auth.endpoint}/grip/writer/graphql/CALIPER/get-vertex/{did}/{project_id}", + headers={"Authorization": f"bearer {token}"}, + ).json() + assert "data" in result, f"Failed to query grip for {did} {result}" + assert result["data"]["gid"] == did def validate_document_in_elastic(did, auth): @@ -55,7 +66,7 @@ def validate_document_in_elastic(did, auth): } } """, - variables={"filter": {"AND": [{"IN": {"id": [did]}}]}} + variables={"filter": {"AND": [{"IN": {"id": [did]}}]}}, ) print(result) - assert result['data']['file'][0]['id'] == did + assert result["data"]["file"][0]["id"] == did diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index b171120e..8de22443 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -16,7 +16,7 @@ def program() -> str: @pytest.fixture def project() -> str: - project = uuid.uuid4().hex.replace('-', '_') + project = uuid.uuid4().hex.replace("-", "_") return project diff --git a/tests/integration/test_bucket_import.py b/tests/integration/test_bucket_import.py index 5edc31d3..9a0959cc 100644 --- a/tests/integration/test_bucket_import.py +++ b/tests/integration/test_bucket_import.py @@ -42,8 +42,11 @@ def test_bucket_import(runner: CliRunner, project_id, tmpdir) -> None: print(project_id) - run(runner, ["--debug", "init", project_id, "--approve", "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve", "--no-server"], + expected_files=[".g3t", ".git"], + ) for _ in 
SHOULD_SUCCEED: run(runner, _.split()) @@ -60,11 +63,11 @@ def test_bucket_import(runner: CliRunner, project_id, tmpdir) -> None: result = run(runner, ["--debug", "--format", "json", "ls"]) listing = json.loads(result.stdout) - for _ in ['bucket', 'committed', 'uncommitted']: + for _ in ["bucket", "committed", "uncommitted"]: assert _ in listing # files should appear in uncommitted - assert len(listing['uncommitted']) == len(SHOULD_SUCCEED) + assert len(listing["uncommitted"]) == len(SHOULD_SUCCEED) # commit the changes run(runner, ["--debug", "commit", "-am", "initial commit"]) @@ -72,11 +75,11 @@ def test_bucket_import(runner: CliRunner, project_id, tmpdir) -> None: # test the ls command, should now be in committed result = run(runner, ["--debug", "--format", "json", "ls"]) listing = json.loads(result.stdout) - assert len(listing['committed']) == len(SHOULD_SUCCEED) + assert len(listing["committed"]) == len(SHOULD_SUCCEED) # test the ls filter for _ in EXPECTED_MANIFEST_PATHS: - bucket_name = _.split('/')[1] + bucket_name = _.split("/")[1] result = run(runner, ["--debug", "--format", "json", "ls", bucket_name]) listing = json.loads(result.stdout) - assert len(listing['committed']) == 1 + assert len(listing["committed"]) == 1 diff --git a/tests/integration/test_bundle.py b/tests/integration/test_bundle.py index f98d0e8c..d27d99ad 100644 --- a/tests/integration/test_bundle.py +++ b/tests/integration/test_bundle.py @@ -11,21 +11,21 @@ CHANGE_PATIENT = [ "--debug add s3://s3-bucket/p1-object.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1", "--debug meta init", - "--debug commit -am \"initial commit\"", + '--debug commit -am "initial commit"', "--debug add s3://s3-bucket/p1-object.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1-prime", "--debug meta init", - "--debug commit -am \"prime commit\"", + '--debug commit -am "prime commit"', ] # user made a mistake and 
added the wrong file CHANGE_FILE = [ "--debug add s3://s3-bucket/p1-object-mistake.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1", "--debug meta init", - "--debug commit -am \"initial commit\"", + '--debug commit -am "initial commit"', "--debug rm s3://s3-bucket/p1-object-mistake.txt", "--debug add s3://s3-bucket/p1-object-correct.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1", "--debug meta init", - "--debug commit -am \"prime commit\"", + '--debug commit -am "prime commit"', ] @@ -37,8 +37,11 @@ def test_change_patient(runner: CliRunner, project_id, tmpdir) -> None: print(project_id) - run(runner, ["--debug", "init", project_id, "--approve", "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve", "--no-server"], + expected_files=[".g3t", ".git"], + ) for _ in CHANGE_PATIENT: run(runner, _.split()) @@ -54,8 +57,10 @@ def test_change_patient(runner: CliRunner, project_id, tmpdir) -> None: assert all([_ == "DELETE" for _ in methods]), "Only DELETE method is expected." urls = [_.request.url for _ in bundle.entry] - assert any([_.startswith('Patient') for _ in urls]), "Expected to delete a Patient." - assert any([_.startswith('ResearchSubject') for _ in urls]), "Expected to delete a ResearchSubject." + assert any([_.startswith("Patient") for _ in urls]), "Expected to delete a Patient." + assert any( + [_.startswith("ResearchSubject") for _ in urls] + ), "Expected to delete a ResearchSubject." 
def test_change_file(runner: CliRunner, project_id, tmpdir) -> None: @@ -66,8 +71,11 @@ def test_change_file(runner: CliRunner, project_id, tmpdir) -> None: print(project_id) - run(runner, ["--debug", "init", project_id, "--approve", "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve", "--no-server"], + expected_files=[".g3t", ".git"], + ) for _ in CHANGE_FILE: run(runner, _.split()) @@ -83,4 +91,6 @@ def test_change_file(runner: CliRunner, project_id, tmpdir) -> None: assert all([_ == "DELETE" for _ in methods]), "Only DELETE method is expected." urls = [_.request.url for _ in bundle.entry] - assert any([_.startswith('DocumentReference') for _ in urls]), "Expected to delete a DocumentReference." + assert any( + [_.startswith("DocumentReference") for _ in urls] + ), "Expected to delete a DocumentReference." diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 86b03fb0..6237edab 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -17,23 +17,36 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: assert tmpdir.chdir() print(Path.cwd()) - assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set." + assert os.environ.get( + "G3T_PROFILE" + ), "G3T_PROFILE environment variable must be set." 
print(project_id) - run(runner, ["--debug", "init", project_id, "--approve"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve"], + expected_files=[".g3t", ".git"], + ) # check ping - run(runner, ["--debug", "ping"], expected_output=["bucket_programs", "your_access", "endpoint", "username"]) + run( + runner, + ["--debug", "ping"], + expected_output=["bucket_programs", "your_access", "endpoint", "username"], + ) # create a test file test_file = Path("my-project-data/hello.txt") test_file.parent.mkdir(parents=True, exist_ok=True) - test_file.write_text('hello\n') + test_file.write_text("hello\n") # add the file - run(runner, ["--debug", "add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"]) + run( + runner, + ["--debug", "add", str(test_file)], + expected_files=["MANIFEST/my-project-data/hello.txt.dvc"], + ) # should create a dvc file dvc_path = Path("MANIFEST/my-project-data/hello.txt.dvc") @@ -49,7 +62,11 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: object_id = dvc.object_id # create the meta file - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson"]) + run( + runner, + ["--debug", "meta", "init"], + expected_files=["META/DocumentReference.ndjson"], + ) # commit the changes, delegating to git run(runner, ["--debug", "commit", "-am", "initial commit"]) @@ -60,10 +77,18 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: # update the file test_file = Path("my-project-data/hello.txt") test_file.parent.mkdir(parents=True, exist_ok=True) - test_file.write_text('hello UPDATE\n') + test_file.write_text("hello UPDATE\n") # re-add the file - run(runner, ["--debug", "add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"]) - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson"]) + run( + runner, + ["--debug", "add", str(test_file)], + 
expected_files=["MANIFEST/my-project-data/hello.txt.dvc"], + ) + run( + runner, + ["--debug", "meta", "init"], + expected_files=["META/DocumentReference.ndjson"], + ) run(runner, ["--debug", "commit", "-am", "updated"]) run(runner, ["--debug", "meta", "validate"]) @@ -71,7 +96,11 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: run(runner, ["--debug", "meta", "graph"], expected_files=["meta.html"]) # create a dataframe - run(runner, ["--debug", "meta", "dataframe", 'DocumentReference'], expected_files=["DocumentReference.csv"]) + run( + runner, + ["--debug", "meta", "dataframe", "DocumentReference"], + expected_files=["DocumentReference.csv"], + ) # push to the server run(runner, ["--debug", "push"]) @@ -103,12 +132,25 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: # check the files exist in the cloned directory run_command("ls -l") - assert Path("my-project-data/hello.txt").exists(), "hello.txt does not exist in the cloned directory." + assert Path( + "my-project-data/hello.txt" + ).exists(), "hello.txt does not exist in the cloned directory." # remove the project from the server. # TODO note, this does not remove the files from the bucket (UChicago bug) # See https://ohsucomputationalbio.slack.com/archives/C043HPV0VMY/p1714065633867229 - run(runner, ["--debug", "projects", "empty", "--project_id", project_id, "--confirm", "empty"]) + run( + runner, + [ + "--debug", + "projects", + "empty", + "--project_id", + project_id, + "--confirm", + "empty", + ], + ) # TODO fix `collaborator rm` # arborist logs: "Policy `data_upload` does not exist for user `xxx@xxx.xxx`: not revoking. Check if it is assigned through a group." 
@@ -116,13 +158,27 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: # run(runner, ["--debug", "collaborator", "rm", username, "--approve"], expected_output=[username]) # add a user with write permissions - run(runner, ["--debug", "collaborator", "add", "foo@bar.com", "--write", "--approve"]) + run( + runner, + ["--debug", "collaborator", "add", "foo@bar.com", "--write", "--approve"], + ) # add a user from another directory (without config) os.mkdir("empty") os.chdir("empty") program, project = project_id.split("-") - run(runner, ["--debug", "collaborator", "add", "foo2@bar.com", f"/programs/{program}/projects/{project}", "--write", "--approve"]) + run( + runner, + [ + "--debug", + "collaborator", + "add", + "foo2@bar.com", + f"/programs/{program}/projects/{project}", + "--write", + "--approve", + ], + ) def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> None: @@ -131,20 +187,29 @@ def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> N assert tmpdir.chdir() print(Path.cwd()) - assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set." + assert os.environ.get( + "G3T_PROFILE" + ), "G3T_PROFILE environment variable must be set." 
print(project_id) - run(runner, ["--debug", "init", project_id, "--approve"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve"], + expected_files=[".g3t", ".git"], + ) # create a test file test_file = Path("my-project-data/hello.txt") test_file.parent.mkdir(parents=True, exist_ok=True) - test_file.write_text('hello\n') + test_file.write_text("hello\n") # add the file - run(runner, ["--debug", "add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"]) + run( + runner, + ["--debug", "add", str(test_file)], + expected_files=["MANIFEST/my-project-data/hello.txt.dvc"], + ) # should create a dvc file dvc_path = Path("MANIFEST/my-project-data/hello.txt.dvc") @@ -160,7 +225,11 @@ def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> N object_id = dvc.object_id # create the meta file - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson"]) + run( + runner, + ["--debug", "meta", "init"], + expected_files=["META/DocumentReference.ndjson"], + ) # commit the changes, delegating to git run(runner, ["--debug", "commit", "-am", "initial commit"]) @@ -184,24 +253,44 @@ def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> N # remove the project from the server. 
# TODO note, this does not remove the files from the bucket (UChicago bug) # See https://ohsucomputationalbio.slack.com/archives/C043HPV0VMY/p1714065633867229 - run(runner, ["--debug", "projects", "empty", "--project_id", project_id, "--confirm", "empty"]) - - -def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, project_id: str, tmp_path: Path): + run( + runner, + [ + "--debug", + "projects", + "empty", + "--project_id", + project_id, + "--confirm", + "empty", + ], + ) + + +def test_push_fails_with_invalid_doc_ref_creation_date( + runner: CliRunner, project_id: str, tmp_path: Path +): # check - assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set." + assert os.environ.get( + "G3T_PROFILE" + ), "G3T_PROFILE environment variable must be set." # copy fixture to temp test dir project_dir = "fhir-gdc-examples" fixtures_path = Path(os.path.dirname(__file__)).parent / "fixtures" fhir_gdc_dir = fixtures_path / project_dir - modified_doc_ref_path = fixtures_path / "negative-examples/fhir-gdc-DocumentReference-invalid-date.ndjson" + modified_doc_ref_path = ( + fixtures_path + / "negative-examples/fhir-gdc-DocumentReference-invalid-date.ndjson" + ) # init project new_project_dir = tmp_path / project_dir shutil.copytree(fhir_gdc_dir, new_project_dir) - shutil.copy(modified_doc_ref_path, new_project_dir / "META" / "DocumentReference.ndjson" ) + shutil.copy( + modified_doc_ref_path, new_project_dir / "META" / "DocumentReference.ndjson" + ) # get invalid date from fixture doc_ref_content = pd.read_json(modified_doc_ref_path, lines=True)["content"][0] @@ -211,14 +300,17 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec log_file_path = "logs/publish.log" os.chdir(new_project_dir) run(runner, ["init", project_id, "--approve"]) - result = run(runner, - ["push", "--skip_validate", "--overwrite"], - expected_exit_code=1, - expected_files=[log_file_path] - ) + result = run( + runner, + ["push", 
"--skip_validate", "--overwrite"], + expected_exit_code=1, + expected_files=[log_file_path], + ) # ensure push has useful useful error logs - assert log_file_path in result.output, f"expected log file path in stdout, instead got:\n{result.output}" + assert ( + log_file_path in result.output + ), f"expected log file path in stdout, instead got:\n{result.output}" # ensure saved log file contains info about invalid date with open(log_file_path, "r") as log_file: @@ -226,40 +318,53 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec str_lines = str(lines) for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: - assert keyword in str_lines, f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' + assert ( + keyword in str_lines + ), f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' -def test_push_fails_with_no_write_permissions(runner: CliRunner, project_id: str, tmp_path: Path): +def test_push_fails_with_no_write_permissions( + runner: CliRunner, project_id: str, tmp_path: Path +): # setup - assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set." + assert os.environ.get( + "G3T_PROFILE" + ), "G3T_PROFILE environment variable must be set." 
os.chdir(tmp_path)
 
     # initialize project without approving permissions
     log_file_path = "logs/publish.log"
-    run(runner, [ "init", project_id],
-        expected_files=[".g3t", ".git"])
+    run(runner, ["init", project_id], expected_files=[".g3t", ".git"])
 
     # create test file
     test_file = Path("my-project-data/hello.txt")
     test_file.parent.mkdir(parents=True, exist_ok=True)
-    test_file.write_text('hello\n')
+    test_file.write_text("hello\n")
 
     # prepare test file for submission
-    run(runner, ["add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"])
+    run(
+        runner,
+        ["add", str(test_file)],
+        expected_files=["MANIFEST/my-project-data/hello.txt.dvc"],
+    )
     run(runner, ["meta", "init"], expected_files=["META/DocumentReference.ndjson"])
 
-    print("current directory:",os.getcwd())
+    print("current directory:", os.getcwd())
     run(runner, ["commit", "-m", "initial commit"])
 
     # push
     result = run(runner, ["push"], expected_exit_code=1, expected_files=[log_file_path])
 
     # ensure stdout mentions log files
-    assert log_file_path in result.output, f"expected log file path in stdout, instead got:\n{result.output}"
+    assert (
+        log_file_path in result.output
+    ), f"expected log file path in stdout, instead got:\n{result.output}"
 
     # check valid error messages within
     with open(log_file_path, "r") as log_file:
         # grab last line
-        line = [l for l in log_file.readlines()][-1]
+        line = [_ for _ in log_file.readlines()][-1]
         for output in ["401", "permission"]:
-            assert "401" in line, f"expected {log_file_path} to contain {output}, instead got: \n{line}"
+            assert (
+                output in line
+            ), f"expected {log_file_path} to contain {output}, instead got: \n{line}"
 diff --git a/tests/unit/meta/conftest.py b/tests/unit/meta/conftest.py
index b171120e..8de22443 100644
--- a/tests/unit/meta/conftest.py
+++ b/tests/unit/meta/conftest.py
@@ -16,7 +16,7 @@ def program() -> str:
 
 @pytest.fixture
 def project() -> str:
-    project = uuid.uuid4().hex.replace('-', '_')
+    project = uuid.uuid4().hex.replace("-", 
"_") return project diff --git a/tests/unit/meta/test_meta.py b/tests/unit/meta/test_meta.py index 818ba083..d602a5c6 100644 --- a/tests/unit/meta/test_meta.py +++ b/tests/unit/meta/test_meta.py @@ -10,15 +10,20 @@ from tests import run -def test_assert_object_id_invalid_on_project_id_change(runner: CliRunner, project_id, tmp_path: pathlib.Path) -> None: +def test_assert_object_id_invalid_on_project_id_change( + runner: CliRunner, project_id, tmp_path: pathlib.Path +) -> None: """Test object_id validation command.""" # change to the temporary directory os.chdir(tmp_path) print(pathlib.Path.cwd()) print(project_id) - run(runner, ["--debug", "--profile", "local", "init", project_id, "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "--profile", "local", "init", project_id, "--no-server"], + expected_files=[".g3t", ".git"], + ) # create test files cmds = """ @@ -27,42 +32,67 @@ def test_assert_object_id_invalid_on_project_id_change(runner: CliRunner, projec echo "hello" > my-project-data/hello.txt echo "big-data" > my-read-only-data/big-file.txt ln -s $PWD/my-read-only-data/big-file.txt my-project-data/big-file.txt - """.split('\n') + """.split( + "\n" + ) for cmd in cmds: run_command(cmd, no_capture=True) - assert pathlib.Path("my-project-data/hello.txt").exists(), "hello.txt does not exist." - assert pathlib.Path("my-read-only-data/big-file.txt").exists(), "my-read-only-data/big-file.txt does not exist." - assert pathlib.Path("my-project-data/big-file.txt").exists(), "my-project-data/big-file.txt does not exist." + assert pathlib.Path( + "my-project-data/hello.txt" + ).exists(), "hello.txt does not exist." + assert pathlib.Path( + "my-read-only-data/big-file.txt" + ).exists(), "my-read-only-data/big-file.txt does not exist." + assert pathlib.Path( + "my-project-data/big-file.txt" + ).exists(), "my-project-data/big-file.txt does not exist." 
files = ["my-project-data/hello.txt", "my-project-data/big-file.txt"] patients = ["P1", "P2"] for f, p in zip(files, patients): - run(runner, ["--debug", "add", str(f), "--patient", p], expected_files=[f"MANIFEST/{f}.dvc"]) - - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson", "META/Patient.ndjson", "META/ResearchStudy.ndjson", "META/ResearchSubject.ndjson"]) + run( + runner, + ["--debug", "add", str(f), "--patient", p], + expected_files=[f"MANIFEST/{f}.dvc"], + ) + + run( + runner, + ["--debug", "meta", "init"], + expected_files=[ + "META/DocumentReference.ndjson", + "META/Patient.ndjson", + "META/ResearchStudy.ndjson", + "META/ResearchSubject.ndjson", + ], + ) run(runner, ["--debug", "meta", "validate"]) - run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]) + run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]) # now change the project_id to something new # this should cause invalid object_id errors config = gen3_tracker.config.default() config.gen3.project_id = config.gen3.project_id + "XXXX" - with open('.g3t/config.yaml', 'w') as f: + with open(".g3t/config.yaml", "w") as f: yaml.dump(config.model_dump(), f) - run(runner, ["commit", "-m", "change-project_id", '.g3t/config.yaml']) + run(runner, ["commit", "-m", "change-project_id", ".g3t/config.yaml"]) # should error now run(runner, ["--debug", "meta", "validate"], expected_exit_code=1) run(runner, ["--debug", "push", "--dry-run"], expected_exit_code=1) # also check skip_validate - run(runner, ["--debug", "push", "--dry-run", "--skip_validate"], expected_exit_code=0) + run( + runner, + ["--debug", "push", "--dry-run", "--skip_validate"], + expected_exit_code=0, + ) # should pass now config.gen3.project_id = config.gen3.project_id.replace("XXXX", "") - with open('.g3t/config.yaml', 'w') as f: + with open(".g3t/config.yaml", "w") as f: yaml.dump(config.model_dump(), f) - run(runner, ["commit", "-m", 
"restore-project_id", '.g3t/config.yaml']) + run(runner, ["commit", "-m", "restore-project_id", ".g3t/config.yaml"]) # ensure we can validate without passing project id results = validate(directory_path="META") @@ -72,7 +102,9 @@ def test_assert_object_id_invalid_on_project_id_change(runner: CliRunner, projec run(runner, ["--debug", "push", "--dry-run"], expected_exit_code=0) -def test_assert_add_specimen_after_init(runner: CliRunner, project_id, tmp_path: pathlib.Path) -> None: +def test_assert_add_specimen_after_init( + runner: CliRunner, project_id, tmp_path: pathlib.Path +) -> None: """Test meta skeleton handles re-add of data with new specimen""" # change to the temporary directory os.chdir(tmp_path) @@ -80,8 +112,11 @@ def test_assert_add_specimen_after_init(runner: CliRunner, project_id, tmp_path: print(project_id) # init the project, no server - run(runner, ["--debug", "--profile", "local", "init", project_id, "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "--profile", "local", "init", project_id, "--no-server"], + expected_files=[".g3t", ".git"], + ) # create test files cmds = """ @@ -90,34 +125,73 @@ def test_assert_add_specimen_after_init(runner: CliRunner, project_id, tmp_path: echo "hello" > my-project-data/hello.txt echo "big-data" > my-read-only-data/big-file.txt ln -s $PWD/my-read-only-data/big-file.txt my-project-data/big-file.txt - """.split('\n') + """.split( + "\n" + ) for cmd in cmds: run_command(cmd, no_capture=True) - assert pathlib.Path("my-project-data/hello.txt").exists(), "hello.txt does not exist." - assert pathlib.Path("my-read-only-data/big-file.txt").exists(), "my-read-only-data/big-file.txt does not exist." - assert pathlib.Path("my-project-data/big-file.txt").exists(), "my-project-data/big-file.txt does not exist." + assert pathlib.Path( + "my-project-data/hello.txt" + ).exists(), "hello.txt does not exist." 
+ assert pathlib.Path( + "my-read-only-data/big-file.txt" + ).exists(), "my-read-only-data/big-file.txt does not exist." + assert pathlib.Path( + "my-project-data/big-file.txt" + ).exists(), "my-project-data/big-file.txt does not exist." def _files_with_patients(): files = ["my-project-data/hello.txt", "my-project-data/big-file.txt"] patients = ["P1", "P2"] for f, p in zip(files, patients): - run(runner, ["--debug", "add", str(f), "--patient", p], expected_files=[f"MANIFEST/{f}.dvc"]) - - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson", "META/Patient.ndjson", "META/ResearchStudy.ndjson", "META/ResearchSubject.ndjson"]) + run( + runner, + ["--debug", "add", str(f), "--patient", p], + expected_files=[f"MANIFEST/{f}.dvc"], + ) + + run( + runner, + ["--debug", "meta", "init"], + expected_files=[ + "META/DocumentReference.ndjson", + "META/Patient.ndjson", + "META/ResearchStudy.ndjson", + "META/ResearchSubject.ndjson", + ], + ) run(runner, ["--debug", "meta", "validate"]) - run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]) + run( + runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"] + ) def _files_with_patients_and_specimens(): files = ["my-project-data/hello.txt", "my-project-data/big-file.txt"] patients = ["P1", "P2"] specimens = ["S1", "S2"] for f, p, s in zip(files, patients, specimens): - run(runner, ["--debug", "add", str(f), "--patient", p, "--specimen", s], expected_files=[f"MANIFEST/{f}.dvc"]) - - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson", "META/Patient.ndjson", "META/ResearchStudy.ndjson", "META/ResearchSubject.ndjson", "META/Specimen.ndjson"]) + run( + runner, + ["--debug", "add", str(f), "--patient", p, "--specimen", s], + expected_files=[f"MANIFEST/{f}.dvc"], + ) + + run( + runner, + ["--debug", "meta", "init"], + expected_files=[ + "META/DocumentReference.ndjson", + "META/Patient.ndjson", + 
"META/ResearchStudy.ndjson", + "META/ResearchSubject.ndjson", + "META/Specimen.ndjson", + ], + ) run(runner, ["--debug", "meta", "validate"]) - run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]) + run( + runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"] + ) # create initial association between patients and files _files_with_patients() diff --git a/tests/unit/test_coding_conventions.py b/tests/unit/test_coding_conventions.py index 34171929..e88b5cb6 100644 --- a/tests/unit/test_coding_conventions.py +++ b/tests/unit/test_coding_conventions.py @@ -7,7 +7,10 @@ def test_coding_conventions(): """Check python conventions on key directories""" script_dir = os.path.dirname(os.path.abspath(__file__)) - directories = [os.path.join(script_dir, "../../gen3_tracker"), os.path.join(script_dir, "../../tests")] + directories = [ + os.path.join(script_dir, "../../gen3_tracker"), + os.path.join(script_dir, "../../tests"), + ] failures = [] for directory in directories: cmd_str = f"flake8 {directory} --max-line-length 256 --exclude test_flatten_fhir_example.py" diff --git a/tests/unit/test_flatten_fhir_example.py b/tests/unit/test_flatten_fhir_example.py index 82acc470..9b923d88 100644 --- a/tests/unit/test_flatten_fhir_example.py +++ b/tests/unit/test_flatten_fhir_example.py @@ -29,19 +29,40 @@ # test data ------------------------------------------------------------ # The following fixtures provide test data for the tests below. + @pytest.fixture def patient_dict() -> dict: # TODO - read the patient example from a file - patient_dict = {"resourceType": "Patient", "id": "3", "meta": {"lastUpdated": "2012-05-29T23:45:32Z"}, - "text": {"status": "generated", - "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003eKidd, Kari. 
SSN:\n 444555555\u003c/div\u003e"}, - "identifier": [{"type": { - "coding": [{"system": "http://terminology.hl7.org/CodeSystem/v2-0203", "code": "SS", "display": "Social Security number"}]}, - "system": "http://hl7.org/fhir/sid/us-ssn", "value": "444555555"}], "active": True, - "name": [{"use": "official", "family": "Kidd", "given": ["Kari"]}], - "telecom": [{"system": "phone", "value": "555-555-2005", "use": "work"}], "gender": "female", - "address": [{"use": "home", "line": ["2222 Home Street"]}], - "managingOrganization": {"reference": "Organization/hl7"}} + patient_dict = { + "resourceType": "Patient", + "id": "3", + "meta": {"lastUpdated": "2012-05-29T23:45:32Z"}, + "text": { + "status": "generated", + "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003eKidd, Kari. SSN:\n 444555555\u003c/div\u003e', + }, + "identifier": [ + { + "type": { + "coding": [ + { + "system": "http://terminology.hl7.org/CodeSystem/v2-0203", + "code": "SS", + "display": "Social Security number", + } + ] + }, + "system": "http://hl7.org/fhir/sid/us-ssn", + "value": "444555555", + } + ], + "active": True, + "name": [{"use": "official", "family": "Kidd", "given": ["Kari"]}], + "telecom": [{"system": "phone", "value": "555-555-2005", "use": "work"}], + "gender": "female", + "address": [{"use": "home", "line": ["2222 Home Street"]}], + "managingOrganization": {"reference": "Organization/hl7"}, + } yield patient_dict @@ -52,38 +73,42 @@ def specimen_dict(): "id": "denovo-3", "text": { "status": "generated", - "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative\u003c/b\u003e\u003c/p\u003e\u003cdiv style\u003d\"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%\"\u003e\u003cp style\u003d\"margin-bottom: 0px\"\u003eResource \u0026quot;denovo-3\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003eidentifier\u003c/b\u003e: 
id: 3\u003c/p\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: available\u003c/p\u003e\u003cp\u003e\u003cb\u003etype\u003c/b\u003e: Venous blood specimen \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"https://browser.ihtsdotools.org/\"\u003eSNOMED CT\u003c/a\u003e#122555007)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d\"Patient-denovoFather.html\"\u003ePatient/denovoFather: John Doe\u003c/a\u003e \u0026quot; DOE\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003ereceivedTime\u003c/b\u003e: 2021-01-01 01:01:01+0000\u003c/p\u003e\u003cp\u003e\u003cb\u003erequest\u003c/b\u003e: \u003ca href\u003d\"ServiceRequest-genomicServiceRequest.html\"\u003eServiceRequest/genomicServiceRequest\u003c/a\u003e\u003c/p\u003e\u003ch3\u003eCollections\u003c/h3\u003e\u003ctable class\u003d\"grid\"\u003e\u003ctr\u003e\u003ctd\u003e-\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollector\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollected[x]\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eQuantity\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eMethod\u003c/b\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd\u003e*\u003c/td\u003e\u003ctd\u003e\u003ca href\u003d\"Practitioner-practitioner01.html\"\u003ePractitioner/practitioner01\u003c/a\u003e \u0026quot; DOEL\u0026quot;\u003c/td\u003e\u003ctd\u003e2021-01-01 01:01:00+0000\u003c/td\u003e\u003ctd\u003e1 mL\u003c/td\u003e\u003ctd\u003eLine, Venous \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"http://terminology.hl7.org/3.1.0/CodeSystem-v2-0488.html\"\u003especimenCollectionMethod\u003c/a\u003e#LNV)\u003c/span\u003e\u003c/td\u003e\u003c/tr\u003e\u003c/table\u003e\u003c/div\u003e"}, + "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003e\u003cp\u003e\u003cb\u003eGenerated 
Narrative\u003c/b\u003e\u003c/p\u003e\u003cdiv style\u003d"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%"\u003e\u003cp style\u003d"margin-bottom: 0px"\u003eResource \u0026quot;denovo-3\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003eidentifier\u003c/b\u003e: id: 3\u003c/p\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: available\u003c/p\u003e\u003cp\u003e\u003cb\u003etype\u003c/b\u003e: Venous blood specimen \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca href\u003d"https://browser.ihtsdotools.org/"\u003eSNOMED CT\u003c/a\u003e#122555007)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d"Patient-denovoFather.html"\u003ePatient/denovoFather: John Doe\u003c/a\u003e \u0026quot; DOE\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003ereceivedTime\u003c/b\u003e: 2021-01-01 01:01:01+0000\u003c/p\u003e\u003cp\u003e\u003cb\u003erequest\u003c/b\u003e: \u003ca href\u003d"ServiceRequest-genomicServiceRequest.html"\u003eServiceRequest/genomicServiceRequest\u003c/a\u003e\u003c/p\u003e\u003ch3\u003eCollections\u003c/h3\u003e\u003ctable class\u003d"grid"\u003e\u003ctr\u003e\u003ctd\u003e-\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollector\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollected[x]\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eQuantity\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eMethod\u003c/b\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd\u003e*\u003c/td\u003e\u003ctd\u003e\u003ca href\u003d"Practitioner-practitioner01.html"\u003ePractitioner/practitioner01\u003c/a\u003e \u0026quot; DOEL\u0026quot;\u003c/td\u003e\u003ctd\u003e2021-01-01 01:01:00+0000\u003c/td\u003e\u003ctd\u003e1 mL\u003c/td\u003e\u003ctd\u003eLine, Venous \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 
1px solid khaki"\u003e (\u003ca href\u003d"http://terminology.hl7.org/3.1.0/CodeSystem-v2-0488.html"\u003especimenCollectionMethod\u003c/a\u003e#LNV)\u003c/span\u003e\u003c/td\u003e\u003c/tr\u003e\u003c/table\u003e\u003c/div\u003e', + }, "identifier": [ { "system": "http://www.somesystemabc.net/identifiers/specimens", - "value": "3"}], + "value": "3", + } + ], "status": "available", "type": { "coding": [ { "system": "http://snomed.info/sct", "code": "122555007", - "display": "Venous blood specimen"}]}, - "subject": { - "reference": "Patient/denovoFather", - "display": "John Doe"}, + "display": "Venous blood specimen", + } + ] + }, + "subject": {"reference": "Patient/denovoFather", "display": "John Doe"}, "receivedTime": "2021-01-01T01:01:01Z", - "request": [ - { - "reference": "ServiceRequest/genomicServiceRequest"}], + "request": [{"reference": "ServiceRequest/genomicServiceRequest"}], "collection": { - "collector": { - "reference": "Practitioner/practitioner01"}, + "collector": {"reference": "Practitioner/practitioner01"}, "collectedDateTime": "2021-01-01T01:01:00Z", - "quantity": { - "value": 1, - "unit": "mL"}, + "quantity": {"value": 1, "unit": "mL"}, "method": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/v2-0488", "code": "LNV", - "display": "Line, Venous"}]}}} + "display": "Line, Venous", + } + ] + }, + }, + } @pytest.fixture @@ -93,14 +118,14 @@ def observation_eye_color_dict(): "id": "eye-color", "text": { "status": "generated", - "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d\"eye-color\"\u003e \u003c/a\u003e\u003ca name\u003d\"hceye-color\"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d\"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%\"\u003e\u003cp style\u003d\"margin-bottom: 0px\"\u003eResource Observation 
\u0026quot;eye-color\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: final\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: eye color \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e ()\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d\"patient-example.html\"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 2016-05-18\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: blue\u003c/p\u003e\u003c/div\u003e"}, + "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d"eye-color"\u003e \u003c/a\u003e\u003ca name\u003d"hceye-color"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%"\u003e\u003cp style\u003d"margin-bottom: 0px"\u003eResource Observation \u0026quot;eye-color\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: final\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: eye color \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e ()\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d"patient-example.html"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 2016-05-18\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: blue\u003c/p\u003e\u003c/div\u003e', + }, "status": "final", - "code": { - "text": "eye color"}, - "subject": { - "reference": "Patient/example"}, + "code": {"text": "eye color"}, + "subject": {"reference": "Patient/example"}, 
"effectiveDateTime": "2016-05-18", - "valueString": "blue"} + "valueString": "blue", + } @pytest.fixture @@ -110,7 +135,8 @@ def observation_bmi_dict(): "id": "bmi-using-related", "text": { "status": "generated", - "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d\"bmi-using-related\"\u003e \u003c/a\u003e\u003ca name\u003d\"hcbmi-using-related\"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d\"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%\"\u003e\u003cp style\u003d\"margin-bottom: 0px\"\u003eResource Observation \u0026quot;bmi-using-related\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: \u003cspan title\u003d\" \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d need to fix vitals to removed fixed value \u0027has-member\u0027 \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\t\u0026lt;meta\u0026gt;\n\t\t\u0026lt;profile value\u003d\u0026quot;http://hl7.org/fhir/StructureDefinition/vitalsigns\u0026quot;/\u0026gt;\n\t\u0026lt;/meta\u0026gt;\n \"\u003efinal\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecategory\u003c/b\u003e: Vital Signs \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"http://terminology.hl7.org/5.5.0/CodeSystem-observation-category.html\"\u003eObservation Category Codes\u003c/a\u003e#vital-signs)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: BMI \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"https://loinc.org/\"\u003eLOINC\u003c/a\u003e#39156-5 \u0026quot;Body mass index (BMI) [Ratio]\u0026quot;)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca 
href\u003d\"patient-example.html\"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 1999-07-02\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: 16.2 kg/m2\u003cspan style\u003d\"background: LightGoldenRodYellow\"\u003e (Details: UCUM code kg/m2 \u003d \u0027kg/m2\u0027)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ederivedFrom\u003c/b\u003e: \u003c/p\u003e\u003cul\u003e\u003cli\u003e\u003ca href\u003d\"broken-link.html\"\u003eObservation/bodyheight: Body Height\u003c/a\u003e\u003c/li\u003e\u003cli\u003e\u003ca href\u003d\"observation-example.html\"\u003eObservation/example: Body Weight\u003c/a\u003e\u003c/li\u003e\u003c/ul\u003e\u003c/div\u003e"}, + "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d"bmi-using-related"\u003e \u003c/a\u003e\u003ca name\u003d"hcbmi-using-related"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%"\u003e\u003cp style\u003d"margin-bottom: 0px"\u003eResource Observation \u0026quot;bmi-using-related\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: \u003cspan title\u003d" \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d need to fix vitals to removed fixed value \u0027has-member\u0027 \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\t\u0026lt;meta\u0026gt;\n\t\t\u0026lt;profile value\u003d\u0026quot;http://hl7.org/fhir/StructureDefinition/vitalsigns\u0026quot;/\u0026gt;\n\t\u0026lt;/meta\u0026gt;\n "\u003efinal\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecategory\u003c/b\u003e: Vital Signs \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca 
href\u003d"http://terminology.hl7.org/5.5.0/CodeSystem-observation-category.html"\u003eObservation Category Codes\u003c/a\u003e#vital-signs)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: BMI \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca href\u003d"https://loinc.org/"\u003eLOINC\u003c/a\u003e#39156-5 \u0026quot;Body mass index (BMI) [Ratio]\u0026quot;)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d"patient-example.html"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 1999-07-02\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: 16.2 kg/m2\u003cspan style\u003d"background: LightGoldenRodYellow"\u003e (Details: UCUM code kg/m2 \u003d \u0027kg/m2\u0027)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ederivedFrom\u003c/b\u003e: \u003c/p\u003e\u003cul\u003e\u003cli\u003e\u003ca href\u003d"broken-link.html"\u003eObservation/bodyheight: Body Height\u003c/a\u003e\u003c/li\u003e\u003cli\u003e\u003ca href\u003d"observation-example.html"\u003eObservation/example: Body Weight\u003c/a\u003e\u003c/li\u003e\u003c/ul\u003e\u003c/div\u003e', + }, "status": "final", "category": [ { @@ -118,35 +144,41 @@ def observation_bmi_dict(): { "system": "http://terminology.hl7.org/CodeSystem/observation-category", "code": "vital-signs", - "display": "Vital Signs"}], - "text": "Vital Signs"}], + "display": "Vital Signs", + } + ], + "text": "Vital Signs", + } + ], "code": { "coding": [ { "system": "http://loinc.org", "code": "39156-5", - "display": "Body mass index (BMI) [Ratio]"}], - "text": "BMI"}, - "subject": { - "reference": "Patient/example"}, + "display": "Body mass index (BMI) [Ratio]", + } + ], + "text": "BMI", + }, + "subject": {"reference": "Patient/example"}, "effectiveDateTime": "1999-07-02", "valueQuantity": { "value": 16.2, "unit": 
"kg/m2", "system": "http://unitsofmeasure.org", - "code": "kg/m2"}, + "code": "kg/m2", + }, "derivedFrom": [ - { - "reference": "Observation/bodyheight", - "display": "Body Height"}, - { - "reference": "Observation/example", - "display": "Body Weight"}]} + {"reference": "Observation/bodyheight", "display": "Body Height"}, + {"reference": "Observation/example", "display": "Body Weight"}, + ], + } # flatteners ------------------------------------------------------------ # The following functions are used to flatten the FHIR resources. + def flatten_simple(self: DomainResource): """Convert the DomainResource instance to just an id.""" return self.id @@ -161,13 +193,17 @@ def _isodate(v): def flatten_scalars(self: DomainResource) -> dict: """Convert the DomainResource instance to a dictionary.""" - _ = {k: _isodate(v) for k, v in self.dict().items() if not isinstance(v, (list, dict))} + _ = { + k: _isodate(v) + for k, v in self.dict().items() + if not isinstance(v, (list, dict)) + } return _ def flatten_references(self: DomainResource) -> dict: """Convert the DomainResource instance to a dictionary.""" - fields = [_ for _ in self.__fields__.keys() if not _.endswith('__ext')] + fields = [_ for _ in self.__fields__.keys() if not _.endswith("__ext")] _ = {} # if any top level field in this resource is a Reference, use the Reference.reference https://build.fhir.org/references-definitions.html#Reference.reference for k in fields: @@ -181,14 +217,16 @@ def flatten_references(self: DomainResource) -> dict: def flatten_identifier(self: Identifier) -> dict: """Convert the Identifier instance to a key value, use a simplified system as key.""" parsed_url = urlparse(self.system) - path_parts = parsed_url.path.split('/') # e.g. "http://hl7.org/fhir/sid/us-ssn" -> us-ssn - key = path_parts[-1] if path_parts else 'identifier' + path_parts = parsed_url.path.split( + "/" + ) # e.g. 
"http://hl7.org/fhir/sid/us-ssn" -> us-ssn + key = path_parts[-1] if path_parts else "identifier" return {key: self.value} def flatten_coding(self: Coding) -> dict: """Convert the DomainResource instance to a dictionary.""" - return {'display': self.display} + return {"display": self.display} def flatten_scalars_and_references(self: DomainResource) -> dict: @@ -213,12 +251,12 @@ def flatten_observation(self: Observation) -> dict: _ = flatten_scalars_references_identifiers(self) # normalize all the valueXXXXX to 'value' if self.valueQuantity: - _['value'] = f"{self.valueQuantity.value} {self.valueQuantity.unit}" + _["value"] = f"{self.valueQuantity.value} {self.valueQuantity.unit}" elif self.valueString: - _['value'] = self.valueString - del _['valueString'] + _["value"] = self.valueString + del _["valueString"] elif self.valueCodeableConcept: - _['value'] = self.valueCodeableConcept.text + _["value"] = self.valueCodeableConcept.text # there are many other value types, but we'll ignore them for now # see https://build.fhir.org/observation-definitions.html#Observation.value_x_ # Quantity|CodeableConcept|string|boolean|integer|Range|Ratio|SampledData|time|dateTime|Period|Attachment|Reference(MolecularSequence) @@ -229,6 +267,7 @@ def flatten_observation(self: Observation) -> dict: # patchers ------------------------------------------------------------ # The following fixtures are used to patch the DomainResource class to add the desired method. 
+ @pytest.fixture def patched_domain_resource_simple() -> bool: """Patch the DomainResource class to add a flatten method.""" @@ -284,66 +323,126 @@ def patched_scalars_references_identifiers_observation() -> bool: # tests ------------------------------------------------------------ + def test_patient_without_flatten(patient_dict: dict): """This patient object should NOT have a 'flatten' method.""" # without path dependency, just have a plain patient object with no flatten method patient = Patient.parse_obj(patient_dict) - assert not hasattr(patient, 'flatten'), "Patient object should not have a 'flatten' method" + assert not hasattr( + patient, "flatten" + ), "Patient object should not have a 'flatten' method" def test_patient_with_simple(patched_domain_resource_simple: bool, patient_dict: dict): """This patient object should have a 'flatten' method.""" patient = Patient.parse_obj(patient_dict) - assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method" - assert patient.flatten() == patient.id, f"Patient.flatten() should return {patient.id}" + assert hasattr( + patient, "flatten" + ), "Patient object does not have a 'flatten' method" + assert ( + patient.flatten() == patient.id + ), f"Patient.flatten() should return {patient.id}" def test_patient_with_scalars(patched_scalars: bool, patient_dict: dict): """This patient object should have a 'flatten' method that returns a dict of scalar values.""" patient = Patient.parse_obj(patient_dict) - assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method" - assert patient.flatten() == {'active': True, 'gender': 'female', 'id': '3', 'resourceType': 'Patient'}, "Patient.flatten() should return a dict of all scalar values" - - -def test_patient_with_scalars_and_references(patched_scalars_and_references: bool, patient_dict: dict): + assert hasattr( + patient, "flatten" + ), "Patient object does not have a 'flatten' method" + assert patient.flatten() == { + "active": True, 
+ "gender": "female", + "id": "3", + "resourceType": "Patient", + }, "Patient.flatten() should return a dict of all scalar values" + + +def test_patient_with_scalars_and_references( + patched_scalars_and_references: bool, patient_dict: dict +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" patient = Patient.parse_obj(patient_dict) - assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method" - assert patient.flatten() == {'active': True, 'gender': 'female', 'id': '3', 'managingOrganization': 'Organization/hl7', 'resourceType': 'Patient'}, "Patient.flatten() should return a dict of all scalar values and references" - - -def test_patient_with_scalars_references_identifiers(patched_scalars_references_identifiers: bool, patient_dict: dict): + assert hasattr( + patient, "flatten" + ), "Patient object does not have a 'flatten' method" + assert patient.flatten() == { + "active": True, + "gender": "female", + "id": "3", + "managingOrganization": "Organization/hl7", + "resourceType": "Patient", + }, "Patient.flatten() should return a dict of all scalar values and references" + + +def test_patient_with_scalars_references_identifiers( + patched_scalars_references_identifiers: bool, patient_dict: dict +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" patient = Patient.parse_obj(patient_dict) - assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method" - assert patient.flatten() == {'active': True, 'gender': 'female', 'id': '3', 'managingOrganization': 'Organization/hl7', 'resourceType': 'Patient', 'us-ssn': '444555555'}, "Patient.flatten() should return a dict of all scalar values and references" - - -def test_specimen_with_scalars_references_identifiers(patched_scalars_references_identifiers: bool, specimen_dict: dict): + assert hasattr( + patient, "flatten" + ), "Patient object does not have a 
'flatten' method" + assert patient.flatten() == { + "active": True, + "gender": "female", + "id": "3", + "managingOrganization": "Organization/hl7", + "resourceType": "Patient", + "us-ssn": "444555555", + }, "Patient.flatten() should return a dict of all scalar values and references" + + +def test_specimen_with_scalars_references_identifiers( + patched_scalars_references_identifiers: bool, specimen_dict: dict +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" specimen = Specimen.parse_obj(specimen_dict) - assert hasattr(specimen, 'flatten'), "Specimen object does not have a 'flatten' method" - assert specimen.flatten() == {'resourceType': 'Specimen', 'id': 'denovo-3', 'status': 'available', - 'receivedTime': '2021-01-01T01:01:01+00:00', - 'subject': 'Patient/denovoFather', 'specimens': '3'} + assert hasattr( + specimen, "flatten" + ), "Specimen object does not have a 'flatten' method" + assert specimen.flatten() == { + "resourceType": "Specimen", + "id": "denovo-3", + "status": "available", + "receivedTime": "2021-01-01T01:01:01+00:00", + "subject": "Patient/denovoFather", + "specimens": "3", + } -def test_eye_color_observation(patched_scalars_references_identifiers_observation: bool, observation_eye_color_dict: dict): +def test_eye_color_observation( + patched_scalars_references_identifiers_observation: bool, + observation_eye_color_dict: dict, +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" observation = Observation.parse_obj(observation_eye_color_dict) - assert hasattr(observation, 'flatten'), "Observation object does not have a 'flatten' method" - assert observation.flatten() == {'resourceType': 'Observation', 'id': 'eye-color', 'status': 'final', - 'effectiveDateTime': '2016-05-18', 'value': 'blue', - 'subject': 'Patient/example'} + assert hasattr( + observation, "flatten" + ), "Observation object does not have a 'flatten' method" + 
assert observation.flatten() == { + "resourceType": "Observation", + "id": "eye-color", + "status": "final", + "effectiveDateTime": "2016-05-18", + "value": "blue", + "subject": "Patient/example", + } -def test_bmi_observation(patched_scalars_references_identifiers_observation: bool, observation_bmi_dict: dict): +def test_bmi_observation( + patched_scalars_references_identifiers_observation: bool, observation_bmi_dict: dict +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" observation = Observation.parse_obj(observation_bmi_dict) - assert hasattr(observation, 'flatten'), "Observation object does not have a 'flatten' method" - assert observation.flatten() == {'effectiveDateTime': '1999-07-02', - 'id': 'bmi-using-related', - 'resourceType': 'Observation', - 'status': 'final', - 'subject': 'Patient/example', - 'value': '16.2 kg/m2'} + assert hasattr( + observation, "flatten" + ), "Observation object does not have a 'flatten' method" + assert observation.flatten() == { + "effectiveDateTime": "1999-07-02", + "id": "bmi-using-related", + "resourceType": "Observation", + "status": "final", + "subject": "Patient/example", + "value": "16.2 kg/m2", + } diff --git a/tests/unit/test_hash_types.py b/tests/unit/test_hash_types.py index ab51944c..68559e6f 100644 --- a/tests/unit/test_hash_types.py +++ b/tests/unit/test_hash_types.py @@ -5,20 +5,20 @@ from gen3_tracker.git import DVCItem VALID_HASHES = { - 'md5': 'acbd18db4cc2f85cedef654fccc4a4d8', - 'sha1': '2ef7bde608ce5404e97d5f042f95f89f1c232871', - 'sha256': '5bf8aa57fc5a6bc547decf1cc6db63f10deb55a3c6c5df497d631fb3d95e1abf', - 'sha512': '3ba2942ed1d05551d4360a2a7bb6298c2359061dc07b368949bd3fb7feca3344221257672d772ce456075b7cfa50fd7ce41eaefe529d056bf23dd665de668b78', - 'crc': '3e25960a', - 'etag': 'acbd18db4cc2f85cedef654fccc4a4d8-3' + "md5": "acbd18db4cc2f85cedef654fccc4a4d8", + "sha1": "2ef7bde608ce5404e97d5f042f95f89f1c232871", + "sha256": 
"5bf8aa57fc5a6bc547decf1cc6db63f10deb55a3c6c5df497d631fb3d95e1abf", + "sha512": "3ba2942ed1d05551d4360a2a7bb6298c2359061dc07b368949bd3fb7feca3344221257672d772ce456075b7cfa50fd7ce41eaefe529d056bf23dd665de668b78", + "crc": "3e25960a", + "etag": "acbd18db4cc2f85cedef654fccc4a4d8-3", } def test_invalid_hash_values(): """Test that invalid hash values raise a ValidationError.""" for hash_type in ACCEPTABLE_HASHES.keys(): - _ = dict(hash=hash_type, modified='2013-07-01T16:10-04:00', path='dddd', size=1) - _[hash_type] = 'foo' + _ = dict(hash=hash_type, modified="2013-07-01T16:10-04:00", path="dddd", size=1) + _[hash_type] = "foo" print(_) with pytest.raises(ValidationError): item = DVCItem(**_) @@ -28,7 +28,7 @@ def test_invalid_hash_values(): def test_valid_hash_values(): """Test that valid hash values do raise a ValidationError.""" for hash_type in VALID_HASHES.keys(): - _ = dict(hash=hash_type, modified='2013-07-01T16:10-04:00', path='dddd', size=1) + _ = dict(hash=hash_type, modified="2013-07-01T16:10-04:00", path="dddd", size=1) _[hash_type] = VALID_HASHES[hash_type] print(_) item = DVCItem(**_) diff --git a/tests/unit/test_read_dvc.py b/tests/unit/test_read_dvc.py index 7feef182..c9a5c231 100644 --- a/tests/unit/test_read_dvc.py +++ b/tests/unit/test_read_dvc.py @@ -3,15 +3,23 @@ def test_read_dvc(data_path: Path): - dvc = to_dvc(data_path / 'hello.txt.dvc') + dvc = to_dvc(data_path / "hello.txt.dvc") assert dvc assert dvc.outs - assert dvc.outs[0].path == 'my-project-data/hello.txt' + assert dvc.outs[0].path == "my-project-data/hello.txt" def test_read_dvc_item(): - _ = {'hash': 'md5', 'is_symlink': False, 'md5': 'b1946ac92492d2347c6235b4d2611184', 'mime': 'text/plain', 'modified': '2024-04-30T17:46:30.819143+00:00', - 'path': 'my-project-data/hello.txt', 'realpath': '/Users/walsbr/aced/g3t-git/attic/cbds-test39/my-project-data/hello.txt', 'size': 6} + _ = { + "hash": "md5", + "is_symlink": False, + "md5": "b1946ac92492d2347c6235b4d2611184", + "mime": "text/plain", 
+ "modified": "2024-04-30T17:46:30.819143+00:00", + "path": "my-project-data/hello.txt", + "realpath": "/Users/walsbr/aced/g3t-git/attic/cbds-test39/my-project-data/hello.txt", + "size": 6, + } item = DVCItem(**_) assert item - assert item.hash == 'md5' + assert item.hash == "md5" From 2cb2feb138aff124c258f107ccfbfb420608a070 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 2 Dec 2024 13:00:20 -0800 Subject: [PATCH 03/18] ensure dataframer unit tests pass --- tests/unit/dataframer/test_dataframer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/dataframer/test_dataframer.py b/tests/unit/dataframer/test_dataframer.py index 87a51f22..422665f6 100644 --- a/tests/unit/dataframer/test_dataframer.py +++ b/tests/unit/dataframer/test_dataframer.py @@ -93,6 +93,7 @@ def simplified_resources( "category": "Laboratory", "sample_type": "Primary Solid Tumor", "library_id": "12345", + "observation_code": "sample type abc", "tissue_type": "Tumor", "treatments": "Trastuzumab", "allocated_for_site": "TEST Clinical Research", @@ -284,6 +285,7 @@ def specimen_row(simplified_resources, specimen_key): "tissue_type": "Tumor", "treatments": "Trastuzumab", "allocated_for_site": "TEST Clinical Research", + "observation_code": "sample type abc", "indexed_collection_date": "365", "biopsy_specimens": "specimenA, specimenB, specimenC", "biopsy_procedure_type": "Biopsy - Core", From 4a71cf1f57755610e3516e8e9b4abcfbda5e094e Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Mon, 2 Dec 2024 13:34:55 -0800 Subject: [PATCH 04/18] fix test --- tests/integration/test_end_to_end_workflow.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 6237edab..d4c5f407 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -303,7 +303,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date( result = run( runner, 
["push", "--skip_validate", "--overwrite"], - expected_exit_code=1, + expected_exit_code=0 , expected_files=[log_file_path], ) @@ -316,6 +316,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date( with open(log_file_path, "r") as log_file: lines = log_file.readlines() str_lines = str(lines) + print("log lines: ", str_lines) for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: assert ( From 885bfce2e0828c37724fcd63f33a73ae74678a46 Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Tue, 3 Dec 2024 14:45:09 -0800 Subject: [PATCH 05/18] fix test to work with new output --- tests/integration/test_end_to_end_workflow.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index d4c5f407..911ffc2e 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -303,7 +303,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date( result = run( runner, ["push", "--skip_validate", "--overwrite"], - expected_exit_code=0 , + expected_exit_code=1 , expected_files=[log_file_path], ) @@ -316,8 +316,6 @@ def test_push_fails_with_invalid_doc_ref_creation_date( with open(log_file_path, "r") as log_file: lines = log_file.readlines() str_lines = str(lines) - print("log lines: ", str_lines) - for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: assert ( keyword in str_lines From 6d46e966914e362e94a1a24e00b1b91ff6ec2d2c Mon Sep 17 00:00:00 2001 From: quinnwai Date: Tue, 3 Dec 2024 14:52:18 -0800 Subject: [PATCH 06/18] fix spacing --- tests/integration/test_end_to_end_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 911ffc2e..5fdca0c0 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ 
b/tests/integration/test_end_to_end_workflow.py @@ -303,7 +303,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date( result = run( runner, ["push", "--skip_validate", "--overwrite"], - expected_exit_code=1 , + expected_exit_code=1, expected_files=[log_file_path], ) From c6a9a4213a7133c116792a9a610387769f9b6613 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 2 Dec 2024 17:37:34 -0800 Subject: [PATCH 07/18] test improved validation --- tests/unit/test_validate_float_to_int.py | 123 +++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 tests/unit/test_validate_float_to_int.py diff --git a/tests/unit/test_validate_float_to_int.py b/tests/unit/test_validate_float_to_int.py new file mode 100644 index 00000000..d10fc83c --- /dev/null +++ b/tests/unit/test_validate_float_to_int.py @@ -0,0 +1,123 @@ +import pytest +from fhir.resources.observation import Observation +from fhir.resources.patient import Patient +from pydantic import ValidationError + + +def test_validate_observation(): + """Test validate observation.""" + false = False + observation_dict = { + "resourceType": "Observation", + "id": "9d11e26b-0307-5573-aee8-d145bdc259f3", + "status": "final", + "category": [ + { + "coding": [ + { + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "code": "laboratory", + "display": "Laboratory" + } + ] + } + ], + "code": { + "coding": [ + { + "system": "http://loinc.org", + "code": "81247-9", + "display": "Master HL7 genetic variant reporting panel" + } + ] + }, + "subject": { + "reference": "Patient/16244c6a-028a-5d8b-ac80-22e7b870544b" + }, + "specimen": { + "reference": "Specimen/f7f2ceb6-53f3-561a-960d-0c47700c14a2" + }, + "focus": [ + { + "reference": "Specimen/f7f2ceb6-53f3-561a-960d-0c47700c14a2" + } + ], + "effectiveDateTime": "2024-06-03T08:00:00+00:00", + "valueString": "Sequencing parameters", + "component": [ + { + "code": { + "coding": [ + { + "system": 
"https://cadsr.cancer.gov/sample_laboratory_observation", + "code": "weight", + "display": "weight" + } + ], + "text": "weight" + }, + "valueInteger": 32.9 + }, + { + "code": { + "coding": [ + { + "system": "https://cadsr.cancer.gov/sample_laboratory_observation", + "code": "is_ffpe", + "display": "is_ffpe" + } + ], + "text": "is_ffpe" + }, + "valueBoolean": false + }, + { + "code": { + "coding": [ + { + "system": "https://cadsr.cancer.gov/sample_laboratory_observation", + "code": "sample_type", + "display": "sample_type" + } + ], + "text": "sample_type" + }, + "valueString": "Solid Tissue Normal" + }, + { + "code": { + "coding": [ + { + "system": "https://cadsr.cancer.gov/sample_laboratory_observation", + "code": "updated_datetime", + "display": "updated_datetime" + } + ], + "text": "updated_datetime" + }, + "valueDateTime": "2018-09-06T17:41:51.247648-05:00" + } + ] + } + observation_dict['component'][0]['valueInteger'] = 32.0 + observation = Observation.model_validate(observation_dict) + + assert observation, "Should have accepted valueInteger: 32.0" + + observation_dict['component'][0]['valueInteger'] = 32.9 + + with pytest.raises(ValidationError): + Observation.model_validate(observation_dict) + + +def test_patient(): + with pytest.raises(ValidationError): + patient_dict = {"multipleBirthInteger": 32.9} + patient = Patient.model_validate(patient_dict) + assert patient.multipleBirthInteger == 32.9, "Should not have accepted multipleBirthInteger: 32.9" + + patient_dict = {"multipleBirthInteger": 32.0} + Patient.model_validate(patient_dict) + + patient_dict = {"multipleBirthInteger": 32} + Patient.model_validate(patient_dict) From e8dfca71bd9c08370f390b3d95bef8fc5fcc6756 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 2 Dec 2024 18:18:22 -0800 Subject: [PATCH 08/18] ensure monkey patches no longer necessary --- ...t_deprecated_pydantic_v1_monkey_patches.py | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 
tests/unit/test_deprecated_pydantic_v1_monkey_patches.py diff --git a/tests/unit/test_deprecated_pydantic_v1_monkey_patches.py b/tests/unit/test_deprecated_pydantic_v1_monkey_patches.py new file mode 100644 index 00000000..895786b5 --- /dev/null +++ b/tests/unit/test_deprecated_pydantic_v1_monkey_patches.py @@ -0,0 +1,52 @@ +import pathlib +from typing import Annotated + +import pydantic +from fhir.resources.attachment import Attachment +from pydantic import UrlConstraints, AnyUrl + + +def test_validate_any_url(): + + class MyModel(pydantic.BaseModel): + url: Annotated[AnyUrl, UrlConstraints(host_required=False)] + + _ = MyModel(url='file:///foo/bar') + assert _, "file:///foo/bar is a valid file url" + assert _.url.host is None, "file:///foo/bar has no host" + + _ = MyModel(url='xxx:///XXXX') + assert _, "file:///foo/bar is a valid file url" + assert _.url.host is None, "file:///foo/bar has no host" + + +def test_fhir_url(): + """Previously a monkey patch was used to enable file urls. Any xs:anyURI is now allowed. See https://w3.org/TR/xmlschema-2/#anyURI + From https://hl7.org/fhir/datatypes.html#url (This regex is very permissive, but URIs must be valid. 
Implementers are welcome to use more specific regex statements for a URI in specific contexts)""" + attachment: Attachment = Attachment(url='file:///foo/bar') + assert attachment.validate_after_model_construction() + + attachment: Attachment = Attachment.model_validate({'url': 'file:///foo/bar'}) + assert attachment + + attachment: Attachment = Attachment.model_validate({'url': 'xxx:///XXXX'}) + assert attachment + + attachment: Attachment = Attachment.model_validate({'url': 'FOO BAR'}) + assert attachment + + +def test_path_encoders(): + """Previously a monkey patch was used to enable correct serialization of path objects""" + # eg + # # default initializers for path + # pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.PosixPath] = str + # pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.WindowsPath] = str + # pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.Path] = str + + class MyModel(pydantic.BaseModel): + path: pathlib.Path + + _ = MyModel(path=pathlib.Path('/foo/bar')) + assert _, "/foo/bar is a valid path" + _.model_dump()['path'] == '/foo/bar' From 46d58aa6f42e646bd7c3f53105a2eceeeeaa95fd Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 2 Dec 2024 18:19:20 -0800 Subject: [PATCH 09/18] datetimes w/out time now return XXXXT00:00:00 --- tests/unit/test_flatten_fhir_example.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/unit/test_flatten_fhir_example.py b/tests/unit/test_flatten_fhir_example.py index 9b923d88..0ea020e5 100644 --- a/tests/unit/test_flatten_fhir_example.py +++ b/tests/unit/test_flatten_fhir_example.py @@ -195,7 +195,7 @@ def flatten_scalars(self: DomainResource) -> dict: """Convert the DomainResource instance to a dictionary.""" _ = { k: _isodate(v) - for k, v in self.dict().items() + for k, v in self.model_dump().items() if not isinstance(v, (list, dict)) } return _ @@ -203,7 +203,7 @@ def flatten_scalars(self: DomainResource) -> dict: def flatten_references(self: DomainResource) -> dict: """Convert the 
DomainResource instance to a dictionary.""" - fields = [_ for _ in self.__fields__.keys() if not _.endswith("__ext")] + fields = [_ for _ in self.model_fields.keys() if not _.endswith("__ext")] _ = {} # if any top level field in this resource is a Reference, use the Reference.reference https://build.fhir.org/references-definitions.html#Reference.reference for k in fields: @@ -327,7 +327,7 @@ def patched_scalars_references_identifiers_observation() -> bool: def test_patient_without_flatten(patient_dict: dict): """This patient object should NOT have a 'flatten' method.""" # without path dependency, just have a plain patient object with no flatten method - patient = Patient.parse_obj(patient_dict) + patient = Patient.model_validate(patient_dict) assert not hasattr( patient, "flatten" ), "Patient object should not have a 'flatten' method" @@ -335,7 +335,7 @@ def test_patient_without_flatten(patient_dict: dict): def test_patient_with_simple(patched_domain_resource_simple: bool, patient_dict: dict): """This patient object should have a 'flatten' method.""" - patient = Patient.parse_obj(patient_dict) + patient = Patient.model_validate(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -346,7 +346,7 @@ def test_patient_with_simple(patched_domain_resource_simple: bool, patient_dict: def test_patient_with_scalars(patched_scalars: bool, patient_dict: dict): """This patient object should have a 'flatten' method that returns a dict of scalar values.""" - patient = Patient.parse_obj(patient_dict) + patient = Patient.model_validate(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -362,7 +362,7 @@ def test_patient_with_scalars_and_references( patched_scalars_and_references: bool, patient_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - patient = Patient.parse_obj(patient_dict) + patient = 
Patient.model_validate(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -379,7 +379,7 @@ def test_patient_with_scalars_references_identifiers( patched_scalars_references_identifiers: bool, patient_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - patient = Patient.parse_obj(patient_dict) + patient = Patient.model_validate(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -397,7 +397,7 @@ def test_specimen_with_scalars_references_identifiers( patched_scalars_references_identifiers: bool, specimen_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - specimen = Specimen.parse_obj(specimen_dict) + specimen = Specimen.model_validate(specimen_dict) assert hasattr( specimen, "flatten" ), "Specimen object does not have a 'flatten' method" @@ -416,7 +416,7 @@ def test_eye_color_observation( observation_eye_color_dict: dict, ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - observation = Observation.parse_obj(observation_eye_color_dict) + observation = Observation.model_validate(observation_eye_color_dict) assert hasattr( observation, "flatten" ), "Observation object does not have a 'flatten' method" @@ -424,7 +424,7 @@ def test_eye_color_observation( "resourceType": "Observation", "id": "eye-color", "status": "final", - "effectiveDateTime": "2016-05-18", + "effectiveDateTime": "2016-05-18T00:00:00", "value": "blue", "subject": "Patient/example", } @@ -434,12 +434,12 @@ def test_bmi_observation( patched_scalars_references_identifiers_observation: bool, observation_bmi_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - observation = Observation.parse_obj(observation_bmi_dict) + 
observation = Observation.model_validate(observation_bmi_dict) assert hasattr( observation, "flatten" ), "Observation object does not have a 'flatten' method" assert observation.flatten() == { - "effectiveDateTime": "1999-07-02", + "effectiveDateTime": "1999-07-02T00:00:00", "id": "bmi-using-related", "resourceType": "Observation", "status": "final", From 23b89786736f3e31ab913ed7452b51dc2d4bdc97 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 2 Dec 2024 18:21:51 -0800 Subject: [PATCH 10/18] fhir.resources==8 --- gen3_tracker/__init__.py | 35 +-------------------------------- gen3_tracker/common/__init__.py | 8 ++++---- gen3_tracker/git/cli.py | 2 +- gen3_tracker/meta/__init__.py | 12 +++++------ gen3_tracker/meta/skeleton.py | 17 ++++++++-------- gen3_tracker/meta/validator.py | 4 ++-- requirements.txt | 2 +- 7 files changed, 23 insertions(+), 57 deletions(-) diff --git a/gen3_tracker/__init__.py b/gen3_tracker/__init__.py index 4e860e2f..f3470b98 100644 --- a/gen3_tracker/__init__.py +++ b/gen3_tracker/__init__.py @@ -5,14 +5,12 @@ import typing import uuid from collections import OrderedDict -from typing import Union, Optional +from typing import Optional import click -import pydantic from click import Context, Command from pydantic import BaseModel, field_validator - ACED_NAMESPACE = uuid.uuid3(uuid.NAMESPACE_DNS, b'aced-idp.org') ENV_VARIABLE_PREFIX = 'G3T_' @@ -24,28 +22,6 @@ } -def monkey_patch_url_validate(): - # monkey patch to allow file: urls - import fhir.resources.fhirtypes - from pydantic import FileUrl - - original_url_validate = fhir.resources.fhirtypes.Url.validate - - @classmethod - def better_url_validate(cls, value: str, field: "ModelField", config: "BaseConfig") -> Union["AnyUrl", str]: # noqa - """Allow file: urls. 
see https://github.com/pydantic/pydantic/issues/1983 - bugfix: addresses issue introduced with `fhir.resources`==7.0.1 - """ - if value.startswith("file:"): - _ = FileUrl(value) - return value - # return FileUrl.validate(value, field, config) - value = original_url_validate(value, field, config) - return value - - fhir.resources.fhirtypes.Url.validate = better_url_validate - - class LogConfig(BaseModel): format: str """https://docs.python.org/3/library/logging.html#logging.Formatter""" @@ -177,12 +153,3 @@ def resolve_command( # os._exit(1) # noqa raise e - - -# main -monkey_patch_url_validate() - -# default initializers for path -pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.PosixPath] = str -pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.WindowsPath] = str -pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.Path] = str diff --git a/gen3_tracker/common/__init__.py b/gen3_tracker/common/__init__.py index 5aec35a7..35bbf3db 100644 --- a/gen3_tracker/common/__init__.py +++ b/gen3_tracker/common/__init__.py @@ -321,7 +321,7 @@ def create_resource_id(resource, project_id) -> str: assert resource, "resource required" assert project_id, "project_id required" identifier_string = identifier_to_string(resource.identifier) - return str(uuid.uuid5(ACED_NAMESPACE, f"{project_id}/{resource.resource_type}/{identifier_string}")) + return str(uuid.uuid5(ACED_NAMESPACE, f"{project_id}/{resource.get_resource_type()}/{identifier_string}")) def create_object_id(path: str, project_id: str) -> str: @@ -344,7 +344,7 @@ def assert_valid_id(resource, project_id): """Ensure that the id is correct.""" assert resource, "resource required" assert project_id, "project_id required" - if resource.resource_type == "DocumentReference": + if resource.get_resource_type() == "DocumentReference": document_reference: DocumentReference = resource official_identifier = document_reference.content[0].attachment.url recreate_id = create_object_id(official_identifier, project_id) @@ -354,7 +354,7 @@ def 
assert_valid_id(resource, project_id): recreate_id = create_resource_id(resource, project_id) if resource.id == recreate_id: return - msg = f"The current {resource.resource_type}.id {resource.id} does not equal the calculated one {recreate_id}, has the project id changed? current:{project_id} {resource.resource_type}:{official_identifier}" + msg = f"The current {resource.get_resource_type()}.id {resource.id} does not equal the calculated one {recreate_id}, has the project id changed? current:{project_id} {resource.get_resource_type()}:{official_identifier}" raise Exception(msg) @@ -523,7 +523,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): elif hasattr(self.output.obj, 'model_dump'): _.update(self.output.obj.model_dump()) else: - _.update(self.output.obj.dict()) + _.update(self.output.obj.model_dump()) rc = self.output.exit_code if exc_type is not None: if isinstance(self.output.obj, dict): diff --git a/gen3_tracker/git/cli.py b/gen3_tracker/git/cli.py index c0222bd8..bc932d77 100644 --- a/gen3_tracker/git/cli.py +++ b/gen3_tracker/git/cli.py @@ -677,7 +677,7 @@ def push( bundle.entry.append(bundle_entry) headers = {"Authorization": f"{auth._access_token}"} - bundle_dict = bundle.dict() + bundle_dict = bundle.model_dump() with Halo( text="Sending to FHIR Server", spinner="line", diff --git a/gen3_tracker/meta/__init__.py b/gen3_tracker/meta/__init__.py index 963d5abf..256dfcc5 100644 --- a/gen3_tracker/meta/__init__.py +++ b/gen3_tracker/meta/__init__.py @@ -47,10 +47,10 @@ def parse_obj(resource: dict, validate=True) -> ParseResult: try: assert 'resourceType' in resource, "Dict missing `resourceType`, is it a FHIR dict?" 
klass = FHIR_CLASSES.get_fhir_model_class(resource['resourceType']) - _ = klass.parse_obj(resource) + _ = klass.model_validate(resource) if validate: # trigger object traversal, see monkey patch below, at bottom of file - _.dict() + _.model_dump() return ParseResult(resource=_, exception=None, path=None, resource_id=_.id) except (ValidationError, AssertionError) as e: return ParseResult(resource=None, exception=e, path=None, resource_id=resource.get('id', None)) @@ -68,11 +68,11 @@ def _entry_iterator(parse_result: ParseResult) -> Iterator[ParseResult]: if _ is None: break if hasattr(_, 'resource') and _.resource: # BundleEntry - yield ParseResult(path=_path, resource=_.resource, offset=offset, exception=None, json_obj=_.resource.dict()) + yield ParseResult(path=_path, resource=_.resource, offset=offset, exception=None, json_obj=_.resource.model_dump()) elif hasattr(_, 'item'): # ListEntry - yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.dict()) + yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.model_dump()) else: - yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.dict()) + yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.model_dump()) offset += 1 pass @@ -81,7 +81,7 @@ def _has_entries(_: ParseResult): """FHIR types Bundles List have entries""" if _.resource is None: return False - return _.resource.resource_type in ["List"] and _.resource.entry is not None + return _.resource.get_resource_type() in ["List"] and _.resource.entry is not None def directory_reader(directory_path: str, diff --git a/gen3_tracker/meta/skeleton.py b/gen3_tracker/meta/skeleton.py index 2e5df9ff..e3531885 100644 --- a/gen3_tracker/meta/skeleton.py +++ b/gen3_tracker/meta/skeleton.py @@ -7,8 +7,7 @@ import orjson from fhir.resources.attachment import Attachment from fhir.resources.bundle import Bundle, 
BundleEntry, BundleEntryRequest -from fhir.resources.documentreference import DocumentReference -from fhir.resources.fhirtypes import DocumentReferenceContentType +from fhir.resources.documentreference import DocumentReference, DocumentReferenceContent from fhir.resources.identifier import Identifier from fhir.resources.observation import Observation from fhir.resources.operationoutcome import OperationOutcome @@ -74,7 +73,7 @@ def get_data_from_meta() -> Generator[int, None, None]: def update_document_reference(document_reference: DocumentReference, dvc_data: DVC): """Update document reference with index record.""" - assert document_reference.resource_type == 'DocumentReference' + assert document_reference.get_resource_type() == 'DocumentReference' assert dvc_data.out.object_id == document_reference.id, f"{dvc_data['did']} != {document_reference.id}" assert dvc_data.out.modified, f"dvc_data missing modified: {dvc_data}" document_reference.docStatus = 'final' @@ -108,7 +107,7 @@ def update_document_reference(document_reference: DocumentReference, dvc_data: D attachment.title = pathlib.Path(dvc_data.out.path).name attachment.creation = dvc_data.out.modified - content = DocumentReferenceContentType(attachment=attachment) + content = DocumentReferenceContent(attachment=attachment) document_reference.content = [content] @@ -289,10 +288,10 @@ def update_meta_files(dry_run=False, project_id=None) -> list[str]: for _ in dvc_data(dvc_files): resources = create_skeleton(_, project_id, meta_index()) for resource in resources: - key = f"{resource.resource_type}/{resource.id}" + key = f"{resource.get_resource_type()}/{resource.id}" if key not in emitted_already: - emitter.emit(resource.resource_type).write( - resource.json(option=orjson.OPT_APPEND_NEWLINE) + emitter.emit(resource.get_resource_type()).write( + resource.model_dump_json() + '\n' ) emitted_already.append(key) @@ -317,8 +316,8 @@ def update_meta_files(dry_run=False, project_id=None) -> list[str]: 
bundle.entry.append(bundle_entry) with EmitterContextManager('META') as emitter: - emitter.emit(bundle.resource_type, file_mode='a').write( - bundle.json(option=orjson.OPT_APPEND_NEWLINE) + emitter.emit(bundle.get_resource_type(), file_mode='a').write( + bundle.model_dump_json() + '\n' ) after_meta_files = [_ for _ in pathlib.Path('META').glob('*.ndjson')] diff --git a/gen3_tracker/meta/validator.py b/gen3_tracker/meta/validator.py index 6630c196..941e2d3f 100644 --- a/gen3_tracker/meta/validator.py +++ b/gen3_tracker/meta/validator.py @@ -98,14 +98,14 @@ def validate(directory_path: pathlib.Path, project_id=None) -> ValidateDirectory continue _ = parse_result.resource - ids.append(f"{_.resource_type}/{_.id}") + ids.append(f"{_.get_resource_type()}/{_.id}") nested_references = nested_lookup('reference', parse_result.json_obj) # https://www.hl7.org/fhir/medicationrequest-definitions.html#MedicationRequest.medication # is a reference to a Medication resource https://www.hl7.org/fhir/references.html#CodeableReference # so it has a reference.reference form, strip it out nested_references = [_ for _ in nested_references if isinstance(_, str)] references.extend(nested_references) - resources[parse_result.resource.resource_type] += 1 + resources[parse_result.resource.get_resource_type()] += 1 # assert references exist references = set(references) diff --git a/requirements.txt b/requirements.txt index 7005d471..ae767ae9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ halo tqdm deepdiff -fhir.resources==7.1.0 # FHIR Model +fhir.resources==8.0.0b4 # FHIR Model Pre-release orjson nested_lookup From 54d29553b118f8554d35ffcd43eba532b7f336f8 Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Wed, 4 Dec 2024 11:14:24 -0800 Subject: [PATCH 11/18] Initial checkin med admin tab --- gen3_tracker/meta/cli.py | 2 +- gen3_tracker/meta/dataframer.py | 24 ++++++++++++++++++++++++ gen3_tracker/meta/entities.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 56 
insertions(+), 1 deletion(-) diff --git a/gen3_tracker/meta/cli.py b/gen3_tracker/meta/cli.py index 3dd37bfd..9462c7c3 100644 --- a/gen3_tracker/meta/cli.py +++ b/gen3_tracker/meta/cli.py @@ -103,7 +103,7 @@ def render_graph(config: Config, directory_path: str, output_path: str, browser: @meta.command("dataframe") @click.argument('data_type', required=True, - type=click.Choice(['Specimen', 'DocumentReference', 'ResearchSubject']), + type=click.Choice(['Specimen', 'DocumentReference', 'ResearchSubject', "MedicationAdministration"]), default=None) @click.argument("directory_path", type=click.Path(exists=True, file_okay=False), diff --git a/gen3_tracker/meta/dataframer.py b/gen3_tracker/meta/dataframer.py index c7d1cd3f..1cd2129d 100644 --- a/gen3_tracker/meta/dataframer.py +++ b/gen3_tracker/meta/dataframer.py @@ -504,6 +504,28 @@ def flattened_research_subjects(self) -> Generator[dict, None, None]: yield flat_research_subject + + def flattened_medication_administrations(self) -> Generator[dict, None, None]: + + # get all MedicationAdministrations + cursor = self.connect() + cursor.execute( + "SELECT * FROM resources where resource_type = ?", ("MedicationAdministration",) + ) + + # get research subject and associated .subject patient + for _, _, raw_medication_administration in cursor.fetchall(): + medication_administration = json.loads(raw_medication_administration) + flat_medication_administration = SimplifiedResource.build( + resource=medication_administration + ).simplified + + patient = get_subject(self, medication_administration) + flat_medication_administration.update(patient) + + yield flat_medication_administration + + def flattened_document_references(self) -> Generator[dict, None, None]: """generator that yields document references populated with DocumentReference.subject fields and Observation codes through Observation.focus @@ -607,6 +629,8 @@ def create_dataframe( df = pd.DataFrame(db.flattened_document_references()) elif data_type == "ResearchSubject": 
df = pd.DataFrame(db.flattened_research_subjects()) + elif data_type == "MedicationAdministration": + df = pd.DataFrame(db.flattened_medication_administrations()) elif data_type == "Specimen": df = pd.DataFrame(db.flattened_specimens()) else: diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index c6beda14..16efe955 100644 --- a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -405,6 +405,35 @@ def values(self) -> dict: return _values +class SimplifiedMedicationAdministration(SimplifiedFHIR): + @computed_field + @property + def values(self) -> dict: + """Return a dictionary of 'value':value.""" + _values = super().values + # Plucking out fields that didn't get picked up by default class simplifier.' + dose_value = self.resource.get("dosage", {}).get("dose", {}).get("value", None) + if dose_value: + _values["total_dosage"] = dose_value + occurenceTiming = self.resource.get("occurenceTiming", {}).get("repeat", {}).get("boundsRange") + if occurenceTiming: + low = occurenceTiming.get("low", {}).get("value") + _values["index_date_start_days"] = low if low else None + high = occurenceTiming.get("high", {}).get("value") + _values["index_date_end_days"] = high if high else None + for notes in self.resource.get("note", []): + note = notes.get("value", None) + if note: + # Probably best to concat notes together + _values["notes"] = _values["notes"] + "; " + note + for identifier in self.resource.get("identifier", []): + system = identifier.get("system", None) + if system: + if system.split("/")[-1] == "regimen": + _values["regimen_id"] = identifier["value"] + return _values + + class SimplifiedCondition(SimplifiedFHIR): @computed_field @property @@ -440,4 +469,6 @@ def build(resource: dict) -> SimplifiedFHIR: return SimplifiedDocumentReference(resource=resource) if resource_type == "Condition": return SimplifiedCondition(resource=resource) + if resource_type == "MedicationAdministration": + return 
SimplifiedMedicationAdministration(resource=resource) return SimplifiedFHIR(resource=resource) From 0a1cc17cbe9e243f31c119a2b8fd1b454a8c8f26 Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Thu, 5 Dec 2024 08:47:23 -0800 Subject: [PATCH 12/18] fix identifier to avoid hyphen in column name --- gen3_tracker/meta/entities.py | 13 ++++++------- setup.py | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index 16efe955..493ff604 100644 --- a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -286,13 +286,12 @@ def identifiers(self) -> dict: elif identifiers_len == 1: return {"identifier": identifiers[0].get("value")} else: - base_identifier = {"identifier": identifiers[0].get("value")} - base_identifier.update( - { - identifier.get("system").split("/")[-1]: identifier.get("value") - for identifier in identifiers[1:] - } - ) + # Todo: Raise an execption if there are multiple identifiers with a "-" in them + base_identifier = { + "identifier" if "-" in identifier.get("system", "").split("/")[-1] + else identifier.get("system").split("/")[-1]: identifier.get("value") + for identifier in identifiers + } return base_identifier @computed_field diff --git a/setup.py b/setup.py index 1daa321f..3c3b3de1 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='gen3_tracker', - version='0.0.7rc2', + version='0.0.7rc4', description='A CLI for adding version control to Gen3 data submission projects.', long_description=long_description, long_description_content_type='text/markdown', From ebc8dd4fd0ce48d1bc937c49a469701b2df6d2af Mon Sep 17 00:00:00 2001 From: Quinn Wai Wong <54592956+quinnwai@users.noreply.github.com> Date: Wed, 4 Dec 2024 12:23:40 -0800 Subject: [PATCH 13/18] Release/0.0.7rc2 (#107) * update pytest and version number * linting * ensure dataframer unit tests pass * fix test * fix test to work with new output * fix spacing --------- Co-authored-by: 
matthewpeterkort --- gen3_tracker/git/cli.py | 2 +- gen3_tracker/meta/entities.py | 1 + tests/unit/test_flatten_fhir_example.py | 35 ++++++++++++++++--------- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/gen3_tracker/git/cli.py b/gen3_tracker/git/cli.py index bc932d77..c0222bd8 100644 --- a/gen3_tracker/git/cli.py +++ b/gen3_tracker/git/cli.py @@ -677,7 +677,7 @@ def push( bundle.entry.append(bundle_entry) headers = {"Authorization": f"{auth._access_token}"} - bundle_dict = bundle.model_dump() + bundle_dict = bundle.dict() with Halo( text="Sending to FHIR Server", spinner="line", diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index 493ff604..9941a4de 100644 --- a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -292,6 +292,7 @@ def identifiers(self) -> dict: else identifier.get("system").split("/")[-1]: identifier.get("value") for identifier in identifiers } + return base_identifier @computed_field diff --git a/tests/unit/test_flatten_fhir_example.py b/tests/unit/test_flatten_fhir_example.py index 0ea020e5..983f008c 100644 --- a/tests/unit/test_flatten_fhir_example.py +++ b/tests/unit/test_flatten_fhir_example.py @@ -195,7 +195,7 @@ def flatten_scalars(self: DomainResource) -> dict: """Convert the DomainResource instance to a dictionary.""" _ = { k: _isodate(v) - for k, v in self.model_dump().items() + for k, v in self.dict().items() if not isinstance(v, (list, dict)) } return _ @@ -203,7 +203,8 @@ def flatten_scalars(self: DomainResource) -> dict: def flatten_references(self: DomainResource) -> dict: """Convert the DomainResource instance to a dictionary.""" - fields = [_ for _ in self.model_fields.keys() if not _.endswith("__ext")] + + fields = [_ for _ in self.__fields__.keys() if not _.endswith("__ext")] _ = {} # if any top level field in this resource is a Reference, use the Reference.reference https://build.fhir.org/references-definitions.html#Reference.reference for k in fields: @@ -327,7 
+328,8 @@ def patched_scalars_references_identifiers_observation() -> bool: def test_patient_without_flatten(patient_dict: dict): """This patient object should NOT have a 'flatten' method.""" # without path dependency, just have a plain patient object with no flatten method - patient = Patient.model_validate(patient_dict) + + patient = Patient.parse_obj(patient_dict) assert not hasattr( patient, "flatten" ), "Patient object should not have a 'flatten' method" @@ -335,7 +337,8 @@ def test_patient_without_flatten(patient_dict: dict): def test_patient_with_simple(patched_domain_resource_simple: bool, patient_dict: dict): """This patient object should have a 'flatten' method.""" - patient = Patient.model_validate(patient_dict) + + patient = Patient.parse_obj(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -346,7 +349,8 @@ def test_patient_with_simple(patched_domain_resource_simple: bool, patient_dict: def test_patient_with_scalars(patched_scalars: bool, patient_dict: dict): """This patient object should have a 'flatten' method that returns a dict of scalar values.""" - patient = Patient.model_validate(patient_dict) + + patient = Patient.parse_obj(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -362,7 +366,8 @@ def test_patient_with_scalars_and_references( patched_scalars_and_references: bool, patient_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - patient = Patient.model_validate(patient_dict) + + patient = Patient.parse_obj(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -379,7 +384,8 @@ def test_patient_with_scalars_references_identifiers( patched_scalars_references_identifiers: bool, patient_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - 
patient = Patient.model_validate(patient_dict) + + patient = Patient.parse_obj(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -397,7 +403,8 @@ def test_specimen_with_scalars_references_identifiers( patched_scalars_references_identifiers: bool, specimen_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - specimen = Specimen.model_validate(specimen_dict) + + specimen = Specimen.parse_obj(specimen_dict) assert hasattr( specimen, "flatten" ), "Specimen object does not have a 'flatten' method" @@ -416,7 +423,8 @@ def test_eye_color_observation( observation_eye_color_dict: dict, ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - observation = Observation.model_validate(observation_eye_color_dict) + + observation = Observation.parse_obj(observation_eye_color_dict) assert hasattr( observation, "flatten" ), "Observation object does not have a 'flatten' method" @@ -424,7 +432,8 @@ def test_eye_color_observation( "resourceType": "Observation", "id": "eye-color", "status": "final", - "effectiveDateTime": "2016-05-18T00:00:00", + + "effectiveDateTime": "2016-05-18", "value": "blue", "subject": "Patient/example", } @@ -434,12 +443,14 @@ def test_bmi_observation( patched_scalars_references_identifiers_observation: bool, observation_bmi_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - observation = Observation.model_validate(observation_bmi_dict) + + observation = Observation.parse_obj(observation_bmi_dict) assert hasattr( observation, "flatten" ), "Observation object does not have a 'flatten' method" assert observation.flatten() == { - "effectiveDateTime": "1999-07-02T00:00:00", + + "effectiveDateTime": "1999-07-02", "id": "bmi-using-related", "resourceType": "Observation", "status": "final", From 
d41e50ff0526829f04735626e9d744b0f0a8515b Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 2 Dec 2024 11:32:27 -0800 Subject: [PATCH 14/18] update pytest and version number --- tests/integration/test_end_to_end_workflow.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 5fdca0c0..e1f99bf4 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -300,12 +300,11 @@ def test_push_fails_with_invalid_doc_ref_creation_date( log_file_path = "logs/publish.log" os.chdir(new_project_dir) run(runner, ["init", project_id, "--approve"]) - result = run( - runner, - ["push", "--skip_validate", "--overwrite"], - expected_exit_code=1, - expected_files=[log_file_path], - ) + result = run(runner, + ["push", "--skip_validate", "--overwrite"], + expected_exit_code=1, + expected_files=[log_file_path] + ) # ensure push has useful useful error logs assert ( @@ -316,10 +315,9 @@ def test_push_fails_with_invalid_doc_ref_creation_date( with open(log_file_path, "r") as log_file: lines = log_file.readlines() str_lines = str(lines) + for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: - assert ( - keyword in str_lines - ), f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' + assert keyword in str_lines, f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' def test_push_fails_with_no_write_permissions( From 2af91112f03d4216d7ce2ecf9d6241fe5b63a25c Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 2 Dec 2024 11:36:29 -0800 Subject: [PATCH 15/18] linting --- tests/integration/test_end_to_end_workflow.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index e1f99bf4..6237edab 100644 --- 
a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -300,11 +300,12 @@ def test_push_fails_with_invalid_doc_ref_creation_date( log_file_path = "logs/publish.log" os.chdir(new_project_dir) run(runner, ["init", project_id, "--approve"]) - result = run(runner, - ["push", "--skip_validate", "--overwrite"], - expected_exit_code=1, - expected_files=[log_file_path] - ) + result = run( + runner, + ["push", "--skip_validate", "--overwrite"], + expected_exit_code=1, + expected_files=[log_file_path], + ) # ensure push has useful useful error logs assert ( @@ -317,7 +318,9 @@ def test_push_fails_with_invalid_doc_ref_creation_date( str_lines = str(lines) for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: - assert keyword in str_lines, f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' + assert ( + keyword in str_lines + ), f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' def test_push_fails_with_no_write_permissions( From 7b4627ab55e15b447b043f4adcae2eb33c980828 Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Tue, 21 Jan 2025 09:18:39 -0800 Subject: [PATCH 16/18] hardcode a solution fun --- gen3_tracker/meta/entities.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index 9941a4de..9c1ed456 100644 --- a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -219,7 +219,10 @@ def _populate_simplified_extension(extension: dict): resource = self.resource for _ in resource.get("extension", [resource]): - if "extension" not in _.keys(): + # special case data looks like this skip it, no extension to extract + if set(_.keys()) == {"url", "size", "hash", "title"}: + continue + elif "extension" not in _.keys(): if "resourceType" not in _.keys(): _populate_simplified_extension(_) continue From 2ce278b2da1d104326550fdc20939511d74cd987 Mon Sep 17 
00:00:00 2001 From: matthewpeterkort Date: Tue, 21 Jan 2025 16:12:19 -0800 Subject: [PATCH 17/18] revert fhir.resources version --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index ae767ae9..ef36c0f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ halo tqdm deepdiff -fhir.resources==8.0.0b4 # FHIR Model Pre-release +fhir.resources==7.1.0 # FHIR Model Pre-release orjson nested_lookup diff --git a/setup.py b/setup.py index 3c3b3de1..c2482b8d 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='gen3_tracker', - version='0.0.7rc4', + version='0.0.7rc5', description='A CLI for adding version control to Gen3 data submission projects.', long_description=long_description, long_description_content_type='text/markdown', From eda70f54135487c031224bcfde3712155edec8f0 Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Wed, 22 Jan 2025 12:21:21 -0800 Subject: [PATCH 18/18] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c2482b8d..47573991 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='gen3_tracker', - version='0.0.7rc5', + version='0.0.7rc6', description='A CLI for adding version control to Gen3 data submission projects.', long_description=long_description, long_description_content_type='text/markdown',