From 8155ed1eeb3782c8ba59d96d826991b4869035f4 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 2 Dec 2024 11:32:27 -0800 Subject: [PATCH 01/18] update pytest and version number --- setup.py | 2 +- tests/integration/test_end_to_end_workflow.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index ab0c30b5..1daa321f 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='gen3_tracker', - version='0.0.7rc1', + version='0.0.7rc2', description='A CLI for adding version control to Gen3 data submission projects.', long_description=long_description, long_description_content_type='text/markdown', diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 429ae5fb..86b03fb0 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -58,7 +58,7 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: run(runner, ["--debug", "meta", "validate"]) # update the file - test_file = pathlib.Path("my-project-data/hello.txt") + test_file = Path("my-project-data/hello.txt") test_file.parent.mkdir(parents=True, exist_ok=True) test_file.write_text('hello UPDATE\n') # re-add the file @@ -213,7 +213,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec run(runner, ["init", project_id, "--approve"]) result = run(runner, ["push", "--skip_validate", "--overwrite"], - expected_exit_code=0, + expected_exit_code=1, expected_files=[log_file_path] ) @@ -225,9 +225,8 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec lines = log_file.readlines() str_lines = str(lines) - assert "/content/0/attachment/creation" in str_lines, f"expected errors to describe to /content/0/attachment/creation, instead got: \n{str_lines}" - assert "jsonschema" in str_lines, f"expected errors to mention jsonschema, instead got: \n{str_lines}" - assert invalid_date in 
str_lines, f"expected invalid date {invalid_date} to be logged, instead got: \n{str_lines} " + for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: + assert keyword in str_lines, f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' def test_push_fails_with_no_write_permissions(runner: CliRunner, project_id: str, tmp_path: Path): From ec045a3cf49d42c8ab604188e546d796da77e0b4 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 2 Dec 2024 11:36:29 -0800 Subject: [PATCH 02/18] linting --- gen3_tracker/gen3/jobs.py | 128 ++- gen3_tracker/git/cli.py | 853 +++++++++++++----- gen3_tracker/meta/entities.py | 21 +- tests/__init__.py | 18 +- tests/integration/__init__.py | 33 +- tests/integration/conftest.py | 2 +- tests/integration/test_bucket_import.py | 17 +- tests/integration/test_bundle.py | 32 +- tests/integration/test_end_to_end_workflow.py | 195 +++- tests/unit/meta/conftest.py | 2 +- tests/unit/meta/test_meta.py | 136 ++- tests/unit/test_coding_conventions.py | 5 +- tests/unit/test_flatten_fhir_example.py | 277 ++++-- tests/unit/test_hash_types.py | 18 +- tests/unit/test_read_dvc.py | 18 +- 15 files changed, 1238 insertions(+), 517 deletions(-) diff --git a/gen3_tracker/gen3/jobs.py b/gen3_tracker/gen3/jobs.py index f544d4a6..24f6b3e7 100644 --- a/gen3_tracker/gen3/jobs.py +++ b/gen3_tracker/gen3/jobs.py @@ -14,26 +14,36 @@ from gen3_tracker import Config from gen3_tracker.common import Push, Commit from gen3_tracker.gen3.indexd import write_indexd -from gen3_tracker.git import calculate_hash, DVC, run_command, DVCMeta, DVCItem, modified_date +from gen3_tracker.git import ( + calculate_hash, + DVC, + run_command, + DVCMeta, + DVCItem, + modified_date, +) def _validate_parameters(from_: str) -> pathlib.Path: - assert len(urlparse(from_).scheme) == 0, f"{from_} appears to be an url. url to url cp not supported" + assert ( + len(urlparse(from_).scheme) == 0 + ), f"{from_} appears to be an url. 
url to url cp not supported" return from_ -def cp(config: Config, - from_: str, - project_id: str, - ignore_state: bool, - auth=None, - user=None, - object_name=None, - bucket_name=None, - metadata: dict = {}, - ): +def cp( + config: Config, + from_: str, + project_id: str, + ignore_state: bool, + auth=None, + user=None, + object_name=None, + bucket_name=None, + metadata: dict = {}, +): """Copy meta to bucket, used by etl_pod job""" from_ = _validate_parameters(str(from_)) if not isinstance(from_, pathlib.Path): @@ -41,13 +51,15 @@ def cp(config: Config, assert auth, "auth is required" - metadata = dict({'submitter': None, 'metadata_version': '0.0.1', 'is_metadata': True} | metadata) - if not metadata['submitter']: + metadata = dict( + {"submitter": None, "metadata_version": "0.0.1", "is_metadata": True} | metadata + ) + if not metadata["submitter"]: if not user: - user = auth.curl('/user/user').json() - metadata['submitter'] = user['name'] + user = auth.curl("/user/user").json() + metadata["submitter"] = user["name"] - program, project = project_id.split('-') + program, project = project_id.split("-") assert bucket_name, f"could not find bucket for {program}" @@ -57,27 +69,26 @@ def cp(config: Config, if not object_name: now = datetime.now().strftime("%Y%m%d-%H%M%S") - object_name = f'_{project_id}-{now}_meta.zip' + object_name = f"_{project_id}-{now}_meta.zip" zipfile_path = temp_dir / object_name - with ZipFile(zipfile_path, 'w') as zip_object: + with ZipFile(zipfile_path, "w") as zip_object: for _ in from_.glob("*.ndjson"): zip_object.write(_) stat = zipfile_path.stat() - md5_sum = calculate_hash('md5', zipfile_path) + md5_sum = calculate_hash("md5", zipfile_path) my_dvc = DVC( meta=DVCMeta(), outs=[ DVCItem( path=object_name, md5=md5_sum, - hash='md5', + hash="md5", modified=modified_date(zipfile_path), size=stat.st_size, - ) - ] + ], ) metadata = write_indexd( @@ -92,56 +103,81 @@ def cp(config: Config, # document = file_client.upload_file_to_guid(guid=id_, 
file_name=object_name, bucket=bucket_name) # print(document, file=sys.stderr) - run_command(f"gen3-client upload-single --bucket {bucket_name} --guid {my_dvc.object_id} --file {zipfile_path} --profile {config.gen3.profile}", no_capture=False) + run_command( + f"gen3-client upload-single --bucket {bucket_name} --guid {my_dvc.object_id} --file {zipfile_path} --profile {config.gen3.profile}", + no_capture=False, + ) - return {'msg': f"Uploaded {zipfile_path} to {bucket_name}", "object_id": my_dvc.object_id, "object_name": object_name} + return { + "msg": f"Uploaded {zipfile_path} to {bucket_name}", + "object_id": my_dvc.object_id, + "object_name": object_name, + } -def publish_commits(config: Config, wait: bool, auth: Gen3Auth, bucket_name: str, spinner=None) -> dict: +def publish_commits( + config: Config, wait: bool, auth: Gen3Auth, bucket_name: str, spinner=None +) -> dict: """Publish commits to the portal.""" # TODO legacy fhir-import-export job: copies meta to bucket and triggers job, # meta information is already in git REPO, # we should consider changing the fhir_import_export job to use the git REPO - user = auth.curl('/user/user').json() + user = auth.curl("/user/user").json() # copy meta to bucket upload_result = cp( config=config, - from_='META', + from_="META", project_id=config.gen3.project_id, ignore_state=True, auth=auth, user=user, - bucket_name=bucket_name + bucket_name=bucket_name, ) - object_id = upload_result['object_id'] + object_id = upload_result["object_id"] push = Push(config=config) jobs_client = Gen3Jobs(auth_provider=auth) # create "legacy" commit object, read by fhir-import-export job - push.commits.append(Commit(object_id=object_id, message='From g3t-git', meta_path=upload_result['object_name'], commit_id=object_id)) - args = {'push': push.model_dump(), 'project_id': config.gen3.project_id, 'method': 'put'} + push.commits.append( + Commit( + object_id=object_id, + message="From g3t-git", + meta_path=upload_result["object_name"], + 
commit_id=object_id, + ) + ) + args = { + "push": push.model_dump(), + "project_id": config.gen3.project_id, + "method": "put", + } # capture logging from gen3.jobs from cdislogging import get_logger # noqa + cdis_logging = get_logger("__name__") cdis_logging.setLevel(logging.WARN) - + if wait: # async_run_job_and_wait monkeypatched below - _ = asyncio.run(jobs_client.async_run_job_and_wait(job_name='fhir_import_export', job_input=args, spinner=spinner)) + _ = asyncio.run( + jobs_client.async_run_job_and_wait( + job_name="fhir_import_export", job_input=args, spinner=spinner + ) + ) else: - _ = jobs_client.create_job('fhir_import_export', args) + _ = jobs_client.create_job("fhir_import_export", args) if not isinstance(_, dict): - _ = {'output': _} - if isinstance(_['output'], str): + _ = {"output": _} + if isinstance(_["output"], str): try: - _['output'] = json.loads(_['output']) + _["output"] = json.loads(_["output"]) except json.JSONDecodeError: pass return _ @@ -149,7 +185,9 @@ def publish_commits(config: Config, wait: bool, auth: Gen3Auth, bucket_name: str # monkey patch for gen3.jobs.Gen3Jobs.async_run_job_and_wait # make it less noisy and sleep less (max of 30 seconds) -async def async_run_job_and_wait(self, job_name, job_input, spinner=None, _ssl=None, **kwargs): +async def async_run_job_and_wait( + self, job_name, job_input, spinner=None, _ssl=None, **kwargs +): """ Asynchronous function to create a job, wait for output, and return. Will sleep in a linear delay until the job is done, starting with 1 second. 
@@ -188,12 +226,12 @@ async def async_run_job_and_wait(self, job_name, job_input, spinner=None, _ssl=N if status.get("status") != "Completed": # write failed output to log file before raising exception response = await self.async_get_output(job_create_response.get("uid")) - with open("logs/publish.log", 'a') as f: - log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat()} - log_msg.update(response) - f.write(json.dumps(log_msg, separators=(',', ':'))) - f.write('\n') - + with open("logs/publish.log", "a") as f: + log_msg = {"timestamp": datetime.now(pytz.UTC).isoformat()} + log_msg.update(response) + f.write(json.dumps(log_msg, separators=(",", ":"))) + f.write("\n") + raise Exception(f"Job status not complete: {status.get('status')}") response = await self.async_get_output(job_create_response.get("uid")) diff --git a/gen3_tracker/git/cli.py b/gen3_tracker/git/cli.py index e7c4cc40..c0222bd8 100644 --- a/gen3_tracker/git/cli.py +++ b/gen3_tracker/git/cli.py @@ -34,20 +34,36 @@ import gen3_tracker from gen3_tracker import Config -from gen3_tracker.common import CLIOutput, INFO_COLOR, ERROR_COLOR, is_url, filter_dicts, SUCCESS_COLOR, \ - read_ndjson_file +from gen3_tracker.common import ( + CLIOutput, + INFO_COLOR, + ERROR_COLOR, + is_url, + filter_dicts, + SUCCESS_COLOR, + read_ndjson_file, +) from gen3_tracker.config import init as config_init, ensure_auth from gen3_tracker.gen3.buckets import get_buckets -from gen3_tracker.git import git_files, to_indexd, to_remote, dvc_data, \ - data_file_changes, modified_date, git_status, DVC, MISSING_G3T_MESSAGE -from gen3_tracker.git import run_command, \ - MISSING_GIT_MESSAGE, git_repository_exists +from gen3_tracker.git import ( + git_files, + to_indexd, + to_remote, + dvc_data, + data_file_changes, + modified_date, + git_status, + DVC, + MISSING_G3T_MESSAGE, +) +from gen3_tracker.git import run_command, MISSING_GIT_MESSAGE, git_repository_exists from gen3_tracker.git.adder import url_path, write_dvc_file from 
gen3_tracker.git.cloner import ls from gen3_tracker.git.initializer import initialize_project_server_side from gen3_tracker.git.snapshotter import push_snapshot from gen3_tracker.meta.skeleton import meta_index, get_data_from_meta from gen3_tracker.common import _default_json_serializer + # logging.basicConfig(level=logging.INFO) _logger = logging.getLogger(__package__) @@ -66,14 +82,20 @@ # if debug: # _logger.setLevel(logging.DEBUG) + def _check_parameters(config, project_id): """Common parameter checks.""" if not project_id: raise AssertionError("project_id is required") - if not project_id.count('-') == 1: - raise AssertionError(f"project_id must be of the form program-project {project_id}") + if not project_id.count("-") == 1: + raise AssertionError( + f"project_id must be of the form program-project {project_id}" + ) if not config.gen3.profile: - click.secho("No profile set. Continuing in disconnected mode. Use `set profile `", fg='yellow') + click.secho( + "No profile set. Continuing in disconnected mode. Use `set profile `", + fg="yellow", + ) @click.group(cls=gen3_tracker.NaturalOrderGroup) @@ -84,10 +106,34 @@ def cli(): @cli.command(context_settings=dict(ignore_unknown_options=True)) # @click.option('--force', '-f', is_flag=True, help='Force the init.') -@click.argument('project_id', default=None, required=False, envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID") -@click.option('--approve', '-a', help='Approve the addition (privileged)', is_flag=True, default=False, show_default=True) -@click.option('--no-server', help='Skip server setup (testing)', is_flag=True, default=False, show_default=True, hidden=True) -@click.option('--debug', is_flag=True, envvar='G3T_DEBUG', help='Enable debug mode. 
G3T_DEBUG environment variable can also be used.') +@click.argument( + "project_id", + default=None, + required=False, + envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID", +) +@click.option( + "--approve", + "-a", + help="Approve the addition (privileged)", + is_flag=True, + default=False, + show_default=True, +) +@click.option( + "--no-server", + help="Skip server setup (testing)", + is_flag=True, + default=False, + show_default=True, + hidden=True, +) +@click.option( + "--debug", + is_flag=True, + envvar="G3T_DEBUG", + help="Enable debug mode. G3T_DEBUG environment variable can also be used.", +) @click.pass_obj def init(config: Config, project_id: str, approve: bool, no_server: bool, debug: bool): """Initialize a new repository.""" @@ -113,15 +159,29 @@ def init(config: Config, project_id: str, approve: bool, no_server: bool, debug: ensure_git_repo(config) if not no_server: - init_logs, approval_needed = initialize_project_server_side(config, project_id) + init_logs, approval_needed = initialize_project_server_side( + config, project_id + ) logs.extend(init_logs) if approve and approval_needed: - run_command('g3t collaborator approve --all', dry_run=config.dry_run, no_capture=True) + run_command( + "g3t collaborator approve --all", + dry_run=config.dry_run, + no_capture=True, + ) elif approval_needed and not approve: - click.secho("Approval needed. to approve the project, a privileged user must run `g3t collaborator approve --all`", fg=INFO_COLOR, file=sys.stderr) + click.secho( + "Approval needed. to approve the project, a privileged user must run `g3t collaborator approve --all`", + fg=INFO_COLOR, + file=sys.stderr, + ) else: - click.secho(f"Approval not needed. Project {project_id} has approved read/write", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Approval not needed. 
Project {project_id} has approved read/write", + fg=INFO_COLOR, + file=sys.stderr, + ) if config.debug: for _ in logs: @@ -135,26 +195,34 @@ def init(config: Config, project_id: str, approve: bool, no_server: bool, debug: def ensure_git_repo(config): # ensure a git repo - if pathlib.Path('.git').exists(): + if pathlib.Path(".git").exists(): return - if not pathlib.Path('.git').exists(): - command = 'git init' + if not pathlib.Path(".git").exists(): + command = "git init" run_command(command, dry_run=config.dry_run, no_capture=True) else: - click.secho('Git repository already exists.', fg=INFO_COLOR, file=sys.stderr) - pathlib.Path('MANIFEST').mkdir(exist_ok=True) - pathlib.Path('META').mkdir(exist_ok=True) - pathlib.Path('LOGS').mkdir(exist_ok=True) - with open('.gitignore', 'w') as f: - f.write('LOGS/\n') - f.write('.g3t/state/\n') # legacy - with open('META/README.md', 'w') as f: - f.write('This directory contains metadata files for the data files in the MANIFEST directory.\n') - with open('MANIFEST/README.md', 'w') as f: - f.write('This directory contains dvc files that reference the data files.\n') - run_command('git add MANIFEST META .gitignore .g3t', dry_run=config.dry_run, no_capture=True) - run_command('git commit -m "initialized" MANIFEST META .gitignore .g3t', dry_run=config.dry_run, no_capture=True) + click.secho("Git repository already exists.", fg=INFO_COLOR, file=sys.stderr) + pathlib.Path("MANIFEST").mkdir(exist_ok=True) + pathlib.Path("META").mkdir(exist_ok=True) + pathlib.Path("LOGS").mkdir(exist_ok=True) + with open(".gitignore", "w") as f: + f.write("LOGS/\n") + f.write(".g3t/state/\n") # legacy + with open("META/README.md", "w") as f: + f.write( + "This directory contains metadata files for the data files in the MANIFEST directory.\n" + ) + with open("MANIFEST/README.md", "w") as f: + f.write("This directory contains dvc files that reference the data files.\n") + run_command( + "git add MANIFEST META .gitignore .g3t", dry_run=config.dry_run, 
no_capture=True + ) + run_command( + 'git commit -m "initialized" MANIFEST META .gitignore .g3t', + dry_run=config.dry_run, + no_capture=True, + ) # Note: The commented code below is an example of how to use context settings to allow extra arguments. @@ -165,8 +233,8 @@ def ensure_git_repo(config): @cli.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True)) -@click.argument('target') -@click.option('--no-git-add', default=False, is_flag=True, hidden=True) +@click.argument("target") +@click.option("--no-git-add", default=False, is_flag=True, hidden=True) @click.pass_context def add(ctx, target, no_git_add: bool): """ @@ -211,10 +279,10 @@ def add(ctx, target, no_git_add: bool): assert not config.no_config_found, MISSING_G3T_MESSAGE # needs to have a target - assert target, 'No targets specified.' + assert target, "No targets specified." # Expand wildcard paths - if is_url(target) and not target.startswith('file://'): + if is_url(target) and not target.startswith("file://"): all_changed_files, updates = add_url(ctx, target) else: all_changed_files, updates = add_file(ctx, target) @@ -224,8 +292,12 @@ def add(ctx, target, no_git_add: bool): # adds = [str(_) for _ in all_changed_files if _ not in updates] if adds and not no_git_add: - adds.append('.gitignore') - run_command(f'git add {" ".join([str(_) for _ in adds])}', dry_run=config.dry_run, no_capture=True) + adds.append(".gitignore") + run_command( + f'git add {" ".join([str(_) for _ in adds])}', + dry_run=config.dry_run, + no_capture=True, + ) except Exception as e: click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr) @@ -234,9 +306,15 @@ def add(ctx, target, no_git_add: bool): @cli.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True)) -@click.argument('targets', nargs=-1) -@click.option('--message', '-m', help='The commit message.') -@click.option('--all', '-a', is_flag=True, default=False, help='Automatically stage files that have been modified and 
deleted.') +@click.argument("targets", nargs=-1) +@click.option("--message", "-m", help="The commit message.") +@click.option( + "--all", + "-a", + is_flag=True, + default=False, + help="Automatically stage files that have been modified and deleted.", +) @click.pass_context def commit(ctx, targets, message, all): """Commit the changes @@ -269,11 +347,11 @@ def status(config): """Show changed files.""" soft_error = False try: - with Halo(text='Scanning', spinner='line', placement='right', color='white'): - manifest_path = pathlib.Path('MANIFEST') + with Halo(text="Scanning", spinner="line", placement="right", color="white"): + manifest_path = pathlib.Path("MANIFEST") changes = data_file_changes(manifest_path) # Get a list of all files in the MANIFEST directory and its subdirectories - files = glob.glob('MANIFEST/**/*.dvc', recursive=True) + files = glob.glob("MANIFEST/**/*.dvc", recursive=True) # Filter out directories, keep only files files = [f for f in files if os.path.isfile(f)] if not files: @@ -284,28 +362,46 @@ def status(config): document_reference_mtime = 0 - if pathlib.Path('META/DocumentReference.ndjson').exists(): + if pathlib.Path("META/DocumentReference.ndjson").exists(): # Get the modification time - document_reference_mtime = os.path.getmtime('META/DocumentReference.ndjson') + document_reference_mtime = os.path.getmtime( + "META/DocumentReference.ndjson" + ) latest_file_mtime = os.path.getmtime(latest_file) if document_reference_mtime < latest_file_mtime: - document_reference_mtime = datetime.fromtimestamp(document_reference_mtime).isoformat() - latest_file_mtime = datetime.fromtimestamp(latest_file_mtime).isoformat() - click.secho(f"WARNING: DocumentReference.ndjson is out of date {document_reference_mtime}. The most recently changed file is {latest_file} {latest_file_mtime}. 
Please check DocumentReferences.ndjson", fg=INFO_COLOR, file=sys.stderr) + document_reference_mtime = datetime.fromtimestamp( + document_reference_mtime + ).isoformat() + latest_file_mtime = datetime.fromtimestamp( + latest_file_mtime + ).isoformat() + click.secho( + f"WARNING: DocumentReference.ndjson is out of date {document_reference_mtime}. The most recently changed file is {latest_file} {latest_file_mtime}. Please check DocumentReferences.ndjson", + fg=INFO_COLOR, + file=sys.stderr, + ) soft_error = True if changes: - click.secho(f"# There are {len(changes)} data files that you need to update via `g3t add`:", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"# There are {len(changes)} data files that you need to update via `g3t add`:", + fg=INFO_COLOR, + file=sys.stderr, + ) cwd = pathlib.Path.cwd() for _ in changes: - data_path = str(_.data_path).replace(str(cwd) + '/', "") - click.secho(f' g3t add {data_path} # changed: {modified_date(_.data_path)}, last added: {modified_date(_.dvc_path)}', fg=INFO_COLOR, file=sys.stderr) + data_path = str(_.data_path).replace(str(cwd) + "/", "") + click.secho( + f" g3t add {data_path} # changed: {modified_date(_.data_path)}, last added: {modified_date(_.dvc_path)}", + fg=INFO_COLOR, + file=sys.stderr, + ) soft_error = True else: click.secho("No data file changes.", fg=INFO_COLOR, file=sys.stderr) - _ = run_command('git status') + _ = run_command("git status") print(_.stdout) if soft_error: exit(1) @@ -316,27 +412,66 @@ def status(config): @cli.command() -@click.option('--step', - type=click.Choice(['index', 'upload', 'publish', 'all', 'fhir']), - default='all', - show_default=True, - help='The step to run ' - ) -@click.option('--transfer-method', - type=click.Choice(gen3_tracker.FILE_TRANSFER_METHODS.keys()), - default='gen3', - show_default=True, - help='The upload method.' 
- ) -@click.option('--overwrite', is_flag=True, help='(index): Overwrite previously submitted files.') -@click.option('--wait', default=True, is_flag=True, show_default=True, help="(publish): Wait for metadata completion.") -@click.option('--dry-run', show_default=True, default=False, is_flag=True, help='Print the commands that would be executed, but do not execute them.') -@click.option('--re-run', show_default=True, default=False, is_flag=True, help='Re-run the last publish step') -@click.option('--fhir-server', show_default=True, default=False, is_flag=True, help='Push data in META directory to FHIR Server. Whatever FHIR data that exists in META dir will be upserted into the fhir server') -@click.option('--debug', is_flag=True) -@click.option('--skip_validate', is_flag=True, help='Skip validation of the metadata') +@click.option( + "--step", + type=click.Choice(["index", "upload", "publish", "all", "fhir"]), + default="all", + show_default=True, + help="The step to run ", +) +@click.option( + "--transfer-method", + type=click.Choice(gen3_tracker.FILE_TRANSFER_METHODS.keys()), + default="gen3", + show_default=True, + help="The upload method.", +) +@click.option( + "--overwrite", is_flag=True, help="(index): Overwrite previously submitted files." +) +@click.option( + "--wait", + default=True, + is_flag=True, + show_default=True, + help="(publish): Wait for metadata completion.", +) +@click.option( + "--dry-run", + show_default=True, + default=False, + is_flag=True, + help="Print the commands that would be executed, but do not execute them.", +) +@click.option( + "--re-run", + show_default=True, + default=False, + is_flag=True, + help="Re-run the last publish step", +) +@click.option( + "--fhir-server", + show_default=True, + default=False, + is_flag=True, + help="Push data in META directory to FHIR Server. 
Whatever FHIR data that exists in META dir will be upserted into the fhir server", +) +@click.option("--debug", is_flag=True) +@click.option("--skip_validate", is_flag=True, help="Skip validation of the metadata") @click.pass_context -def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wait: bool, dry_run: bool, fhir_server: bool, debug: bool, skip_validate: bool): +def push( + ctx, + step: str, + transfer_method: str, + overwrite: bool, + re_run: bool, + wait: bool, + dry_run: bool, + fhir_server: bool, + debug: bool, + skip_validate: bool, +): """Push changes to the remote repository. \b steps: @@ -362,27 +497,35 @@ def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wa raise NotImplementedError("Re-run not implemented") try: - with Halo(text='Checking', spinner='line', placement='right', color='white'): + with Halo( + text="Checking", spinner="line", placement="right", color="white" + ): run_command("g3t status") if not skip_validate: run_command("g3t meta validate", no_capture=True) except Exception as e: - click.secho("Please correct issues before pushing.", fg=ERROR_COLOR, file=sys.stderr) + click.secho( + "Please correct issues before pushing.", fg=ERROR_COLOR, file=sys.stderr + ) click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr) if config.debug: raise exit(1) - with Halo(text='Scanning', spinner='line', placement='right', color='white'): + with Halo(text="Scanning", spinner="line", placement="right", color="white"): # check git status branch, uncommitted = git_status() - assert not uncommitted, "Uncommitted changes found. Please commit or stash them first." + assert ( + not uncommitted + ), "Uncommitted changes found. Please commit or stash them first." # check dvc vs external files - changes = data_file_changes(pathlib.Path('MANIFEST')) - assert not changes, f"# There are {len(changes)} data files that you need to update. 
See `g3t status`" + changes = data_file_changes(pathlib.Path("MANIFEST")) + assert ( + not changes + ), f"# There are {len(changes)} data files that you need to update. See `g3t status`" # initialize dvc objects with this project_id committed_files, dvc_objects = manifest(config.gen3.project_id) @@ -392,133 +535,232 @@ def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wa bucket_name = get_program_bucket(config=config, auth=auth) # check for new files - records = ls(config, metadata={'project_id': config.gen3.project_id}, auth=auth)['records'] - dids = {_['did']: _['updated_date'] for _ in records} + records = ls( + config, metadata={"project_id": config.gen3.project_id}, auth=auth + )["records"] + dids = {_["did"]: _["updated_date"] for _ in records} new_dvc_objects = [_ for _ in dvc_objects if _.object_id not in dids] - updated_dvc_objects = [_ for _ in dvc_objects if _.object_id in dids and _.out.modified > dids[_.object_id]] + updated_dvc_objects = [ + _ + for _ in dvc_objects + if _.object_id in dids and _.out.modified > dids[_.object_id] + ] if step not in ["publish", "fhir"]: if not overwrite: dvc_objects = new_dvc_objects + updated_dvc_objects - assert dvc_objects, "No new files to index. Use --overwrite to force" - - click.secho(f'Scanned new: {len(new_dvc_objects)}, updated: {len(updated_dvc_objects)} files', fg=INFO_COLOR, file=sys.stderr) + assert ( + dvc_objects + ), "No new files to index. Use --overwrite to force" + + click.secho( + f"Scanned new: {len(new_dvc_objects)}, updated: {len(updated_dvc_objects)} files", + fg=INFO_COLOR, + file=sys.stderr, + ) if updated_dvc_objects: - click.secho(f'Found {len(updated_dvc_objects)} updated files. overwriting', fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Found {len(updated_dvc_objects)} updated files. 
overwriting", + fg=INFO_COLOR, + file=sys.stderr, + ) overwrite = True - if step in ['index', 'all']: + if step in ["index", "all"]: # send to index if dry_run: - click.secho("Dry run: not indexing files", fg=INFO_COLOR, file=sys.stderr) + click.secho( + "Dry run: not indexing files", fg=INFO_COLOR, file=sys.stderr + ) yaml.dump( { - 'new': [_.model_dump() for _ in new_dvc_objects], - 'updated': [_.model_dump() for _ in updated_dvc_objects], + "new": [_.model_dump() for _ in new_dvc_objects], + "updated": [_.model_dump() for _ in updated_dvc_objects], }, - sys.stdout + sys.stdout, ) return for _ in tqdm( - to_indexd( - dvc_objects=dvc_objects, - auth=auth, - project_id=config.gen3.project_id, - bucket_name=bucket_name, - overwrite=overwrite, - restricted_project_id=None - - ), - desc='Indexing', unit='file', leave=False, total=len(committed_files)): + to_indexd( + dvc_objects=dvc_objects, + auth=auth, + project_id=config.gen3.project_id, + bucket_name=bucket_name, + overwrite=overwrite, + restricted_project_id=None, + ), + desc="Indexing", + unit="file", + leave=False, + total=len(committed_files), + ): pass - click.secho(f'Indexed {len(committed_files)} files.', fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Indexed {len(committed_files)} files.", fg=INFO_COLOR, file=sys.stderr + ) - if step in ['upload', 'all']: - click.secho(f'Checking {len(dvc_objects)} files for upload via {transfer_method}', fg=INFO_COLOR, file=sys.stderr) + if step in ["upload", "all"]: + click.secho( + f"Checking {len(dvc_objects)} files for upload via {transfer_method}", + fg=INFO_COLOR, + file=sys.stderr, + ) to_remote( upload_method=transfer_method, dvc_objects=dvc_objects, bucket_name=bucket_name, profile=config.gen3.profile, dry_run=config.dry_run, - work_dir=config.work_dir + work_dir=config.work_dir, ) - if fhir_server or step in ['fhir']: + if fhir_server or step in ["fhir"]: """Either there exists a Bundle.ndjson file in META signifying a revision to the data, or there is no 
bundle.json, - signifying that the data in the META directory should be upserted into gen34""" - meta_dir = pathlib.Path('META') + signifying that the data in the META directory should be upserted into gen34 + """ + meta_dir = pathlib.Path("META") bundle_file = meta_dir / "Bundle.ndjson" if os.path.isfile(bundle_file): - with Halo(text='Sending to FHIR Server', spinner='line', placement='right', color='white'): + with Halo( + text="Sending to FHIR Server", + spinner="line", + placement="right", + color="white", + ): with open(bundle_file, "r") as file: json_string = file.read() bundle_data = orjson.loads(json_string) headers = {"Authorization": f"{auth._access_token}"} - result = requests.delete(url=f'{auth.endpoint}/Bundle', data=orjson.dumps(bundle_data, default=_default_json_serializer, - option=orjson.OPT_APPEND_NEWLINE).decode(), headers=headers) - - with open("logs/publish.log", 'a') as f: - log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat(), "result": f"{result}"} - click.secho('Published project. See logs/publish.log', fg=SUCCESS_COLOR, file=sys.stderr) - f.write(json.dumps(log_msg, separators=(',', ':'))) - f.write('\n') + result = requests.delete( + url=f"{auth.endpoint}/Bundle", + data=orjson.dumps( + bundle_data, + default=_default_json_serializer, + option=orjson.OPT_APPEND_NEWLINE, + ).decode(), + headers=headers, + ) + + with open("logs/publish.log", "a") as f: + log_msg = { + "timestamp": datetime.now(pytz.UTC).isoformat(), + "result": f"{result}", + } + click.secho( + "Published project. 
See logs/publish.log", + fg=SUCCESS_COLOR, + file=sys.stderr, + ) + f.write(json.dumps(log_msg, separators=(",", ":"))) + f.write("\n") return project_id = config.gen3.project_id now = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ") - bundle = Bundle(type='transaction', timestamp=now) - bundle.identifier = Identifier(value=project_id, system="https://aced-idp.org/project_id") + bundle = Bundle(type="transaction", timestamp=now) + bundle.identifier = Identifier( + value=project_id, system="https://aced-idp.org/project_id" + ) from gen3_tracker import ACED_NAMESPACE + bundle.id = str(uuid.uuid5(ACED_NAMESPACE, f"Bundle/{project_id}/{now}")) bundle.entry = [] for _ in get_data_from_meta(): bundle_entry = BundleEntry() # See https://build.fhir.org/bundle-definitions.html#Bundle.entry.request.url - bundle_entry.request = BundleEntryRequest(url=f"{_['resourceType']}/{_['id']}", method='PUT') + bundle_entry.request = BundleEntryRequest( + url=f"{_['resourceType']}/{_['id']}", method="PUT" + ) bundle_entry.resource = _ bundle.entry.append(bundle_entry) headers = {"Authorization": f"{auth._access_token}"} bundle_dict = bundle.dict() - with Halo(text='Sending to FHIR Server', spinner='line', placement='right', color='white'): - result = requests.put(url=f'{auth.endpoint}/Bundle', data=orjson.dumps(bundle_dict, default=_default_json_serializer, - option=orjson.OPT_APPEND_NEWLINE).decode(), headers=headers) - - with open("logs/publish.log", 'a') as f: - log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat(), "result": f"{result}"} - click.secho('Published project. 
See logs/publish.log', fg=SUCCESS_COLOR, file=sys.stderr) - f.write(json.dumps(log_msg, separators=(',', ':'))) - f.write('\n') + with Halo( + text="Sending to FHIR Server", + spinner="line", + placement="right", + color="white", + ): + result = requests.put( + url=f"{auth.endpoint}/Bundle", + data=orjson.dumps( + bundle_dict, + default=_default_json_serializer, + option=orjson.OPT_APPEND_NEWLINE, + ).decode(), + headers=headers, + ) + + with open("logs/publish.log", "a") as f: + log_msg = { + "timestamp": datetime.now(pytz.UTC).isoformat(), + "result": f"{result}", + } + click.secho( + "Published project. See logs/publish.log", + fg=SUCCESS_COLOR, + file=sys.stderr, + ) + f.write(json.dumps(log_msg, separators=(",", ":"))) + f.write("\n") return - if step in ['publish', 'all'] and not fhir_server: + if step in ["publish", "all"] and not fhir_server: log_path = "logs/publish.log" - with Halo(text='Uploading snapshot', spinner='line', placement='right', color='white'): + with Halo( + text="Uploading snapshot", + spinner="line", + placement="right", + color="white", + ): # push the snapshot of the `.git` sub-directory in the current directory push_snapshot(config, auth=auth) - if transfer_method == 'gen3': + if transfer_method == "gen3": try: # legacy, "old" fhir_import_export use publish_commits to publish the META - with Halo(text='Publishing', spinner='line', placement='right', color='white') as spinner: - _ = publish_commits(config, wait=wait, auth=auth, bucket_name=bucket_name, spinner=spinner) + with Halo( + text="Publishing", + spinner="line", + placement="right", + color="white", + ) as spinner: + _ = publish_commits( + config, + wait=wait, + auth=auth, + bucket_name=bucket_name, + spinner=spinner, + ) except Exception as e: - click.secho(f'Unable to publish project. See {log_path} for more info', fg=ERROR_COLOR, file=sys.stderr) + click.secho( + f"Unable to publish project. 
See {log_path} for more info", + fg=ERROR_COLOR, + file=sys.stderr, + ) raise e # print success message and save logs - with open(log_path, 'a') as f: - log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat()} + with open(log_path, "a") as f: + log_msg = {"timestamp": datetime.now(pytz.UTC).isoformat()} log_msg.update(_) - f.write(json.dumps(log_msg, separators=(',', ':'))) - f.write('\n') - click.secho(f'Published project. Logs found at {log_path}', fg=SUCCESS_COLOR, file=sys.stderr) + f.write(json.dumps(log_msg, separators=(",", ":"))) + f.write("\n") + click.secho( + f"Published project. Logs found at {log_path}", + fg=SUCCESS_COLOR, + file=sys.stderr, + ) else: - click.secho(f'Auto-publishing not supported for {transfer_method}. Please use --step publish after uploading', fg=ERROR_COLOR, file=sys.stderr) + click.secho( + f"Auto-publishing not supported for {transfer_method}. Please use --step publish after uploading", + fg=ERROR_COLOR, + file=sys.stderr, + ) except Exception as e: click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr) @@ -529,7 +771,7 @@ def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wa def manifest(project_id) -> tuple[list[str], list[DVC]]: """Get the committed files and their dvc objects. Initialize dvc objects with this project_id""" - committed_files = [_ for _ in git_files() if _.endswith('.dvc')] + committed_files = [_ for _ in git_files() if _.endswith(".dvc")] dvc_objects = [_ for _ in dvc_data(committed_files)] for _ in dvc_objects: _.project_id = project_id @@ -537,26 +779,43 @@ def manifest(project_id) -> tuple[list[str], list[DVC]]: @cli.command() -@click.option('--remote', - type=click.Choice(['gen3', 's3', 'ln', 'scp']), - default='gen3', - show_default=True, - help='Specify the remote storage type. 
gen3:download, s3:s3 cp, ln: symbolic link, scp: scp copy' - ) -@click.option('--worker_count', '-w', default=(multiprocessing.cpu_count() - 1), show_default=True, - type=int, - help='Number of workers to use.') -@click.option('--data-only', help='Ignore git snapshot', is_flag=True, default=False, show_default=True) +@click.option( + "--remote", + type=click.Choice(["gen3", "s3", "ln", "scp"]), + default="gen3", + show_default=True, + help="Specify the remote storage type. gen3:download, s3:s3 cp, ln: symbolic link, scp: scp copy", +) +@click.option( + "--worker_count", + "-w", + default=(multiprocessing.cpu_count() - 1), + show_default=True, + type=int, + help="Number of workers to use.", +) +@click.option( + "--data-only", + help="Ignore git snapshot", + is_flag=True, + default=False, + show_default=True, +) @click.pass_obj def pull(config: Config, remote: str, worker_count: int, data_only: bool): - """ Fetch from and integrate with a remote repository.""" + """Fetch from and integrate with a remote repository.""" try: - with Halo(text='Authorizing', spinner='line', placement='right', color='white'): + with Halo(text="Authorizing", spinner="line", placement="right", color="white"): auth = gen3_tracker.config.ensure_auth(config=config) if not data_only: - with Halo(text='Pulling git snapshot', spinner='line', placement='right', color='white'): + with Halo( + text="Pulling git snapshot", + spinner="line", + placement="right", + color="white", + ): if not auth: auth = gen3_tracker.config.ensure_auth(config=config) snapshot, zip_filepath = download_snapshot(auth, config) @@ -567,35 +826,52 @@ def pull(config: Config, remote: str, worker_count: int, data_only: bool): # Rename the directory shutil.move(".git", new_dir_name) # unzip the snapshot - with zipfile.ZipFile(zip_filepath, 'r') as zip_ref: - zip_ref.extractall('.') - click.secho(f"Pulled {snapshot['file_name']}", fg=INFO_COLOR, file=sys.stderr) + with zipfile.ZipFile(zip_filepath, "r") as zip_ref: + 
zip_ref.extractall(".") + click.secho( + f"Pulled {snapshot['file_name']}", fg=INFO_COLOR, file=sys.stderr + ) manifest_files, dvc_objects = manifest(config.gen3.project_id) - if remote == 'gen3': + if remote == "gen3": # download the files - with Halo(text='Pulling from gen3', spinner='line', placement='right', color='white'): - object_ids = [{'object_id': _.object_id} for _ in dvc_objects] # if not _.out.source_url - current_time = datetime.now().strftime("%Y%m%d%H%M%S") # Format datetime as you need - manifest_file = pathlib.Path(config.work_dir) / f'manifest-{current_time}.json' - with open(manifest_file, 'w') as fp: + with Halo( + text="Pulling from gen3", + spinner="line", + placement="right", + color="white", + ): + object_ids = [ + {"object_id": _.object_id} for _ in dvc_objects + ] # if not _.out.source_url + current_time = datetime.now().strftime( + "%Y%m%d%H%M%S" + ) # Format datetime as you need + manifest_file = ( + pathlib.Path(config.work_dir) / f"manifest-{current_time}.json" + ) + with open(manifest_file, "w") as fp: json.dump(object_ids, fp) - cmd = f'gen3-client download-multiple --no-prompt --profile {config.gen3.profile} --manifest {manifest_file} --numparallel {worker_count}' + cmd = f"gen3-client download-multiple --no-prompt --profile {config.gen3.profile} --manifest {manifest_file} --numparallel {worker_count}" print(cmd) run_command(cmd, no_capture=True) - elif remote == 's3': - with Halo(text='Pulling from s3', spinner='line', placement='right', color='white'): + elif remote == "s3": + with Halo( + text="Pulling from s3", spinner="line", placement="right", color="white" + ): if not auth: auth = gen3_tracker.config.ensure_auth(config=config) - results = ls(config, metadata={'project_id': config.gen3.project_id}, auth=auth) + results = ls( + config, metadata={"project_id": config.gen3.project_id}, auth=auth + ) object_ids = [_.object_id for _ in dvc_objects] - for _ in results['records']: - if _['did'] in object_ids: - print('aws s3 cp ', 
_['urls'][0], _['file_name']) - elif remote == 'ln': + for _ in results["records"]: + if _["did"] in object_ids: + print("aws s3 cp ", _["urls"][0], _["file_name"]) + elif remote == "ln": for _ in dvc_objects: print(f"ln -s {_.out.realpath} {_.out.path}") - elif remote == 'scp': + elif remote == "scp": for _ in dvc_objects: print(f"scp USER@HOST:{_.out.realpath} {_.out.path}") @@ -609,56 +885,79 @@ def pull(config: Config, remote: str, worker_count: int, data_only: bool): @cli.command() -@click.argument('project_id', default=None, required=False, envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID", metavar='PROJECT_ID') +@click.argument( + "project_id", + default=None, + required=False, + envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID", + metavar="PROJECT_ID", +) @click.pass_obj def clone(config, project_id): """Clone a repository into a new directory""" try: config.gen3.project_id = project_id - assert not pathlib.Path(project_id).exists(), f"{project_id} already exists. Please remove it first." + assert not pathlib.Path( + project_id + ).exists(), f"{project_id} already exists. Please remove it first." os.mkdir(project_id) os.chdir(project_id) - with Halo(text='Cloning', spinner='line', placement='right', color='white'): + with Halo(text="Cloning", spinner="line", placement="right", color="white"): auth = gen3_tracker.config.ensure_auth(config=config) snapshot, zip_filepath = download_snapshot(auth, config) - assert not pathlib.Path('.git').exists(), "A git repository already exists. Please remove it, or move to another directory first." + assert not pathlib.Path( + ".git" + ).exists(), "A git repository already exists. Please remove it, or move to another directory first." 
# unzip - with zipfile.ZipFile(zip_filepath, 'r') as zip_ref: - zip_ref.extractall('.') + with zipfile.ZipFile(zip_filepath, "r") as zip_ref: + zip_ref.extractall(".") # if we just unzipped a .git these directories will exist - expected_dirs = ['.git', 'META', 'MANIFEST'] + expected_dirs = [".git", "META", "MANIFEST"] if not all([pathlib.Path(_).exists() for _ in expected_dirs]): # if not, we have downloaded a legacy SNAPSHOT.zip, so lets migrate the data to the expected drirectories - click.secho(f"{expected_dirs} not found after downloading {snapshot['file_name']} processing legacy snapshot", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"{expected_dirs} not found after downloading {snapshot['file_name']} processing legacy snapshot", + fg=INFO_COLOR, + file=sys.stderr, + ) # legacy - was this a *SNAPSHOT.zip? - meta_files = (pathlib.Path('studies') / config.gen3.project) + meta_files = pathlib.Path("studies") / config.gen3.project # legacy - was this a *meta.zip? if not meta_files.exists(): - meta_files = pathlib.Path('.') + meta_files = pathlib.Path(".") # create local directories and git [_ for _ in config_init(config, project_id)] ensure_git_repo(config=config) # move ndjson from studies to META - for _ in meta_files.glob('*.ndjson'): - shutil.move(_, 'META/') + for _ in meta_files.glob("*.ndjson"): + shutil.move(_, "META/") # add to git - run_command('git add META/*.*') + run_command("git add META/*.*") # migrate DocumentReferences to MANIFEST references = meta_index() manifest_files = [] - for _ in read_ndjson_file('META/DocumentReference.ndjson'): + for _ in read_ndjson_file("META/DocumentReference.ndjson"): document_reference = DocumentReference.parse_obj(_) - dvc_object = DVC.from_document_reference(config, document_reference, references) - manifest_files.append(write_dvc_file(yaml_data=dvc_object.model_dump(), target=dvc_object.out.path)) + dvc_object = DVC.from_document_reference( + config, document_reference, references + ) + manifest_files.append( 
+ write_dvc_file( + yaml_data=dvc_object.model_dump(), + target=dvc_object.out.path, + ) + ) # Get the current time in seconds since the epoch current_time = time.time() # Update the access and modification times of the file - os.utime('META/DocumentReference.ndjson', (current_time, current_time)) + os.utime("META/DocumentReference.ndjson", (current_time, current_time)) - run_command('git add MANIFEST/') - run_command('git commit -m "migrated from legacy" MANIFEST/ META/ .gitignore') + run_command("git add MANIFEST/") + run_command( + 'git commit -m "migrated from legacy" MANIFEST/ META/ .gitignore' + ) shutil.move(zip_filepath, config.work_dir / zip_filepath.name) click.secho(f"Cloned {snapshot['file_name']}", fg=INFO_COLOR, file=sys.stderr) @@ -673,27 +972,30 @@ def clone(config, project_id): def download_snapshot(auth, config): """Download the latest snapshot.""" from gen3_tracker.git.cloner import find_latest_snapshot + snapshot = find_latest_snapshot(auth, config) gen3_file = Gen3File(auth) - pathlib.Path(snapshot['file_name']).parent.mkdir(exist_ok=True, parents=True) - ok = gen3_file.download_single(snapshot['did'], '.') + pathlib.Path(snapshot["file_name"]).parent.mkdir(exist_ok=True, parents=True) + ok = gen3_file.download_single(snapshot["did"], ".") assert ok, f"Failed to download {snapshot['did']}" - zip_filepath = pathlib.Path(snapshot['file_name']) + zip_filepath = pathlib.Path(snapshot["file_name"]) assert zip_filepath.exists(), f"Failed to download {snapshot['did']}" return snapshot, zip_filepath def file_name_or_guid(config, object_id) -> (str, pathlib.Path): """Check if the object_id is a file name or a GUID.""" - guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$') + guid_pattern = re.compile( + r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" + ) path = None if not guid_pattern.match(object_id): if not is_url(object_id): - path = pathlib.Path('MANIFEST') / (object_id + ".dvc") + path = 
pathlib.Path("MANIFEST") / (object_id + ".dvc") else: - path = pathlib.Path('MANIFEST') / (url_path(object_id) + ".dvc") + path = pathlib.Path("MANIFEST") / (url_path(object_id) + ".dvc") if path.exists(): dvc_object = next(iter(dvc_data([str(path)])), None) @@ -702,20 +1004,33 @@ def file_name_or_guid(config, object_id) -> (str, pathlib.Path): object_id = dvc_object.object_id else: raise ValueError( - f"{object_id} was not found in the MANIFEST and does not appear to be an object identifier (GUID).") + f"{object_id} was not found in the MANIFEST and does not appear to be an object identifier (GUID)." + ) else: committed_files, dvc_objects = manifest(config.gen3.project_id) - dvc_objects = [dvc_object for dvc_object in dvc_objects if dvc_object.object_id == object_id] + dvc_objects = [ + dvc_object + for dvc_object in dvc_objects + if dvc_object.object_id == object_id + ] assert dvc_objects, f"{object_id} not found in MANIFEST." - path = pathlib.Path('MANIFEST') / (dvc_objects[0].out.path + ".dvc") + path = pathlib.Path("MANIFEST") / (dvc_objects[0].out.path + ".dvc") assert guid_pattern.match(object_id), f"{object_id} was not found in MANIFEST." return object_id, path @cli.command("ls") -@click.option('--long', '-l', 'long_flag', default=False, is_flag=True, help='Long listing format.', show_default=True) -@click.argument('target', default=None, required=False) +@click.option( + "--long", + "-l", + "long_flag", + default=False, + is_flag=True, + help="Long listing format.", + show_default=True, +) +@click.argument("target", default=None, required=False) @click.pass_obj def ls_cli(config: Config, long_flag: bool, target: str): """List files in the repository. 
@@ -724,10 +1039,14 @@ def ls_cli(config: Config, long_flag: bool, target: str): """ try: - with Halo(text='Pulling file list', spinner='line', placement='right', color='white'): + with Halo( + text="Pulling file list", spinner="line", placement="right", color="white" + ): auth = gen3_tracker.config.ensure_auth(config=config) - results = ls(config, metadata={'project_id': config.gen3.project_id}, auth=auth) - indexd_records = results['records'] + results = ls( + config, metadata={"project_id": config.gen3.project_id}, auth=auth + ) + indexd_records = results["records"] committed_files, dvc_objects = manifest(config.gen3.project_id) # list all data files dvc_objects = {_.object_id: _ for _ in dvc_objects} @@ -742,44 +1061,49 @@ def _dvc_meta(dvc_object, full=False) -> dict: _[k] = v else: _ = dvc_object.model_dump(exclude_none=True) - _['object_id'] = dvc_object.object_id + _["object_id"] = dvc_object.object_id return _ if not long_flag: indexd_records = [ { - 'did': _['did'], - 'file_name': _['file_name'], - 'indexd_created_date': _['created_date'], - 'meta': _dvc_meta(dvc_objects.get(_['did'], None)), - 'urls': _['urls'] - } for _ in indexd_records + "did": _["did"], + "file_name": _["file_name"], + "indexd_created_date": _["created_date"], + "meta": _dvc_meta(dvc_objects.get(_["did"], None)), + "urls": _["urls"], + } + for _ in indexd_records ] - bucket_ids = {_['did'] for _ in indexd_records} + bucket_ids = {_["did"] for _ in indexd_records} - uncommitted = pathlib.Path('MANIFEST').glob('**/*.dvc') + uncommitted = pathlib.Path("MANIFEST").glob("**/*.dvc") uncommitted = [str(_) for _ in uncommitted] uncommitted = [str(_) for _ in uncommitted if _ not in committed_files] uncommitted = [_.model_dump(exclude_none=True) for _ in dvc_data(uncommitted)] _ = { - 'bucket': indexd_records, - 'committed': [_dvc_meta(v, full=True) for k, v in dvc_objects.items() if k not in bucket_ids], - 'uncommitted': uncommitted + "bucket": indexd_records, + "committed": [ + _dvc_meta(v, 
full=True) + for k, v in dvc_objects.items() + if k not in bucket_ids + ], + "uncommitted": uncommitted, } if target: # Escape special characters and replace wildcard '*' with '.*' for regex pattern pattern = re.escape(target).replace("\\*", ".*") filtered = { - 'bucket': filter_dicts(_.get('bucket', []), pattern), - 'committed': filter_dicts(_.get('committed', []), pattern), - 'uncommitted': filter_dicts(_.get('uncommitted', []), pattern) + "bucket": filter_dicts(_.get("bucket", []), pattern), + "committed": filter_dicts(_.get("committed", []), pattern), + "uncommitted": filter_dicts(_.get("uncommitted", []), pattern), } _ = filtered - if config.output.format == 'json': + if config.output.format == "json": print(json.dumps(_, indent=2)) else: yaml.dump(_, sys.stdout, default_flow_style=False) @@ -791,7 +1115,7 @@ def _dvc_meta(dvc_object, full=False) -> dict: @cli.command() -@click.argument('object_id', metavar='') +@click.argument("object_id", metavar="") @click.pass_obj def rm(config: Config, object_id: str): """Remove a single file from the server index, and MANIFEST. Does not alter META. @@ -800,29 +1124,50 @@ def rm(config: Config, object_id: str): """ try: - with Halo(text='Searching', spinner='line', placement='right', color='white'): + with Halo(text="Searching", spinner="line", placement="right", color="white"): object_id, path = file_name_or_guid(config, object_id) - with Halo(text='Deleting from server', spinner='line', placement='right', color='white'): + with Halo( + text="Deleting from server", + spinner="line", + placement="right", + color="white", + ): auth = gen3_tracker.config.ensure_auth(config=config) index = Gen3Index(auth) result = index.delete_record(object_id) if not result: if not path: - path = '' - click.secho(f"Failed to delete {object_id} from server. {path}", fg=ERROR_COLOR, file=sys.stderr) + path = "" + click.secho( + f"Failed to delete {object_id} from server. 
{path}", + fg=ERROR_COLOR, + file=sys.stderr, + ) else: - click.secho(f"Deleted {object_id} from server. {path}", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Deleted {object_id} from server. {path}", + fg=INFO_COLOR, + file=sys.stderr, + ) - with Halo(text='Scanning', spinner='line', placement='right', color='white'): + with Halo(text="Scanning", spinner="line", placement="right", color="white"): committed_files, dvc_objects = manifest(config.gen3.project_id) - dvc_objects = [dvc_object for dvc_object in dvc_objects if dvc_object.object_id == object_id] + dvc_objects = [ + dvc_object + for dvc_object in dvc_objects + if dvc_object.object_id == object_id + ] assert dvc_objects, f"{object_id} not found in MANIFEST." dvc_object = dvc_objects[0] - path = pathlib.Path('MANIFEST') / (dvc_object.out.path + ".dvc") + path = pathlib.Path("MANIFEST") / (dvc_object.out.path + ".dvc") assert path.exists(), f"{path} not found" path.unlink() - click.secho(f"Deleted {path} from MANIFEST. Please adjust META resources", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Deleted {path} from MANIFEST. Please adjust META resources", + fg=INFO_COLOR, + file=sys.stderr, + ) except Exception as e: click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr) @@ -858,10 +1203,10 @@ def ping(config: Config): msgs.append(str(e)) ok = False except Gen3AuthError as e: - msg = str(e).split(':')[0] + msg = str(e).split(":")[0] msgs.append(msg) msg2 = str(e).split('

')[-1] - msg2 = msg2.split('

')[0] + msg2 = msg2.split("

")[0] msgs.append(msg2) ok = False @@ -871,34 +1216,42 @@ def ping(config: Config): _ = "Configuration ERROR: " output.exit_code = 1 - _ = {'msg': _ + ', '.join(msgs)} + _ = {"msg": _ + ", ".join(msgs)} if auth: - _['endpoint'] = auth.endpoint - user_info = auth.curl('/user/user').json() - _['username'] = user_info['username'] + _["endpoint"] = auth.endpoint + user_info = auth.curl("/user/user").json() + _["username"] = user_info["username"] buckets = get_buckets(config=config) bucket_info = {} program_info = defaultdict(list) - for k, v in buckets['S3_BUCKETS'].items(): + for k, v in buckets["S3_BUCKETS"].items(): bucket_info[k] = {} - if 'programs' not in v: + if "programs" not in v: bucket_info[k] = "No `programs` found" - click.secho(f"WARNING: No `programs` found for bucket {k}", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"WARNING: No `programs` found for bucket {k}", + fg=INFO_COLOR, + file=sys.stderr, + ) continue - bucket_info[k] = ",".join(v['programs']) - for program in v['programs']: + bucket_info[k] = ",".join(v["programs"]) + for program in v["programs"]: program_info[program].append(k) - _['bucket_programs'] = bucket_info + _["bucket_programs"] = bucket_info for k, v in program_info.items(): if len(v) > 1: - click.secho(f"WARNING: {k} is in multiple buckets: {', '.join(v)}", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"WARNING: {k} is in multiple buckets: {', '.join(v)}", + fg=INFO_COLOR, + file=sys.stderr, + ) - assert 'authz' in user_info, "No authz found" + assert "authz" in user_info, "No authz found" authz_info = defaultdict(dict) - for k, v in user_info['authz'].items(): - authz_info[k] = ",".join(set([_['method'] for _ in v])) - _['your_access'] = dict(authz_info) + for k, v in user_info["authz"].items(): + authz_info[k] = ",".join(set([_["method"] for _ in v])) + _["your_access"] = dict(authz_info) output.update(_) diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index 888cf903..c6beda14 100644 --- 
a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -246,7 +246,6 @@ def scalars(self) -> dict: if (not isinstance(v, list) and not isinstance(v, dict)) } - @computed_field @property def codings(self) -> dict: @@ -261,8 +260,12 @@ def codings(self) -> dict: if isinstance(elem, dict): # TODO: implement hierarchy of codes rather than just taking last code? for value, source in normalize_coding(elem): - if len(v) > 1 and get_nested_value(elem, [source, 0, 'system']): - _codings[elem[source][0]["system"].split("/")[-1]] = value + if len(v) > 1 and get_nested_value( + elem, [source, 0, "system"] + ): + _codings[elem[source][0]["system"].split("/")[-1]] = ( + value + ) else: _codings[k] = value elif isinstance(v, dict): @@ -281,10 +284,15 @@ def identifiers(self) -> dict: if not identifiers_len: return {"identifier": None} elif identifiers_len == 1: - return {"identifier": identifiers[0].get('value')} + return {"identifier": identifiers[0].get("value")} else: - base_identifier = {"identifier": identifiers[0].get('value')} - base_identifier.update({identifier.get("system").split("/")[-1]: identifier.get("value") for identifier in identifiers[1:]}) + base_identifier = {"identifier": identifiers[0].get("value")} + base_identifier.update( + { + identifier.get("system").split("/")[-1]: identifier.get("value") + for identifier in identifiers[1:] + } + ) return base_identifier @computed_field @@ -375,7 +383,6 @@ def values(self) -> dict: if "code" in self.resource and "text" in self.resource["code"]: _values["observation_code"] = self.resource["code"]["text"] - assert len(_values) > 0, f"no values found in Observation: {self.resource}" return _values diff --git a/tests/__init__.py b/tests/__init__.py index 204633dd..7ff71bef 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -5,7 +5,13 @@ from gen3_tracker.cli import cli -def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], expected_exit_code: int = 0, expected_files: 
list[pathlib.Path] = []) -> Result: +def run( + runner: CliRunner, + args: list[str], + expected_output: list[str] = [], + expected_exit_code: int = 0, + expected_files: list[pathlib.Path] = [], +) -> Result: """Run a command and check the output, exit code and expected files.""" if isinstance(args, str): args = args.split() @@ -15,16 +21,20 @@ def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], exp expected_files = [expected_files] expected_files = [pathlib.Path(_) for _ in expected_files] - print('------------------------------------------------------------') + print("------------------------------------------------------------") print("g3t " + " ".join(args)) result = runner.invoke(cli, args) print("result.stdout", result.stdout) print("result.output", result.output) print("result.exception", result.exception) print("CWD", pathlib.Path.cwd()) - assert result.exit_code == expected_exit_code, f"g3t {' '.join(args)} exit_code: {result.exit_code}, expected: {expected_exit_code}" + assert ( + result.exit_code == expected_exit_code + ), f"g3t {' '.join(args)} exit_code: {result.exit_code}, expected: {expected_exit_code}" for line in expected_output: - assert line in result.output, f"output: {result.output}, expected: {expected_output}" + assert ( + line in result.output + ), f"output: {result.output}, expected: {expected_output}" print(f"{line} found in output.") for file in expected_files: assert file.exists(), f"{file} does not exist." 
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index fac2d49a..68c1c087 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -7,7 +7,13 @@ from gen3.query import Gen3Query -def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], expected_exit_code: int = 0, expected_files: list[pathlib.Path] = []) -> Result: +def run( + runner: CliRunner, + args: list[str], + expected_output: list[str] = [], + expected_exit_code: int = 0, + expected_files: list[pathlib.Path] = [], +) -> Result: """Run a command and check the output, exit code and expected files.""" if isinstance(args, str): args = args.split() @@ -17,13 +23,17 @@ def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], exp expected_files = [expected_files] expected_files = [pathlib.Path(_) for _ in expected_files] - print('------------------------------------------------------------') + print("------------------------------------------------------------") print("g3t " + " ".join(args)) result = runner.invoke(cli, args) print(result.stdout) - assert result.exit_code == expected_exit_code, f"exit_code: {result.exit_code}, expected: {expected_exit_code}" + assert ( + result.exit_code == expected_exit_code + ), f"exit_code: {result.exit_code}, expected: {expected_exit_code}" for line in expected_output: - assert line in result.output, f"output: {result.output}, expected: {expected_output}" + assert ( + line in result.output + ), f"output: {result.output}, expected: {expected_output}" print(f"{line} found in output.") for file in expected_files: assert file.exists(), f"{file} does not exist." 
@@ -37,11 +47,12 @@ def validate_document_in_grip(did: str, auth=None, project_id=None): if not auth: auth = ensure_auth(config=default()) token = auth.get_access_token() - result = requests.get(f"{auth.endpoint}/grip/writer/graphql/CALIPER/get-vertex/{did}/{project_id}", - headers={"Authorization": f"bearer {token}"} - ).json() - assert 'data' in result, f"Failed to query grip for {did} {result}" - assert result['data']['gid'] == did + result = requests.get( + f"{auth.endpoint}/grip/writer/graphql/CALIPER/get-vertex/{did}/{project_id}", + headers={"Authorization": f"bearer {token}"}, + ).json() + assert "data" in result, f"Failed to query grip for {did} {result}" + assert result["data"]["gid"] == did def validate_document_in_elastic(did, auth): @@ -55,7 +66,7 @@ def validate_document_in_elastic(did, auth): } } """, - variables={"filter": {"AND": [{"IN": {"id": [did]}}]}} + variables={"filter": {"AND": [{"IN": {"id": [did]}}]}}, ) print(result) - assert result['data']['file'][0]['id'] == did + assert result["data"]["file"][0]["id"] == did diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index b171120e..8de22443 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -16,7 +16,7 @@ def program() -> str: @pytest.fixture def project() -> str: - project = uuid.uuid4().hex.replace('-', '_') + project = uuid.uuid4().hex.replace("-", "_") return project diff --git a/tests/integration/test_bucket_import.py b/tests/integration/test_bucket_import.py index 5edc31d3..9a0959cc 100644 --- a/tests/integration/test_bucket_import.py +++ b/tests/integration/test_bucket_import.py @@ -42,8 +42,11 @@ def test_bucket_import(runner: CliRunner, project_id, tmpdir) -> None: print(project_id) - run(runner, ["--debug", "init", project_id, "--approve", "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve", "--no-server"], + expected_files=[".g3t", ".git"], + ) for _ in 
SHOULD_SUCCEED: run(runner, _.split()) @@ -60,11 +63,11 @@ def test_bucket_import(runner: CliRunner, project_id, tmpdir) -> None: result = run(runner, ["--debug", "--format", "json", "ls"]) listing = json.loads(result.stdout) - for _ in ['bucket', 'committed', 'uncommitted']: + for _ in ["bucket", "committed", "uncommitted"]: assert _ in listing # files should appear in uncommitted - assert len(listing['uncommitted']) == len(SHOULD_SUCCEED) + assert len(listing["uncommitted"]) == len(SHOULD_SUCCEED) # commit the changes run(runner, ["--debug", "commit", "-am", "initial commit"]) @@ -72,11 +75,11 @@ def test_bucket_import(runner: CliRunner, project_id, tmpdir) -> None: # test the ls command, should now be in committed result = run(runner, ["--debug", "--format", "json", "ls"]) listing = json.loads(result.stdout) - assert len(listing['committed']) == len(SHOULD_SUCCEED) + assert len(listing["committed"]) == len(SHOULD_SUCCEED) # test the ls filter for _ in EXPECTED_MANIFEST_PATHS: - bucket_name = _.split('/')[1] + bucket_name = _.split("/")[1] result = run(runner, ["--debug", "--format", "json", "ls", bucket_name]) listing = json.loads(result.stdout) - assert len(listing['committed']) == 1 + assert len(listing["committed"]) == 1 diff --git a/tests/integration/test_bundle.py b/tests/integration/test_bundle.py index f98d0e8c..d27d99ad 100644 --- a/tests/integration/test_bundle.py +++ b/tests/integration/test_bundle.py @@ -11,21 +11,21 @@ CHANGE_PATIENT = [ "--debug add s3://s3-bucket/p1-object.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1", "--debug meta init", - "--debug commit -am \"initial commit\"", + '--debug commit -am "initial commit"', "--debug add s3://s3-bucket/p1-object.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1-prime", "--debug meta init", - "--debug commit -am \"prime commit\"", + '--debug commit -am "prime commit"', ] # user made a mistake and 
added the wrong file CHANGE_FILE = [ "--debug add s3://s3-bucket/p1-object-mistake.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1", "--debug meta init", - "--debug commit -am \"initial commit\"", + '--debug commit -am "initial commit"', "--debug rm s3://s3-bucket/p1-object-mistake.txt", "--debug add s3://s3-bucket/p1-object-correct.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1", "--debug meta init", - "--debug commit -am \"prime commit\"", + '--debug commit -am "prime commit"', ] @@ -37,8 +37,11 @@ def test_change_patient(runner: CliRunner, project_id, tmpdir) -> None: print(project_id) - run(runner, ["--debug", "init", project_id, "--approve", "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve", "--no-server"], + expected_files=[".g3t", ".git"], + ) for _ in CHANGE_PATIENT: run(runner, _.split()) @@ -54,8 +57,10 @@ def test_change_patient(runner: CliRunner, project_id, tmpdir) -> None: assert all([_ == "DELETE" for _ in methods]), "Only DELETE method is expected." urls = [_.request.url for _ in bundle.entry] - assert any([_.startswith('Patient') for _ in urls]), "Expected to delete a Patient." - assert any([_.startswith('ResearchSubject') for _ in urls]), "Expected to delete a ResearchSubject." + assert any([_.startswith("Patient") for _ in urls]), "Expected to delete a Patient." + assert any( + [_.startswith("ResearchSubject") for _ in urls] + ), "Expected to delete a ResearchSubject." 
def test_change_file(runner: CliRunner, project_id, tmpdir) -> None: @@ -66,8 +71,11 @@ def test_change_file(runner: CliRunner, project_id, tmpdir) -> None: print(project_id) - run(runner, ["--debug", "init", project_id, "--approve", "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve", "--no-server"], + expected_files=[".g3t", ".git"], + ) for _ in CHANGE_FILE: run(runner, _.split()) @@ -83,4 +91,6 @@ def test_change_file(runner: CliRunner, project_id, tmpdir) -> None: assert all([_ == "DELETE" for _ in methods]), "Only DELETE method is expected." urls = [_.request.url for _ in bundle.entry] - assert any([_.startswith('DocumentReference') for _ in urls]), "Expected to delete a DocumentReference." + assert any( + [_.startswith("DocumentReference") for _ in urls] + ), "Expected to delete a DocumentReference." diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 86b03fb0..6237edab 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -17,23 +17,36 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: assert tmpdir.chdir() print(Path.cwd()) - assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set." + assert os.environ.get( + "G3T_PROFILE" + ), "G3T_PROFILE environment variable must be set." 
print(project_id) - run(runner, ["--debug", "init", project_id, "--approve"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve"], + expected_files=[".g3t", ".git"], + ) # check ping - run(runner, ["--debug", "ping"], expected_output=["bucket_programs", "your_access", "endpoint", "username"]) + run( + runner, + ["--debug", "ping"], + expected_output=["bucket_programs", "your_access", "endpoint", "username"], + ) # create a test file test_file = Path("my-project-data/hello.txt") test_file.parent.mkdir(parents=True, exist_ok=True) - test_file.write_text('hello\n') + test_file.write_text("hello\n") # add the file - run(runner, ["--debug", "add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"]) + run( + runner, + ["--debug", "add", str(test_file)], + expected_files=["MANIFEST/my-project-data/hello.txt.dvc"], + ) # should create a dvc file dvc_path = Path("MANIFEST/my-project-data/hello.txt.dvc") @@ -49,7 +62,11 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: object_id = dvc.object_id # create the meta file - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson"]) + run( + runner, + ["--debug", "meta", "init"], + expected_files=["META/DocumentReference.ndjson"], + ) # commit the changes, delegating to git run(runner, ["--debug", "commit", "-am", "initial commit"]) @@ -60,10 +77,18 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: # update the file test_file = Path("my-project-data/hello.txt") test_file.parent.mkdir(parents=True, exist_ok=True) - test_file.write_text('hello UPDATE\n') + test_file.write_text("hello UPDATE\n") # re-add the file - run(runner, ["--debug", "add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"]) - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson"]) + run( + runner, + ["--debug", "add", str(test_file)], + 
expected_files=["MANIFEST/my-project-data/hello.txt.dvc"], + ) + run( + runner, + ["--debug", "meta", "init"], + expected_files=["META/DocumentReference.ndjson"], + ) run(runner, ["--debug", "commit", "-am", "updated"]) run(runner, ["--debug", "meta", "validate"]) @@ -71,7 +96,11 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: run(runner, ["--debug", "meta", "graph"], expected_files=["meta.html"]) # create a dataframe - run(runner, ["--debug", "meta", "dataframe", 'DocumentReference'], expected_files=["DocumentReference.csv"]) + run( + runner, + ["--debug", "meta", "dataframe", "DocumentReference"], + expected_files=["DocumentReference.csv"], + ) # push to the server run(runner, ["--debug", "push"]) @@ -103,12 +132,25 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: # check the files exist in the cloned directory run_command("ls -l") - assert Path("my-project-data/hello.txt").exists(), "hello.txt does not exist in the cloned directory." + assert Path( + "my-project-data/hello.txt" + ).exists(), "hello.txt does not exist in the cloned directory." # remove the project from the server. # TODO note, this does not remove the files from the bucket (UChicago bug) # See https://ohsucomputationalbio.slack.com/archives/C043HPV0VMY/p1714065633867229 - run(runner, ["--debug", "projects", "empty", "--project_id", project_id, "--confirm", "empty"]) + run( + runner, + [ + "--debug", + "projects", + "empty", + "--project_id", + project_id, + "--confirm", + "empty", + ], + ) # TODO fix `collaborator rm` # arborist logs: "Policy `data_upload` does not exist for user `xxx@xxx.xxx`: not revoking. Check if it is assigned through a group." 
@@ -116,13 +158,27 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: # run(runner, ["--debug", "collaborator", "rm", username, "--approve"], expected_output=[username]) # add a user with write permissions - run(runner, ["--debug", "collaborator", "add", "foo@bar.com", "--write", "--approve"]) + run( + runner, + ["--debug", "collaborator", "add", "foo@bar.com", "--write", "--approve"], + ) # add a user from another directory (without config) os.mkdir("empty") os.chdir("empty") program, project = project_id.split("-") - run(runner, ["--debug", "collaborator", "add", "foo2@bar.com", f"/programs/{program}/projects/{project}", "--write", "--approve"]) + run( + runner, + [ + "--debug", + "collaborator", + "add", + "foo2@bar.com", + f"/programs/{program}/projects/{project}", + "--write", + "--approve", + ], + ) def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> None: @@ -131,20 +187,29 @@ def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> N assert tmpdir.chdir() print(Path.cwd()) - assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set." + assert os.environ.get( + "G3T_PROFILE" + ), "G3T_PROFILE environment variable must be set." 
print(project_id) - run(runner, ["--debug", "init", project_id, "--approve"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve"], + expected_files=[".g3t", ".git"], + ) # create a test file test_file = Path("my-project-data/hello.txt") test_file.parent.mkdir(parents=True, exist_ok=True) - test_file.write_text('hello\n') + test_file.write_text("hello\n") # add the file - run(runner, ["--debug", "add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"]) + run( + runner, + ["--debug", "add", str(test_file)], + expected_files=["MANIFEST/my-project-data/hello.txt.dvc"], + ) # should create a dvc file dvc_path = Path("MANIFEST/my-project-data/hello.txt.dvc") @@ -160,7 +225,11 @@ def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> N object_id = dvc.object_id # create the meta file - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson"]) + run( + runner, + ["--debug", "meta", "init"], + expected_files=["META/DocumentReference.ndjson"], + ) # commit the changes, delegating to git run(runner, ["--debug", "commit", "-am", "initial commit"]) @@ -184,24 +253,44 @@ def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> N # remove the project from the server. 
# TODO note, this does not remove the files from the bucket (UChicago bug) # See https://ohsucomputationalbio.slack.com/archives/C043HPV0VMY/p1714065633867229 - run(runner, ["--debug", "projects", "empty", "--project_id", project_id, "--confirm", "empty"]) - - -def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, project_id: str, tmp_path: Path): + run( + runner, + [ + "--debug", + "projects", + "empty", + "--project_id", + project_id, + "--confirm", + "empty", + ], + ) + + +def test_push_fails_with_invalid_doc_ref_creation_date( + runner: CliRunner, project_id: str, tmp_path: Path +): # check - assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set." + assert os.environ.get( + "G3T_PROFILE" + ), "G3T_PROFILE environment variable must be set." # copy fixture to temp test dir project_dir = "fhir-gdc-examples" fixtures_path = Path(os.path.dirname(__file__)).parent / "fixtures" fhir_gdc_dir = fixtures_path / project_dir - modified_doc_ref_path = fixtures_path / "negative-examples/fhir-gdc-DocumentReference-invalid-date.ndjson" + modified_doc_ref_path = ( + fixtures_path + / "negative-examples/fhir-gdc-DocumentReference-invalid-date.ndjson" + ) # init project new_project_dir = tmp_path / project_dir shutil.copytree(fhir_gdc_dir, new_project_dir) - shutil.copy(modified_doc_ref_path, new_project_dir / "META" / "DocumentReference.ndjson" ) + shutil.copy( + modified_doc_ref_path, new_project_dir / "META" / "DocumentReference.ndjson" + ) # get invalid date from fixture doc_ref_content = pd.read_json(modified_doc_ref_path, lines=True)["content"][0] @@ -211,14 +300,17 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec log_file_path = "logs/publish.log" os.chdir(new_project_dir) run(runner, ["init", project_id, "--approve"]) - result = run(runner, - ["push", "--skip_validate", "--overwrite"], - expected_exit_code=1, - expected_files=[log_file_path] - ) + result = run( + runner, + ["push", 
"--skip_validate", "--overwrite"], + expected_exit_code=1, + expected_files=[log_file_path], + ) # ensure push has useful useful error logs - assert log_file_path in result.output, f"expected log file path in stdout, instead got:\n{result.output}" + assert ( + log_file_path in result.output + ), f"expected log file path in stdout, instead got:\n{result.output}" # ensure saved log file contains info about invalid date with open(log_file_path, "r") as log_file: @@ -226,40 +318,53 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec str_lines = str(lines) for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: - assert keyword in str_lines, f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' + assert ( + keyword in str_lines + ), f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' -def test_push_fails_with_no_write_permissions(runner: CliRunner, project_id: str, tmp_path: Path): +def test_push_fails_with_no_write_permissions( + runner: CliRunner, project_id: str, tmp_path: Path +): # setup - assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set." + assert os.environ.get( + "G3T_PROFILE" + ), "G3T_PROFILE environment variable must be set." 
os.chdir(tmp_path)
 
     # initialize project without approving permissions
     log_file_path = "logs/publish.log"
-    run(runner, [ "init", project_id],
-        expected_files=[".g3t", ".git"])
+    run(runner, ["init", project_id], expected_files=[".g3t", ".git"])
 
     # create test file
     test_file = Path("my-project-data/hello.txt")
     test_file.parent.mkdir(parents=True, exist_ok=True)
-    test_file.write_text('hello\n')
+    test_file.write_text("hello\n")
 
     # prepare test file for submission
-    run(runner, ["add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"])
+    run(
+        runner,
+        ["add", str(test_file)],
+        expected_files=["MANIFEST/my-project-data/hello.txt.dvc"],
+    )
     run(runner, ["meta", "init"], expected_files=["META/DocumentReference.ndjson"])
 
-    print("current directory:",os.getcwd())
+    print("current directory:", os.getcwd())
     run(runner, ["commit", "-m", "initial commit"])
 
     # push
     result = run(runner, ["push"], expected_exit_code=1, expected_files=[log_file_path])
 
     # ensure stdout mentions log files
-    assert log_file_path in result.output, f"expected log file path in stdout, instead got:\n{result.output}"
+    assert (
+        log_file_path in result.output
+    ), f"expected log file path in stdout, instead got:\n{result.output}"
 
     # check valid error messages within
     with open(log_file_path, "r") as log_file:
         # grab last line
-        line = [l for l in log_file.readlines()][-1]
+        line = [_ for _ in log_file.readlines()][-1]
         for output in ["401", "permission"]:
-            assert "401" in line, f"expected {log_file_path} to contain {output}, instead got: \n{line}"
+            assert (
+                output in line
+            ), f"expected {log_file_path} to contain {output}, instead got: \n{line}"
 diff --git a/tests/unit/meta/conftest.py b/tests/unit/meta/conftest.py
index b171120e..8de22443 100644
--- a/tests/unit/meta/conftest.py
+++ b/tests/unit/meta/conftest.py
@@ -16,7 +16,7 @@ def program() -> str:
 
 @pytest.fixture
 def project() -> str:
-    project = uuid.uuid4().hex.replace('-', '_')
+    project = uuid.uuid4().hex.replace("-", 
"_") return project diff --git a/tests/unit/meta/test_meta.py b/tests/unit/meta/test_meta.py index 818ba083..d602a5c6 100644 --- a/tests/unit/meta/test_meta.py +++ b/tests/unit/meta/test_meta.py @@ -10,15 +10,20 @@ from tests import run -def test_assert_object_id_invalid_on_project_id_change(runner: CliRunner, project_id, tmp_path: pathlib.Path) -> None: +def test_assert_object_id_invalid_on_project_id_change( + runner: CliRunner, project_id, tmp_path: pathlib.Path +) -> None: """Test object_id validation command.""" # change to the temporary directory os.chdir(tmp_path) print(pathlib.Path.cwd()) print(project_id) - run(runner, ["--debug", "--profile", "local", "init", project_id, "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "--profile", "local", "init", project_id, "--no-server"], + expected_files=[".g3t", ".git"], + ) # create test files cmds = """ @@ -27,42 +32,67 @@ def test_assert_object_id_invalid_on_project_id_change(runner: CliRunner, projec echo "hello" > my-project-data/hello.txt echo "big-data" > my-read-only-data/big-file.txt ln -s $PWD/my-read-only-data/big-file.txt my-project-data/big-file.txt - """.split('\n') + """.split( + "\n" + ) for cmd in cmds: run_command(cmd, no_capture=True) - assert pathlib.Path("my-project-data/hello.txt").exists(), "hello.txt does not exist." - assert pathlib.Path("my-read-only-data/big-file.txt").exists(), "my-read-only-data/big-file.txt does not exist." - assert pathlib.Path("my-project-data/big-file.txt").exists(), "my-project-data/big-file.txt does not exist." + assert pathlib.Path( + "my-project-data/hello.txt" + ).exists(), "hello.txt does not exist." + assert pathlib.Path( + "my-read-only-data/big-file.txt" + ).exists(), "my-read-only-data/big-file.txt does not exist." + assert pathlib.Path( + "my-project-data/big-file.txt" + ).exists(), "my-project-data/big-file.txt does not exist." 
files = ["my-project-data/hello.txt", "my-project-data/big-file.txt"] patients = ["P1", "P2"] for f, p in zip(files, patients): - run(runner, ["--debug", "add", str(f), "--patient", p], expected_files=[f"MANIFEST/{f}.dvc"]) - - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson", "META/Patient.ndjson", "META/ResearchStudy.ndjson", "META/ResearchSubject.ndjson"]) + run( + runner, + ["--debug", "add", str(f), "--patient", p], + expected_files=[f"MANIFEST/{f}.dvc"], + ) + + run( + runner, + ["--debug", "meta", "init"], + expected_files=[ + "META/DocumentReference.ndjson", + "META/Patient.ndjson", + "META/ResearchStudy.ndjson", + "META/ResearchSubject.ndjson", + ], + ) run(runner, ["--debug", "meta", "validate"]) - run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]) + run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]) # now change the project_id to something new # this should cause invalid object_id errors config = gen3_tracker.config.default() config.gen3.project_id = config.gen3.project_id + "XXXX" - with open('.g3t/config.yaml', 'w') as f: + with open(".g3t/config.yaml", "w") as f: yaml.dump(config.model_dump(), f) - run(runner, ["commit", "-m", "change-project_id", '.g3t/config.yaml']) + run(runner, ["commit", "-m", "change-project_id", ".g3t/config.yaml"]) # should error now run(runner, ["--debug", "meta", "validate"], expected_exit_code=1) run(runner, ["--debug", "push", "--dry-run"], expected_exit_code=1) # also check skip_validate - run(runner, ["--debug", "push", "--dry-run", "--skip_validate"], expected_exit_code=0) + run( + runner, + ["--debug", "push", "--dry-run", "--skip_validate"], + expected_exit_code=0, + ) # should pass now config.gen3.project_id = config.gen3.project_id.replace("XXXX", "") - with open('.g3t/config.yaml', 'w') as f: + with open(".g3t/config.yaml", "w") as f: yaml.dump(config.model_dump(), f) - run(runner, ["commit", "-m", 
"restore-project_id", '.g3t/config.yaml']) + run(runner, ["commit", "-m", "restore-project_id", ".g3t/config.yaml"]) # ensure we can validate without passing project id results = validate(directory_path="META") @@ -72,7 +102,9 @@ def test_assert_object_id_invalid_on_project_id_change(runner: CliRunner, projec run(runner, ["--debug", "push", "--dry-run"], expected_exit_code=0) -def test_assert_add_specimen_after_init(runner: CliRunner, project_id, tmp_path: pathlib.Path) -> None: +def test_assert_add_specimen_after_init( + runner: CliRunner, project_id, tmp_path: pathlib.Path +) -> None: """Test meta skeleton handles re-add of data with new specimen""" # change to the temporary directory os.chdir(tmp_path) @@ -80,8 +112,11 @@ def test_assert_add_specimen_after_init(runner: CliRunner, project_id, tmp_path: print(project_id) # init the project, no server - run(runner, ["--debug", "--profile", "local", "init", project_id, "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "--profile", "local", "init", project_id, "--no-server"], + expected_files=[".g3t", ".git"], + ) # create test files cmds = """ @@ -90,34 +125,73 @@ def test_assert_add_specimen_after_init(runner: CliRunner, project_id, tmp_path: echo "hello" > my-project-data/hello.txt echo "big-data" > my-read-only-data/big-file.txt ln -s $PWD/my-read-only-data/big-file.txt my-project-data/big-file.txt - """.split('\n') + """.split( + "\n" + ) for cmd in cmds: run_command(cmd, no_capture=True) - assert pathlib.Path("my-project-data/hello.txt").exists(), "hello.txt does not exist." - assert pathlib.Path("my-read-only-data/big-file.txt").exists(), "my-read-only-data/big-file.txt does not exist." - assert pathlib.Path("my-project-data/big-file.txt").exists(), "my-project-data/big-file.txt does not exist." + assert pathlib.Path( + "my-project-data/hello.txt" + ).exists(), "hello.txt does not exist." 
+ assert pathlib.Path( + "my-read-only-data/big-file.txt" + ).exists(), "my-read-only-data/big-file.txt does not exist." + assert pathlib.Path( + "my-project-data/big-file.txt" + ).exists(), "my-project-data/big-file.txt does not exist." def _files_with_patients(): files = ["my-project-data/hello.txt", "my-project-data/big-file.txt"] patients = ["P1", "P2"] for f, p in zip(files, patients): - run(runner, ["--debug", "add", str(f), "--patient", p], expected_files=[f"MANIFEST/{f}.dvc"]) - - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson", "META/Patient.ndjson", "META/ResearchStudy.ndjson", "META/ResearchSubject.ndjson"]) + run( + runner, + ["--debug", "add", str(f), "--patient", p], + expected_files=[f"MANIFEST/{f}.dvc"], + ) + + run( + runner, + ["--debug", "meta", "init"], + expected_files=[ + "META/DocumentReference.ndjson", + "META/Patient.ndjson", + "META/ResearchStudy.ndjson", + "META/ResearchSubject.ndjson", + ], + ) run(runner, ["--debug", "meta", "validate"]) - run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]) + run( + runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"] + ) def _files_with_patients_and_specimens(): files = ["my-project-data/hello.txt", "my-project-data/big-file.txt"] patients = ["P1", "P2"] specimens = ["S1", "S2"] for f, p, s in zip(files, patients, specimens): - run(runner, ["--debug", "add", str(f), "--patient", p, "--specimen", s], expected_files=[f"MANIFEST/{f}.dvc"]) - - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson", "META/Patient.ndjson", "META/ResearchStudy.ndjson", "META/ResearchSubject.ndjson", "META/Specimen.ndjson"]) + run( + runner, + ["--debug", "add", str(f), "--patient", p, "--specimen", s], + expected_files=[f"MANIFEST/{f}.dvc"], + ) + + run( + runner, + ["--debug", "meta", "init"], + expected_files=[ + "META/DocumentReference.ndjson", + "META/Patient.ndjson", + 
"META/ResearchStudy.ndjson", + "META/ResearchSubject.ndjson", + "META/Specimen.ndjson", + ], + ) run(runner, ["--debug", "meta", "validate"]) - run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]) + run( + runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"] + ) # create initial association between patients and files _files_with_patients() diff --git a/tests/unit/test_coding_conventions.py b/tests/unit/test_coding_conventions.py index 34171929..e88b5cb6 100644 --- a/tests/unit/test_coding_conventions.py +++ b/tests/unit/test_coding_conventions.py @@ -7,7 +7,10 @@ def test_coding_conventions(): """Check python conventions on key directories""" script_dir = os.path.dirname(os.path.abspath(__file__)) - directories = [os.path.join(script_dir, "../../gen3_tracker"), os.path.join(script_dir, "../../tests")] + directories = [ + os.path.join(script_dir, "../../gen3_tracker"), + os.path.join(script_dir, "../../tests"), + ] failures = [] for directory in directories: cmd_str = f"flake8 {directory} --max-line-length 256 --exclude test_flatten_fhir_example.py" diff --git a/tests/unit/test_flatten_fhir_example.py b/tests/unit/test_flatten_fhir_example.py index 82acc470..9b923d88 100644 --- a/tests/unit/test_flatten_fhir_example.py +++ b/tests/unit/test_flatten_fhir_example.py @@ -29,19 +29,40 @@ # test data ------------------------------------------------------------ # The following fixtures provide test data for the tests below. + @pytest.fixture def patient_dict() -> dict: # TODO - read the patient example from a file - patient_dict = {"resourceType": "Patient", "id": "3", "meta": {"lastUpdated": "2012-05-29T23:45:32Z"}, - "text": {"status": "generated", - "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003eKidd, Kari. 
SSN:\n 444555555\u003c/div\u003e"}, - "identifier": [{"type": { - "coding": [{"system": "http://terminology.hl7.org/CodeSystem/v2-0203", "code": "SS", "display": "Social Security number"}]}, - "system": "http://hl7.org/fhir/sid/us-ssn", "value": "444555555"}], "active": True, - "name": [{"use": "official", "family": "Kidd", "given": ["Kari"]}], - "telecom": [{"system": "phone", "value": "555-555-2005", "use": "work"}], "gender": "female", - "address": [{"use": "home", "line": ["2222 Home Street"]}], - "managingOrganization": {"reference": "Organization/hl7"}} + patient_dict = { + "resourceType": "Patient", + "id": "3", + "meta": {"lastUpdated": "2012-05-29T23:45:32Z"}, + "text": { + "status": "generated", + "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003eKidd, Kari. SSN:\n 444555555\u003c/div\u003e', + }, + "identifier": [ + { + "type": { + "coding": [ + { + "system": "http://terminology.hl7.org/CodeSystem/v2-0203", + "code": "SS", + "display": "Social Security number", + } + ] + }, + "system": "http://hl7.org/fhir/sid/us-ssn", + "value": "444555555", + } + ], + "active": True, + "name": [{"use": "official", "family": "Kidd", "given": ["Kari"]}], + "telecom": [{"system": "phone", "value": "555-555-2005", "use": "work"}], + "gender": "female", + "address": [{"use": "home", "line": ["2222 Home Street"]}], + "managingOrganization": {"reference": "Organization/hl7"}, + } yield patient_dict @@ -52,38 +73,42 @@ def specimen_dict(): "id": "denovo-3", "text": { "status": "generated", - "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative\u003c/b\u003e\u003c/p\u003e\u003cdiv style\u003d\"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%\"\u003e\u003cp style\u003d\"margin-bottom: 0px\"\u003eResource \u0026quot;denovo-3\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003eidentifier\u003c/b\u003e: 
id: 3\u003c/p\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: available\u003c/p\u003e\u003cp\u003e\u003cb\u003etype\u003c/b\u003e: Venous blood specimen \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"https://browser.ihtsdotools.org/\"\u003eSNOMED CT\u003c/a\u003e#122555007)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d\"Patient-denovoFather.html\"\u003ePatient/denovoFather: John Doe\u003c/a\u003e \u0026quot; DOE\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003ereceivedTime\u003c/b\u003e: 2021-01-01 01:01:01+0000\u003c/p\u003e\u003cp\u003e\u003cb\u003erequest\u003c/b\u003e: \u003ca href\u003d\"ServiceRequest-genomicServiceRequest.html\"\u003eServiceRequest/genomicServiceRequest\u003c/a\u003e\u003c/p\u003e\u003ch3\u003eCollections\u003c/h3\u003e\u003ctable class\u003d\"grid\"\u003e\u003ctr\u003e\u003ctd\u003e-\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollector\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollected[x]\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eQuantity\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eMethod\u003c/b\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd\u003e*\u003c/td\u003e\u003ctd\u003e\u003ca href\u003d\"Practitioner-practitioner01.html\"\u003ePractitioner/practitioner01\u003c/a\u003e \u0026quot; DOEL\u0026quot;\u003c/td\u003e\u003ctd\u003e2021-01-01 01:01:00+0000\u003c/td\u003e\u003ctd\u003e1 mL\u003c/td\u003e\u003ctd\u003eLine, Venous \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"http://terminology.hl7.org/3.1.0/CodeSystem-v2-0488.html\"\u003especimenCollectionMethod\u003c/a\u003e#LNV)\u003c/span\u003e\u003c/td\u003e\u003c/tr\u003e\u003c/table\u003e\u003c/div\u003e"}, + "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003e\u003cp\u003e\u003cb\u003eGenerated 
Narrative\u003c/b\u003e\u003c/p\u003e\u003cdiv style\u003d"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%"\u003e\u003cp style\u003d"margin-bottom: 0px"\u003eResource \u0026quot;denovo-3\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003eidentifier\u003c/b\u003e: id: 3\u003c/p\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: available\u003c/p\u003e\u003cp\u003e\u003cb\u003etype\u003c/b\u003e: Venous blood specimen \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca href\u003d"https://browser.ihtsdotools.org/"\u003eSNOMED CT\u003c/a\u003e#122555007)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d"Patient-denovoFather.html"\u003ePatient/denovoFather: John Doe\u003c/a\u003e \u0026quot; DOE\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003ereceivedTime\u003c/b\u003e: 2021-01-01 01:01:01+0000\u003c/p\u003e\u003cp\u003e\u003cb\u003erequest\u003c/b\u003e: \u003ca href\u003d"ServiceRequest-genomicServiceRequest.html"\u003eServiceRequest/genomicServiceRequest\u003c/a\u003e\u003c/p\u003e\u003ch3\u003eCollections\u003c/h3\u003e\u003ctable class\u003d"grid"\u003e\u003ctr\u003e\u003ctd\u003e-\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollector\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollected[x]\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eQuantity\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eMethod\u003c/b\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd\u003e*\u003c/td\u003e\u003ctd\u003e\u003ca href\u003d"Practitioner-practitioner01.html"\u003ePractitioner/practitioner01\u003c/a\u003e \u0026quot; DOEL\u0026quot;\u003c/td\u003e\u003ctd\u003e2021-01-01 01:01:00+0000\u003c/td\u003e\u003ctd\u003e1 mL\u003c/td\u003e\u003ctd\u003eLine, Venous \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 
1px solid khaki"\u003e (\u003ca href\u003d"http://terminology.hl7.org/3.1.0/CodeSystem-v2-0488.html"\u003especimenCollectionMethod\u003c/a\u003e#LNV)\u003c/span\u003e\u003c/td\u003e\u003c/tr\u003e\u003c/table\u003e\u003c/div\u003e', + }, "identifier": [ { "system": "http://www.somesystemabc.net/identifiers/specimens", - "value": "3"}], + "value": "3", + } + ], "status": "available", "type": { "coding": [ { "system": "http://snomed.info/sct", "code": "122555007", - "display": "Venous blood specimen"}]}, - "subject": { - "reference": "Patient/denovoFather", - "display": "John Doe"}, + "display": "Venous blood specimen", + } + ] + }, + "subject": {"reference": "Patient/denovoFather", "display": "John Doe"}, "receivedTime": "2021-01-01T01:01:01Z", - "request": [ - { - "reference": "ServiceRequest/genomicServiceRequest"}], + "request": [{"reference": "ServiceRequest/genomicServiceRequest"}], "collection": { - "collector": { - "reference": "Practitioner/practitioner01"}, + "collector": {"reference": "Practitioner/practitioner01"}, "collectedDateTime": "2021-01-01T01:01:00Z", - "quantity": { - "value": 1, - "unit": "mL"}, + "quantity": {"value": 1, "unit": "mL"}, "method": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/v2-0488", "code": "LNV", - "display": "Line, Venous"}]}}} + "display": "Line, Venous", + } + ] + }, + }, + } @pytest.fixture @@ -93,14 +118,14 @@ def observation_eye_color_dict(): "id": "eye-color", "text": { "status": "generated", - "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d\"eye-color\"\u003e \u003c/a\u003e\u003ca name\u003d\"hceye-color\"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d\"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%\"\u003e\u003cp style\u003d\"margin-bottom: 0px\"\u003eResource Observation 
\u0026quot;eye-color\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: final\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: eye color \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e ()\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d\"patient-example.html\"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 2016-05-18\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: blue\u003c/p\u003e\u003c/div\u003e"}, + "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d"eye-color"\u003e \u003c/a\u003e\u003ca name\u003d"hceye-color"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%"\u003e\u003cp style\u003d"margin-bottom: 0px"\u003eResource Observation \u0026quot;eye-color\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: final\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: eye color \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e ()\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d"patient-example.html"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 2016-05-18\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: blue\u003c/p\u003e\u003c/div\u003e', + }, "status": "final", - "code": { - "text": "eye color"}, - "subject": { - "reference": "Patient/example"}, + "code": {"text": "eye color"}, + "subject": {"reference": "Patient/example"}, 
"effectiveDateTime": "2016-05-18", - "valueString": "blue"} + "valueString": "blue", + } @pytest.fixture @@ -110,7 +135,8 @@ def observation_bmi_dict(): "id": "bmi-using-related", "text": { "status": "generated", - "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d\"bmi-using-related\"\u003e \u003c/a\u003e\u003ca name\u003d\"hcbmi-using-related\"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d\"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%\"\u003e\u003cp style\u003d\"margin-bottom: 0px\"\u003eResource Observation \u0026quot;bmi-using-related\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: \u003cspan title\u003d\" \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d need to fix vitals to removed fixed value \u0027has-member\u0027 \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\t\u0026lt;meta\u0026gt;\n\t\t\u0026lt;profile value\u003d\u0026quot;http://hl7.org/fhir/StructureDefinition/vitalsigns\u0026quot;/\u0026gt;\n\t\u0026lt;/meta\u0026gt;\n \"\u003efinal\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecategory\u003c/b\u003e: Vital Signs \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"http://terminology.hl7.org/5.5.0/CodeSystem-observation-category.html\"\u003eObservation Category Codes\u003c/a\u003e#vital-signs)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: BMI \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"https://loinc.org/\"\u003eLOINC\u003c/a\u003e#39156-5 \u0026quot;Body mass index (BMI) [Ratio]\u0026quot;)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca 
href\u003d\"patient-example.html\"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 1999-07-02\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: 16.2 kg/m2\u003cspan style\u003d\"background: LightGoldenRodYellow\"\u003e (Details: UCUM code kg/m2 \u003d \u0027kg/m2\u0027)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ederivedFrom\u003c/b\u003e: \u003c/p\u003e\u003cul\u003e\u003cli\u003e\u003ca href\u003d\"broken-link.html\"\u003eObservation/bodyheight: Body Height\u003c/a\u003e\u003c/li\u003e\u003cli\u003e\u003ca href\u003d\"observation-example.html\"\u003eObservation/example: Body Weight\u003c/a\u003e\u003c/li\u003e\u003c/ul\u003e\u003c/div\u003e"}, + "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d"bmi-using-related"\u003e \u003c/a\u003e\u003ca name\u003d"hcbmi-using-related"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%"\u003e\u003cp style\u003d"margin-bottom: 0px"\u003eResource Observation \u0026quot;bmi-using-related\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: \u003cspan title\u003d" \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d need to fix vitals to removed fixed value \u0027has-member\u0027 \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\t\u0026lt;meta\u0026gt;\n\t\t\u0026lt;profile value\u003d\u0026quot;http://hl7.org/fhir/StructureDefinition/vitalsigns\u0026quot;/\u0026gt;\n\t\u0026lt;/meta\u0026gt;\n "\u003efinal\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecategory\u003c/b\u003e: Vital Signs \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca 
href\u003d"http://terminology.hl7.org/5.5.0/CodeSystem-observation-category.html"\u003eObservation Category Codes\u003c/a\u003e#vital-signs)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: BMI \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca href\u003d"https://loinc.org/"\u003eLOINC\u003c/a\u003e#39156-5 \u0026quot;Body mass index (BMI) [Ratio]\u0026quot;)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d"patient-example.html"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 1999-07-02\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: 16.2 kg/m2\u003cspan style\u003d"background: LightGoldenRodYellow"\u003e (Details: UCUM code kg/m2 \u003d \u0027kg/m2\u0027)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ederivedFrom\u003c/b\u003e: \u003c/p\u003e\u003cul\u003e\u003cli\u003e\u003ca href\u003d"broken-link.html"\u003eObservation/bodyheight: Body Height\u003c/a\u003e\u003c/li\u003e\u003cli\u003e\u003ca href\u003d"observation-example.html"\u003eObservation/example: Body Weight\u003c/a\u003e\u003c/li\u003e\u003c/ul\u003e\u003c/div\u003e', + }, "status": "final", "category": [ { @@ -118,35 +144,41 @@ def observation_bmi_dict(): { "system": "http://terminology.hl7.org/CodeSystem/observation-category", "code": "vital-signs", - "display": "Vital Signs"}], - "text": "Vital Signs"}], + "display": "Vital Signs", + } + ], + "text": "Vital Signs", + } + ], "code": { "coding": [ { "system": "http://loinc.org", "code": "39156-5", - "display": "Body mass index (BMI) [Ratio]"}], - "text": "BMI"}, - "subject": { - "reference": "Patient/example"}, + "display": "Body mass index (BMI) [Ratio]", + } + ], + "text": "BMI", + }, + "subject": {"reference": "Patient/example"}, "effectiveDateTime": "1999-07-02", "valueQuantity": { "value": 16.2, "unit": 
"kg/m2", "system": "http://unitsofmeasure.org", - "code": "kg/m2"}, + "code": "kg/m2", + }, "derivedFrom": [ - { - "reference": "Observation/bodyheight", - "display": "Body Height"}, - { - "reference": "Observation/example", - "display": "Body Weight"}]} + {"reference": "Observation/bodyheight", "display": "Body Height"}, + {"reference": "Observation/example", "display": "Body Weight"}, + ], + } # flatteners ------------------------------------------------------------ # The following functions are used to flatten the FHIR resources. + def flatten_simple(self: DomainResource): """Convert the DomainResource instance to just an id.""" return self.id @@ -161,13 +193,17 @@ def _isodate(v): def flatten_scalars(self: DomainResource) -> dict: """Convert the DomainResource instance to a dictionary.""" - _ = {k: _isodate(v) for k, v in self.dict().items() if not isinstance(v, (list, dict))} + _ = { + k: _isodate(v) + for k, v in self.dict().items() + if not isinstance(v, (list, dict)) + } return _ def flatten_references(self: DomainResource) -> dict: """Convert the DomainResource instance to a dictionary.""" - fields = [_ for _ in self.__fields__.keys() if not _.endswith('__ext')] + fields = [_ for _ in self.__fields__.keys() if not _.endswith("__ext")] _ = {} # if any top level field in this resource is a Reference, use the Reference.reference https://build.fhir.org/references-definitions.html#Reference.reference for k in fields: @@ -181,14 +217,16 @@ def flatten_references(self: DomainResource) -> dict: def flatten_identifier(self: Identifier) -> dict: """Convert the Identifier instance to a key value, use a simplified system as key.""" parsed_url = urlparse(self.system) - path_parts = parsed_url.path.split('/') # e.g. "http://hl7.org/fhir/sid/us-ssn" -> us-ssn - key = path_parts[-1] if path_parts else 'identifier' + path_parts = parsed_url.path.split( + "/" + ) # e.g. 
"http://hl7.org/fhir/sid/us-ssn" -> us-ssn + key = path_parts[-1] if path_parts else "identifier" return {key: self.value} def flatten_coding(self: Coding) -> dict: """Convert the DomainResource instance to a dictionary.""" - return {'display': self.display} + return {"display": self.display} def flatten_scalars_and_references(self: DomainResource) -> dict: @@ -213,12 +251,12 @@ def flatten_observation(self: Observation) -> dict: _ = flatten_scalars_references_identifiers(self) # normalize all the valueXXXXX to 'value' if self.valueQuantity: - _['value'] = f"{self.valueQuantity.value} {self.valueQuantity.unit}" + _["value"] = f"{self.valueQuantity.value} {self.valueQuantity.unit}" elif self.valueString: - _['value'] = self.valueString - del _['valueString'] + _["value"] = self.valueString + del _["valueString"] elif self.valueCodeableConcept: - _['value'] = self.valueCodeableConcept.text + _["value"] = self.valueCodeableConcept.text # there are many other value types, but we'll ignore them for now # see https://build.fhir.org/observation-definitions.html#Observation.value_x_ # Quantity|CodeableConcept|string|boolean|integer|Range|Ratio|SampledData|time|dateTime|Period|Attachment|Reference(MolecularSequence) @@ -229,6 +267,7 @@ def flatten_observation(self: Observation) -> dict: # patchers ------------------------------------------------------------ # The following fixtures are used to patch the DomainResource class to add the desired method. 
+ @pytest.fixture def patched_domain_resource_simple() -> bool: """Patch the DomainResource class to add a flatten method.""" @@ -284,66 +323,126 @@ def patched_scalars_references_identifiers_observation() -> bool: # tests ------------------------------------------------------------ + def test_patient_without_flatten(patient_dict: dict): """This patient object should NOT have a 'flatten' method.""" # without path dependency, just have a plain patient object with no flatten method patient = Patient.parse_obj(patient_dict) - assert not hasattr(patient, 'flatten'), "Patient object should not have a 'flatten' method" + assert not hasattr( + patient, "flatten" + ), "Patient object should not have a 'flatten' method" def test_patient_with_simple(patched_domain_resource_simple: bool, patient_dict: dict): """This patient object should have a 'flatten' method.""" patient = Patient.parse_obj(patient_dict) - assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method" - assert patient.flatten() == patient.id, f"Patient.flatten() should return {patient.id}" + assert hasattr( + patient, "flatten" + ), "Patient object does not have a 'flatten' method" + assert ( + patient.flatten() == patient.id + ), f"Patient.flatten() should return {patient.id}" def test_patient_with_scalars(patched_scalars: bool, patient_dict: dict): """This patient object should have a 'flatten' method that returns a dict of scalar values.""" patient = Patient.parse_obj(patient_dict) - assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method" - assert patient.flatten() == {'active': True, 'gender': 'female', 'id': '3', 'resourceType': 'Patient'}, "Patient.flatten() should return a dict of all scalar values" - - -def test_patient_with_scalars_and_references(patched_scalars_and_references: bool, patient_dict: dict): + assert hasattr( + patient, "flatten" + ), "Patient object does not have a 'flatten' method" + assert patient.flatten() == { + "active": True, 
+ "gender": "female", + "id": "3", + "resourceType": "Patient", + }, "Patient.flatten() should return a dict of all scalar values" + + +def test_patient_with_scalars_and_references( + patched_scalars_and_references: bool, patient_dict: dict +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" patient = Patient.parse_obj(patient_dict) - assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method" - assert patient.flatten() == {'active': True, 'gender': 'female', 'id': '3', 'managingOrganization': 'Organization/hl7', 'resourceType': 'Patient'}, "Patient.flatten() should return a dict of all scalar values and references" - - -def test_patient_with_scalars_references_identifiers(patched_scalars_references_identifiers: bool, patient_dict: dict): + assert hasattr( + patient, "flatten" + ), "Patient object does not have a 'flatten' method" + assert patient.flatten() == { + "active": True, + "gender": "female", + "id": "3", + "managingOrganization": "Organization/hl7", + "resourceType": "Patient", + }, "Patient.flatten() should return a dict of all scalar values and references" + + +def test_patient_with_scalars_references_identifiers( + patched_scalars_references_identifiers: bool, patient_dict: dict +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" patient = Patient.parse_obj(patient_dict) - assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method" - assert patient.flatten() == {'active': True, 'gender': 'female', 'id': '3', 'managingOrganization': 'Organization/hl7', 'resourceType': 'Patient', 'us-ssn': '444555555'}, "Patient.flatten() should return a dict of all scalar values and references" - - -def test_specimen_with_scalars_references_identifiers(patched_scalars_references_identifiers: bool, specimen_dict: dict): + assert hasattr( + patient, "flatten" + ), "Patient object does not have a 
'flatten' method" + assert patient.flatten() == { + "active": True, + "gender": "female", + "id": "3", + "managingOrganization": "Organization/hl7", + "resourceType": "Patient", + "us-ssn": "444555555", + }, "Patient.flatten() should return a dict of all scalar values and references" + + +def test_specimen_with_scalars_references_identifiers( + patched_scalars_references_identifiers: bool, specimen_dict: dict +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" specimen = Specimen.parse_obj(specimen_dict) - assert hasattr(specimen, 'flatten'), "Specimen object does not have a 'flatten' method" - assert specimen.flatten() == {'resourceType': 'Specimen', 'id': 'denovo-3', 'status': 'available', - 'receivedTime': '2021-01-01T01:01:01+00:00', - 'subject': 'Patient/denovoFather', 'specimens': '3'} + assert hasattr( + specimen, "flatten" + ), "Specimen object does not have a 'flatten' method" + assert specimen.flatten() == { + "resourceType": "Specimen", + "id": "denovo-3", + "status": "available", + "receivedTime": "2021-01-01T01:01:01+00:00", + "subject": "Patient/denovoFather", + "specimens": "3", + } -def test_eye_color_observation(patched_scalars_references_identifiers_observation: bool, observation_eye_color_dict: dict): +def test_eye_color_observation( + patched_scalars_references_identifiers_observation: bool, + observation_eye_color_dict: dict, +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" observation = Observation.parse_obj(observation_eye_color_dict) - assert hasattr(observation, 'flatten'), "Observation object does not have a 'flatten' method" - assert observation.flatten() == {'resourceType': 'Observation', 'id': 'eye-color', 'status': 'final', - 'effectiveDateTime': '2016-05-18', 'value': 'blue', - 'subject': 'Patient/example'} + assert hasattr( + observation, "flatten" + ), "Observation object does not have a 'flatten' method" + 
assert observation.flatten() == { + "resourceType": "Observation", + "id": "eye-color", + "status": "final", + "effectiveDateTime": "2016-05-18", + "value": "blue", + "subject": "Patient/example", + } -def test_bmi_observation(patched_scalars_references_identifiers_observation: bool, observation_bmi_dict: dict): +def test_bmi_observation( + patched_scalars_references_identifiers_observation: bool, observation_bmi_dict: dict +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" observation = Observation.parse_obj(observation_bmi_dict) - assert hasattr(observation, 'flatten'), "Observation object does not have a 'flatten' method" - assert observation.flatten() == {'effectiveDateTime': '1999-07-02', - 'id': 'bmi-using-related', - 'resourceType': 'Observation', - 'status': 'final', - 'subject': 'Patient/example', - 'value': '16.2 kg/m2'} + assert hasattr( + observation, "flatten" + ), "Observation object does not have a 'flatten' method" + assert observation.flatten() == { + "effectiveDateTime": "1999-07-02", + "id": "bmi-using-related", + "resourceType": "Observation", + "status": "final", + "subject": "Patient/example", + "value": "16.2 kg/m2", + } diff --git a/tests/unit/test_hash_types.py b/tests/unit/test_hash_types.py index ab51944c..68559e6f 100644 --- a/tests/unit/test_hash_types.py +++ b/tests/unit/test_hash_types.py @@ -5,20 +5,20 @@ from gen3_tracker.git import DVCItem VALID_HASHES = { - 'md5': 'acbd18db4cc2f85cedef654fccc4a4d8', - 'sha1': '2ef7bde608ce5404e97d5f042f95f89f1c232871', - 'sha256': '5bf8aa57fc5a6bc547decf1cc6db63f10deb55a3c6c5df497d631fb3d95e1abf', - 'sha512': '3ba2942ed1d05551d4360a2a7bb6298c2359061dc07b368949bd3fb7feca3344221257672d772ce456075b7cfa50fd7ce41eaefe529d056bf23dd665de668b78', - 'crc': '3e25960a', - 'etag': 'acbd18db4cc2f85cedef654fccc4a4d8-3' + "md5": "acbd18db4cc2f85cedef654fccc4a4d8", + "sha1": "2ef7bde608ce5404e97d5f042f95f89f1c232871", + "sha256": 
"5bf8aa57fc5a6bc547decf1cc6db63f10deb55a3c6c5df497d631fb3d95e1abf", + "sha512": "3ba2942ed1d05551d4360a2a7bb6298c2359061dc07b368949bd3fb7feca3344221257672d772ce456075b7cfa50fd7ce41eaefe529d056bf23dd665de668b78", + "crc": "3e25960a", + "etag": "acbd18db4cc2f85cedef654fccc4a4d8-3", } def test_invalid_hash_values(): """Test that invalid hash values raise a ValidationError.""" for hash_type in ACCEPTABLE_HASHES.keys(): - _ = dict(hash=hash_type, modified='2013-07-01T16:10-04:00', path='dddd', size=1) - _[hash_type] = 'foo' + _ = dict(hash=hash_type, modified="2013-07-01T16:10-04:00", path="dddd", size=1) + _[hash_type] = "foo" print(_) with pytest.raises(ValidationError): item = DVCItem(**_) @@ -28,7 +28,7 @@ def test_invalid_hash_values(): def test_valid_hash_values(): """Test that valid hash values do raise a ValidationError.""" for hash_type in VALID_HASHES.keys(): - _ = dict(hash=hash_type, modified='2013-07-01T16:10-04:00', path='dddd', size=1) + _ = dict(hash=hash_type, modified="2013-07-01T16:10-04:00", path="dddd", size=1) _[hash_type] = VALID_HASHES[hash_type] print(_) item = DVCItem(**_) diff --git a/tests/unit/test_read_dvc.py b/tests/unit/test_read_dvc.py index 7feef182..c9a5c231 100644 --- a/tests/unit/test_read_dvc.py +++ b/tests/unit/test_read_dvc.py @@ -3,15 +3,23 @@ def test_read_dvc(data_path: Path): - dvc = to_dvc(data_path / 'hello.txt.dvc') + dvc = to_dvc(data_path / "hello.txt.dvc") assert dvc assert dvc.outs - assert dvc.outs[0].path == 'my-project-data/hello.txt' + assert dvc.outs[0].path == "my-project-data/hello.txt" def test_read_dvc_item(): - _ = {'hash': 'md5', 'is_symlink': False, 'md5': 'b1946ac92492d2347c6235b4d2611184', 'mime': 'text/plain', 'modified': '2024-04-30T17:46:30.819143+00:00', - 'path': 'my-project-data/hello.txt', 'realpath': '/Users/walsbr/aced/g3t-git/attic/cbds-test39/my-project-data/hello.txt', 'size': 6} + _ = { + "hash": "md5", + "is_symlink": False, + "md5": "b1946ac92492d2347c6235b4d2611184", + "mime": "text/plain", 
+ "modified": "2024-04-30T17:46:30.819143+00:00", + "path": "my-project-data/hello.txt", + "realpath": "/Users/walsbr/aced/g3t-git/attic/cbds-test39/my-project-data/hello.txt", + "size": 6, + } item = DVCItem(**_) assert item - assert item.hash == 'md5' + assert item.hash == "md5" From 2cb2feb138aff124c258f107ccfbfb420608a070 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 2 Dec 2024 13:00:20 -0800 Subject: [PATCH 03/18] ensure dataframer unit tests pass --- tests/unit/dataframer/test_dataframer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/dataframer/test_dataframer.py b/tests/unit/dataframer/test_dataframer.py index 87a51f22..422665f6 100644 --- a/tests/unit/dataframer/test_dataframer.py +++ b/tests/unit/dataframer/test_dataframer.py @@ -93,6 +93,7 @@ def simplified_resources( "category": "Laboratory", "sample_type": "Primary Solid Tumor", "library_id": "12345", + "observation_code": "sample type abc", "tissue_type": "Tumor", "treatments": "Trastuzumab", "allocated_for_site": "TEST Clinical Research", @@ -284,6 +285,7 @@ def specimen_row(simplified_resources, specimen_key): "tissue_type": "Tumor", "treatments": "Trastuzumab", "allocated_for_site": "TEST Clinical Research", + "observation_code": "sample type abc", "indexed_collection_date": "365", "biopsy_specimens": "specimenA, specimenB, specimenC", "biopsy_procedure_type": "Biopsy - Core", From 4a71cf1f57755610e3516e8e9b4abcfbda5e094e Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Mon, 2 Dec 2024 13:34:55 -0800 Subject: [PATCH 04/18] fix test --- tests/integration/test_end_to_end_workflow.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 6237edab..d4c5f407 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -303,7 +303,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date( result = run( runner, 
["push", "--skip_validate", "--overwrite"], - expected_exit_code=1, + expected_exit_code=0 , expected_files=[log_file_path], ) @@ -316,6 +316,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date( with open(log_file_path, "r") as log_file: lines = log_file.readlines() str_lines = str(lines) + print("log lines: ", str_lines) for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: assert ( From 885bfce2e0828c37724fcd63f33a73ae74678a46 Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Tue, 3 Dec 2024 14:45:09 -0800 Subject: [PATCH 05/18] fix test to work with new output --- tests/integration/test_end_to_end_workflow.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index d4c5f407..911ffc2e 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -303,7 +303,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date( result = run( runner, ["push", "--skip_validate", "--overwrite"], - expected_exit_code=0 , + expected_exit_code=1 , expected_files=[log_file_path], ) @@ -316,8 +316,6 @@ def test_push_fails_with_invalid_doc_ref_creation_date( with open(log_file_path, "r") as log_file: lines = log_file.readlines() str_lines = str(lines) - print("log lines: ", str_lines) - for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: assert ( keyword in str_lines From 6d46e966914e362e94a1a24e00b1b91ff6ec2d2c Mon Sep 17 00:00:00 2001 From: quinnwai Date: Tue, 3 Dec 2024 14:52:18 -0800 Subject: [PATCH 06/18] fix spacing --- tests/integration/test_end_to_end_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 911ffc2e..5fdca0c0 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ 
b/tests/integration/test_end_to_end_workflow.py @@ -303,7 +303,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date( result = run( runner, ["push", "--skip_validate", "--overwrite"], - expected_exit_code=1 , + expected_exit_code=1, expected_files=[log_file_path], ) From c6a9a4213a7133c116792a9a610387769f9b6613 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 2 Dec 2024 17:37:34 -0800 Subject: [PATCH 07/18] test improved validation --- tests/unit/test_validate_float_to_int.py | 123 +++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 tests/unit/test_validate_float_to_int.py diff --git a/tests/unit/test_validate_float_to_int.py b/tests/unit/test_validate_float_to_int.py new file mode 100644 index 00000000..d10fc83c --- /dev/null +++ b/tests/unit/test_validate_float_to_int.py @@ -0,0 +1,123 @@ +import pytest +from fhir.resources.observation import Observation +from fhir.resources.patient import Patient +from pydantic import ValidationError + + +def test_validate_observation(): + """Test validate observation.""" + false = False + observation_dict = { + "resourceType": "Observation", + "id": "9d11e26b-0307-5573-aee8-d145bdc259f3", + "status": "final", + "category": [ + { + "coding": [ + { + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "code": "laboratory", + "display": "Laboratory" + } + ] + } + ], + "code": { + "coding": [ + { + "system": "http://loinc.org", + "code": "81247-9", + "display": "Master HL7 genetic variant reporting panel" + } + ] + }, + "subject": { + "reference": "Patient/16244c6a-028a-5d8b-ac80-22e7b870544b" + }, + "specimen": { + "reference": "Specimen/f7f2ceb6-53f3-561a-960d-0c47700c14a2" + }, + "focus": [ + { + "reference": "Specimen/f7f2ceb6-53f3-561a-960d-0c47700c14a2" + } + ], + "effectiveDateTime": "2024-06-03T08:00:00+00:00", + "valueString": "Sequencing parameters", + "component": [ + { + "code": { + "coding": [ + { + "system": 
"https://cadsr.cancer.gov/sample_laboratory_observation", + "code": "weight", + "display": "weight" + } + ], + "text": "weight" + }, + "valueInteger": 32.9 + }, + { + "code": { + "coding": [ + { + "system": "https://cadsr.cancer.gov/sample_laboratory_observation", + "code": "is_ffpe", + "display": "is_ffpe" + } + ], + "text": "is_ffpe" + }, + "valueBoolean": false + }, + { + "code": { + "coding": [ + { + "system": "https://cadsr.cancer.gov/sample_laboratory_observation", + "code": "sample_type", + "display": "sample_type" + } + ], + "text": "sample_type" + }, + "valueString": "Solid Tissue Normal" + }, + { + "code": { + "coding": [ + { + "system": "https://cadsr.cancer.gov/sample_laboratory_observation", + "code": "updated_datetime", + "display": "updated_datetime" + } + ], + "text": "updated_datetime" + }, + "valueDateTime": "2018-09-06T17:41:51.247648-05:00" + } + ] + } + observation_dict['component'][0]['valueInteger'] = 32.0 + observation = Observation.model_validate(observation_dict) + + assert observation, "Should have accepted valueInteger: 32.0" + + observation_dict['component'][0]['valueInteger'] = 32.9 + + with pytest.raises(ValidationError): + Observation.model_validate(observation_dict) + + +def test_patient(): + with pytest.raises(ValidationError): + patient_dict = {"multipleBirthInteger": 32.9} + patient = Patient.model_validate(patient_dict) + assert patient.multipleBirthInteger == 32.9, "Should not have accepted multipleBirthInteger: 32.9" + + patient_dict = {"multipleBirthInteger": 32.0} + Patient.model_validate(patient_dict) + + patient_dict = {"multipleBirthInteger": 32} + Patient.model_validate(patient_dict) From e8dfca71bd9c08370f390b3d95bef8fc5fcc6756 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 2 Dec 2024 18:18:22 -0800 Subject: [PATCH 08/18] ensure monkey patches no longer necessary --- ...t_deprecated_pydantic_v1_monkey_patches.py | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 
tests/unit/test_deprecated_pydantic_v1_monkey_patches.py diff --git a/tests/unit/test_deprecated_pydantic_v1_monkey_patches.py b/tests/unit/test_deprecated_pydantic_v1_monkey_patches.py new file mode 100644 index 00000000..895786b5 --- /dev/null +++ b/tests/unit/test_deprecated_pydantic_v1_monkey_patches.py @@ -0,0 +1,52 @@ +import pathlib +from typing import Annotated + +import pydantic +from fhir.resources.attachment import Attachment +from pydantic import UrlConstraints, AnyUrl + + +def test_validate_any_url(): + + class MyModel(pydantic.BaseModel): + url: Annotated[AnyUrl, UrlConstraints(host_required=False)] + + _ = MyModel(url='file:///foo/bar') + assert _, "file:///foo/bar is a valid file url" + assert _.url.host is None, "file:///foo/bar has no host" + + _ = MyModel(url='xxx:///XXXX') + assert _, "file:///foo/bar is a valid file url" + assert _.url.host is None, "file:///foo/bar has no host" + + +def test_fhir_url(): + """Previously a monkey patch was used to enable file urls. Any xs:anyURI is now allowed. See https://w3.org/TR/xmlschema-2/#anyURI + From https://hl7.org/fhir/datatypes.html#url (This regex is very permissive, but URIs must be valid. 
Implementers are welcome to use more specific regex statements for a URI in specific contexts)""" + attachment: Attachment = Attachment(url='file:///foo/bar') + assert attachment.validate_after_model_construction() + + attachment: Attachment = Attachment.model_validate({'url': 'file:///foo/bar'}) + assert attachment + + attachment: Attachment = Attachment.model_validate({'url': 'xxx:///XXXX'}) + assert attachment + + attachment: Attachment = Attachment.model_validate({'url': 'FOO BAR'}) + assert attachment + + +def test_path_encoders(): + """Previously a monkey patch was used to enable correct serialization of path objects""" + # eg + # # default initializers for path + # pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.PosixPath] = str + # pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.WindowsPath] = str + # pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.Path] = str + + class MyModel(pydantic.BaseModel): + path: pathlib.Path + + _ = MyModel(path=pathlib.Path('/foo/bar')) + assert _, "/foo/bar is a valid path" + _.model_dump()['path'] == '/foo/bar' From 46d58aa6f42e646bd7c3f53105a2eceeeeaa95fd Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 2 Dec 2024 18:19:20 -0800 Subject: [PATCH 09/18] datetimes w/out time now return XXXXT00:00:00 --- tests/unit/test_flatten_fhir_example.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/unit/test_flatten_fhir_example.py b/tests/unit/test_flatten_fhir_example.py index 9b923d88..0ea020e5 100644 --- a/tests/unit/test_flatten_fhir_example.py +++ b/tests/unit/test_flatten_fhir_example.py @@ -195,7 +195,7 @@ def flatten_scalars(self: DomainResource) -> dict: """Convert the DomainResource instance to a dictionary.""" _ = { k: _isodate(v) - for k, v in self.dict().items() + for k, v in self.model_dump().items() if not isinstance(v, (list, dict)) } return _ @@ -203,7 +203,7 @@ def flatten_scalars(self: DomainResource) -> dict: def flatten_references(self: DomainResource) -> dict: """Convert the 
DomainResource instance to a dictionary.""" - fields = [_ for _ in self.__fields__.keys() if not _.endswith("__ext")] + fields = [_ for _ in self.model_fields.keys() if not _.endswith("__ext")] _ = {} # if any top level field in this resource is a Reference, use the Reference.reference https://build.fhir.org/references-definitions.html#Reference.reference for k in fields: @@ -327,7 +327,7 @@ def patched_scalars_references_identifiers_observation() -> bool: def test_patient_without_flatten(patient_dict: dict): """This patient object should NOT have a 'flatten' method.""" # without path dependency, just have a plain patient object with no flatten method - patient = Patient.parse_obj(patient_dict) + patient = Patient.model_validate(patient_dict) assert not hasattr( patient, "flatten" ), "Patient object should not have a 'flatten' method" @@ -335,7 +335,7 @@ def test_patient_without_flatten(patient_dict: dict): def test_patient_with_simple(patched_domain_resource_simple: bool, patient_dict: dict): """This patient object should have a 'flatten' method.""" - patient = Patient.parse_obj(patient_dict) + patient = Patient.model_validate(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -346,7 +346,7 @@ def test_patient_with_simple(patched_domain_resource_simple: bool, patient_dict: def test_patient_with_scalars(patched_scalars: bool, patient_dict: dict): """This patient object should have a 'flatten' method that returns a dict of scalar values.""" - patient = Patient.parse_obj(patient_dict) + patient = Patient.model_validate(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -362,7 +362,7 @@ def test_patient_with_scalars_and_references( patched_scalars_and_references: bool, patient_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - patient = Patient.parse_obj(patient_dict) + patient = 
Patient.model_validate(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -379,7 +379,7 @@ def test_patient_with_scalars_references_identifiers( patched_scalars_references_identifiers: bool, patient_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - patient = Patient.parse_obj(patient_dict) + patient = Patient.model_validate(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -397,7 +397,7 @@ def test_specimen_with_scalars_references_identifiers( patched_scalars_references_identifiers: bool, specimen_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - specimen = Specimen.parse_obj(specimen_dict) + specimen = Specimen.model_validate(specimen_dict) assert hasattr( specimen, "flatten" ), "Specimen object does not have a 'flatten' method" @@ -416,7 +416,7 @@ def test_eye_color_observation( observation_eye_color_dict: dict, ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - observation = Observation.parse_obj(observation_eye_color_dict) + observation = Observation.model_validate(observation_eye_color_dict) assert hasattr( observation, "flatten" ), "Observation object does not have a 'flatten' method" @@ -424,7 +424,7 @@ def test_eye_color_observation( "resourceType": "Observation", "id": "eye-color", "status": "final", - "effectiveDateTime": "2016-05-18", + "effectiveDateTime": "2016-05-18T00:00:00", "value": "blue", "subject": "Patient/example", } @@ -434,12 +434,12 @@ def test_bmi_observation( patched_scalars_references_identifiers_observation: bool, observation_bmi_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - observation = Observation.parse_obj(observation_bmi_dict) + 
observation = Observation.model_validate(observation_bmi_dict) assert hasattr( observation, "flatten" ), "Observation object does not have a 'flatten' method" assert observation.flatten() == { - "effectiveDateTime": "1999-07-02", + "effectiveDateTime": "1999-07-02T00:00:00", "id": "bmi-using-related", "resourceType": "Observation", "status": "final", From 23b89786736f3e31ab913ed7452b51dc2d4bdc97 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 2 Dec 2024 18:21:51 -0800 Subject: [PATCH 10/18] fhir.resources==8 --- gen3_tracker/__init__.py | 35 +-------------------------------- gen3_tracker/common/__init__.py | 8 ++++---- gen3_tracker/git/cli.py | 2 +- gen3_tracker/meta/__init__.py | 12 +++++------ gen3_tracker/meta/skeleton.py | 17 ++++++++-------- gen3_tracker/meta/validator.py | 4 ++-- requirements.txt | 2 +- 7 files changed, 23 insertions(+), 57 deletions(-) diff --git a/gen3_tracker/__init__.py b/gen3_tracker/__init__.py index 4e860e2f..f3470b98 100644 --- a/gen3_tracker/__init__.py +++ b/gen3_tracker/__init__.py @@ -5,14 +5,12 @@ import typing import uuid from collections import OrderedDict -from typing import Union, Optional +from typing import Optional import click -import pydantic from click import Context, Command from pydantic import BaseModel, field_validator - ACED_NAMESPACE = uuid.uuid3(uuid.NAMESPACE_DNS, b'aced-idp.org') ENV_VARIABLE_PREFIX = 'G3T_' @@ -24,28 +22,6 @@ } -def monkey_patch_url_validate(): - # monkey patch to allow file: urls - import fhir.resources.fhirtypes - from pydantic import FileUrl - - original_url_validate = fhir.resources.fhirtypes.Url.validate - - @classmethod - def better_url_validate(cls, value: str, field: "ModelField", config: "BaseConfig") -> Union["AnyUrl", str]: # noqa - """Allow file: urls. 
see https://github.com/pydantic/pydantic/issues/1983 - bugfix: addresses issue introduced with `fhir.resources`==7.0.1 - """ - if value.startswith("file:"): - _ = FileUrl(value) - return value - # return FileUrl.validate(value, field, config) - value = original_url_validate(value, field, config) - return value - - fhir.resources.fhirtypes.Url.validate = better_url_validate - - class LogConfig(BaseModel): format: str """https://docs.python.org/3/library/logging.html#logging.Formatter""" @@ -177,12 +153,3 @@ def resolve_command( # os._exit(1) # noqa raise e - - -# main -monkey_patch_url_validate() - -# default initializers for path -pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.PosixPath] = str -pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.WindowsPath] = str -pydantic.v1.json.ENCODERS_BY_TYPE[pathlib.Path] = str diff --git a/gen3_tracker/common/__init__.py b/gen3_tracker/common/__init__.py index 5aec35a7..35bbf3db 100644 --- a/gen3_tracker/common/__init__.py +++ b/gen3_tracker/common/__init__.py @@ -321,7 +321,7 @@ def create_resource_id(resource, project_id) -> str: assert resource, "resource required" assert project_id, "project_id required" identifier_string = identifier_to_string(resource.identifier) - return str(uuid.uuid5(ACED_NAMESPACE, f"{project_id}/{resource.resource_type}/{identifier_string}")) + return str(uuid.uuid5(ACED_NAMESPACE, f"{project_id}/{resource.get_resource_type()}/{identifier_string}")) def create_object_id(path: str, project_id: str) -> str: @@ -344,7 +344,7 @@ def assert_valid_id(resource, project_id): """Ensure that the id is correct.""" assert resource, "resource required" assert project_id, "project_id required" - if resource.resource_type == "DocumentReference": + if resource.get_resource_type() == "DocumentReference": document_reference: DocumentReference = resource official_identifier = document_reference.content[0].attachment.url recreate_id = create_object_id(official_identifier, project_id) @@ -354,7 +354,7 @@ def 
assert_valid_id(resource, project_id): recreate_id = create_resource_id(resource, project_id) if resource.id == recreate_id: return - msg = f"The current {resource.resource_type}.id {resource.id} does not equal the calculated one {recreate_id}, has the project id changed? current:{project_id} {resource.resource_type}:{official_identifier}" + msg = f"The current {resource.get_resource_type()}.id {resource.id} does not equal the calculated one {recreate_id}, has the project id changed? current:{project_id} {resource.get_resource_type()}:{official_identifier}" raise Exception(msg) @@ -523,7 +523,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): elif hasattr(self.output.obj, 'model_dump'): _.update(self.output.obj.model_dump()) else: - _.update(self.output.obj.dict()) + _.update(self.output.obj.model_dump()) rc = self.output.exit_code if exc_type is not None: if isinstance(self.output.obj, dict): diff --git a/gen3_tracker/git/cli.py b/gen3_tracker/git/cli.py index c0222bd8..bc932d77 100644 --- a/gen3_tracker/git/cli.py +++ b/gen3_tracker/git/cli.py @@ -677,7 +677,7 @@ def push( bundle.entry.append(bundle_entry) headers = {"Authorization": f"{auth._access_token}"} - bundle_dict = bundle.dict() + bundle_dict = bundle.model_dump() with Halo( text="Sending to FHIR Server", spinner="line", diff --git a/gen3_tracker/meta/__init__.py b/gen3_tracker/meta/__init__.py index 963d5abf..256dfcc5 100644 --- a/gen3_tracker/meta/__init__.py +++ b/gen3_tracker/meta/__init__.py @@ -47,10 +47,10 @@ def parse_obj(resource: dict, validate=True) -> ParseResult: try: assert 'resourceType' in resource, "Dict missing `resourceType`, is it a FHIR dict?" 
klass = FHIR_CLASSES.get_fhir_model_class(resource['resourceType']) - _ = klass.parse_obj(resource) + _ = klass.model_validate(resource) if validate: # trigger object traversal, see monkey patch below, at bottom of file - _.dict() + _.model_dump() return ParseResult(resource=_, exception=None, path=None, resource_id=_.id) except (ValidationError, AssertionError) as e: return ParseResult(resource=None, exception=e, path=None, resource_id=resource.get('id', None)) @@ -68,11 +68,11 @@ def _entry_iterator(parse_result: ParseResult) -> Iterator[ParseResult]: if _ is None: break if hasattr(_, 'resource') and _.resource: # BundleEntry - yield ParseResult(path=_path, resource=_.resource, offset=offset, exception=None, json_obj=_.resource.dict()) + yield ParseResult(path=_path, resource=_.resource, offset=offset, exception=None, json_obj=_.resource.model_dump()) elif hasattr(_, 'item'): # ListEntry - yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.dict()) + yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.model_dump()) else: - yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.dict()) + yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.model_dump()) offset += 1 pass @@ -81,7 +81,7 @@ def _has_entries(_: ParseResult): """FHIR types Bundles List have entries""" if _.resource is None: return False - return _.resource.resource_type in ["List"] and _.resource.entry is not None + return _.resource.get_resource_type() in ["List"] and _.resource.entry is not None def directory_reader(directory_path: str, diff --git a/gen3_tracker/meta/skeleton.py b/gen3_tracker/meta/skeleton.py index 2e5df9ff..e3531885 100644 --- a/gen3_tracker/meta/skeleton.py +++ b/gen3_tracker/meta/skeleton.py @@ -7,8 +7,7 @@ import orjson from fhir.resources.attachment import Attachment from fhir.resources.bundle import Bundle, 
BundleEntry, BundleEntryRequest -from fhir.resources.documentreference import DocumentReference -from fhir.resources.fhirtypes import DocumentReferenceContentType +from fhir.resources.documentreference import DocumentReference, DocumentReferenceContent from fhir.resources.identifier import Identifier from fhir.resources.observation import Observation from fhir.resources.operationoutcome import OperationOutcome @@ -74,7 +73,7 @@ def get_data_from_meta() -> Generator[int, None, None]: def update_document_reference(document_reference: DocumentReference, dvc_data: DVC): """Update document reference with index record.""" - assert document_reference.resource_type == 'DocumentReference' + assert document_reference.get_resource_type() == 'DocumentReference' assert dvc_data.out.object_id == document_reference.id, f"{dvc_data['did']} != {document_reference.id}" assert dvc_data.out.modified, f"dvc_data missing modified: {dvc_data}" document_reference.docStatus = 'final' @@ -108,7 +107,7 @@ def update_document_reference(document_reference: DocumentReference, dvc_data: D attachment.title = pathlib.Path(dvc_data.out.path).name attachment.creation = dvc_data.out.modified - content = DocumentReferenceContentType(attachment=attachment) + content = DocumentReferenceContent(attachment=attachment) document_reference.content = [content] @@ -289,10 +288,10 @@ def update_meta_files(dry_run=False, project_id=None) -> list[str]: for _ in dvc_data(dvc_files): resources = create_skeleton(_, project_id, meta_index()) for resource in resources: - key = f"{resource.resource_type}/{resource.id}" + key = f"{resource.get_resource_type()}/{resource.id}" if key not in emitted_already: - emitter.emit(resource.resource_type).write( - resource.json(option=orjson.OPT_APPEND_NEWLINE) + emitter.emit(resource.get_resource_type()).write( + resource.model_dump_json() + '\n' ) emitted_already.append(key) @@ -317,8 +316,8 @@ def update_meta_files(dry_run=False, project_id=None) -> list[str]: 
bundle.entry.append(bundle_entry) with EmitterContextManager('META') as emitter: - emitter.emit(bundle.resource_type, file_mode='a').write( - bundle.json(option=orjson.OPT_APPEND_NEWLINE) + emitter.emit(bundle.get_resource_type(), file_mode='a').write( + bundle.model_dump_json() + '\n' ) after_meta_files = [_ for _ in pathlib.Path('META').glob('*.ndjson')] diff --git a/gen3_tracker/meta/validator.py b/gen3_tracker/meta/validator.py index 6630c196..941e2d3f 100644 --- a/gen3_tracker/meta/validator.py +++ b/gen3_tracker/meta/validator.py @@ -98,14 +98,14 @@ def validate(directory_path: pathlib.Path, project_id=None) -> ValidateDirectory continue _ = parse_result.resource - ids.append(f"{_.resource_type}/{_.id}") + ids.append(f"{_.get_resource_type()}/{_.id}") nested_references = nested_lookup('reference', parse_result.json_obj) # https://www.hl7.org/fhir/medicationrequest-definitions.html#MedicationRequest.medication # is a reference to a Medication resource https://www.hl7.org/fhir/references.html#CodeableReference # so it has a reference.reference form, strip it out nested_references = [_ for _ in nested_references if isinstance(_, str)] references.extend(nested_references) - resources[parse_result.resource.resource_type] += 1 + resources[parse_result.resource.get_resource_type()] += 1 # assert references exist references = set(references) diff --git a/requirements.txt b/requirements.txt index 7005d471..ae767ae9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ halo tqdm deepdiff -fhir.resources==7.1.0 # FHIR Model +fhir.resources==8.0.0b4 # FHIR Model Pre-release orjson nested_lookup From 54d29553b118f8554d35ffcd43eba532b7f336f8 Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Wed, 4 Dec 2024 11:14:24 -0800 Subject: [PATCH 11/18] Initial checkin med admin tab --- gen3_tracker/meta/cli.py | 2 +- gen3_tracker/meta/dataframer.py | 24 ++++++++++++++++++++++++ gen3_tracker/meta/entities.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 56 
insertions(+), 1 deletion(-) diff --git a/gen3_tracker/meta/cli.py b/gen3_tracker/meta/cli.py index 3dd37bfd..9462c7c3 100644 --- a/gen3_tracker/meta/cli.py +++ b/gen3_tracker/meta/cli.py @@ -103,7 +103,7 @@ def render_graph(config: Config, directory_path: str, output_path: str, browser: @meta.command("dataframe") @click.argument('data_type', required=True, - type=click.Choice(['Specimen', 'DocumentReference', 'ResearchSubject']), + type=click.Choice(['Specimen', 'DocumentReference', 'ResearchSubject', "MedicationAdministration"]), default=None) @click.argument("directory_path", type=click.Path(exists=True, file_okay=False), diff --git a/gen3_tracker/meta/dataframer.py b/gen3_tracker/meta/dataframer.py index c7d1cd3f..1cd2129d 100644 --- a/gen3_tracker/meta/dataframer.py +++ b/gen3_tracker/meta/dataframer.py @@ -504,6 +504,28 @@ def flattened_research_subjects(self) -> Generator[dict, None, None]: yield flat_research_subject + + def flattened_medication_administrations(self) -> Generator[dict, None, None]: + + # get all MedicationAdministrations + cursor = self.connect() + cursor.execute( + "SELECT * FROM resources where resource_type = ?", ("MedicationAdministration",) + ) + + # get research subject and associated .subject patient + for _, _, raw_medication_administration in cursor.fetchall(): + medication_administration = json.loads(raw_medication_administration) + flat_medication_administration = SimplifiedResource.build( + resource=medication_administration + ).simplified + + patient = get_subject(self, medication_administration) + flat_medication_administration.update(patient) + + yield flat_medication_administration + + def flattened_document_references(self) -> Generator[dict, None, None]: """generator that yields document references populated with DocumentReference.subject fields and Observation codes through Observation.focus @@ -607,6 +629,8 @@ def create_dataframe( df = pd.DataFrame(db.flattened_document_references()) elif data_type == "ResearchSubject": 
df = pd.DataFrame(db.flattened_research_subjects()) + elif data_type == "MedicationAdministration": + df = pd.DataFrame(db.flattened_medication_administrations()) elif data_type == "Specimen": df = pd.DataFrame(db.flattened_specimens()) else: diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index c6beda14..16efe955 100644 --- a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -405,6 +405,35 @@ def values(self) -> dict: return _values +class SimplifiedMedicationAdministration(SimplifiedFHIR): + @computed_field + @property + def values(self) -> dict: + """Return a dictionary of 'value':value.""" + _values = super().values + # Plucking out fields that didn't get picked up by default class simplifier.' + dose_value = self.resource.get("dosage", {}).get("dose", {}).get("value", None) + if dose_value: + _values["total_dosage"] = dose_value + occurenceTiming = self.resource.get("occurenceTiming", {}).get("repeat", {}).get("boundsRange") + if occurenceTiming: + low = occurenceTiming.get("low", {}).get("value") + _values["index_date_start_days"] = low if low else None + high = occurenceTiming.get("high", {}).get("value") + _values["index_date_end_days"] = high if high else None + for notes in self.resource.get("note", []): + note = notes.get("value", None) + if note: + # Probably best to concat notes together + _values["notes"] = _values["notes"] + "; " + note + for identifier in self.resource.get("identifier", []): + system = identifier.get("system", None) + if system: + if system.split("/")[-1] == "regimen": + _values["regimen_id"] = identifier["value"] + return _values + + class SimplifiedCondition(SimplifiedFHIR): @computed_field @property @@ -440,4 +469,6 @@ def build(resource: dict) -> SimplifiedFHIR: return SimplifiedDocumentReference(resource=resource) if resource_type == "Condition": return SimplifiedCondition(resource=resource) + if resource_type == "MedicationAdministration": + return 
SimplifiedMedicationAdministration(resource=resource) return SimplifiedFHIR(resource=resource) From 0a1cc17cbe9e243f31c119a2b8fd1b454a8c8f26 Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Thu, 5 Dec 2024 08:47:23 -0800 Subject: [PATCH 12/18] fix identifier to avoid hyphen in column name --- gen3_tracker/meta/entities.py | 13 ++++++------- setup.py | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index 16efe955..493ff604 100644 --- a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -286,13 +286,12 @@ def identifiers(self) -> dict: elif identifiers_len == 1: return {"identifier": identifiers[0].get("value")} else: - base_identifier = {"identifier": identifiers[0].get("value")} - base_identifier.update( - { - identifier.get("system").split("/")[-1]: identifier.get("value") - for identifier in identifiers[1:] - } - ) + # Todo: Raise an execption if there are multiple identifiers with a "-" in them + base_identifier = { + "identifier" if "-" in identifier.get("system", "").split("/")[-1] + else identifier.get("system").split("/")[-1]: identifier.get("value") + for identifier in identifiers + } return base_identifier @computed_field diff --git a/setup.py b/setup.py index 1daa321f..3c3b3de1 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='gen3_tracker', - version='0.0.7rc2', + version='0.0.7rc4', description='A CLI for adding version control to Gen3 data submission projects.', long_description=long_description, long_description_content_type='text/markdown', From ebc8dd4fd0ce48d1bc937c49a469701b2df6d2af Mon Sep 17 00:00:00 2001 From: Quinn Wai Wong <54592956+quinnwai@users.noreply.github.com> Date: Wed, 4 Dec 2024 12:23:40 -0800 Subject: [PATCH 13/18] Release/0.0.7rc2 (#107) * update pytest and version number * linting * ensure dataframer unit tests pass * fix test * fix test to work with new output * fix spacing --------- Co-authored-by: 
matthewpeterkort --- gen3_tracker/git/cli.py | 2 +- gen3_tracker/meta/entities.py | 1 + tests/unit/test_flatten_fhir_example.py | 35 ++++++++++++++++--------- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/gen3_tracker/git/cli.py b/gen3_tracker/git/cli.py index bc932d77..c0222bd8 100644 --- a/gen3_tracker/git/cli.py +++ b/gen3_tracker/git/cli.py @@ -677,7 +677,7 @@ def push( bundle.entry.append(bundle_entry) headers = {"Authorization": f"{auth._access_token}"} - bundle_dict = bundle.model_dump() + bundle_dict = bundle.dict() with Halo( text="Sending to FHIR Server", spinner="line", diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index 493ff604..9941a4de 100644 --- a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -292,6 +292,7 @@ def identifiers(self) -> dict: else identifier.get("system").split("/")[-1]: identifier.get("value") for identifier in identifiers } + return base_identifier @computed_field diff --git a/tests/unit/test_flatten_fhir_example.py b/tests/unit/test_flatten_fhir_example.py index 0ea020e5..983f008c 100644 --- a/tests/unit/test_flatten_fhir_example.py +++ b/tests/unit/test_flatten_fhir_example.py @@ -195,7 +195,7 @@ def flatten_scalars(self: DomainResource) -> dict: """Convert the DomainResource instance to a dictionary.""" _ = { k: _isodate(v) - for k, v in self.model_dump().items() + for k, v in self.dict().items() if not isinstance(v, (list, dict)) } return _ @@ -203,7 +203,8 @@ def flatten_scalars(self: DomainResource) -> dict: def flatten_references(self: DomainResource) -> dict: """Convert the DomainResource instance to a dictionary.""" - fields = [_ for _ in self.model_fields.keys() if not _.endswith("__ext")] + + fields = [_ for _ in self.__fields__.keys() if not _.endswith("__ext")] _ = {} # if any top level field in this resource is a Reference, use the Reference.reference https://build.fhir.org/references-definitions.html#Reference.reference for k in fields: @@ -327,7 
+328,8 @@ def patched_scalars_references_identifiers_observation() -> bool: def test_patient_without_flatten(patient_dict: dict): """This patient object should NOT have a 'flatten' method.""" # without path dependency, just have a plain patient object with no flatten method - patient = Patient.model_validate(patient_dict) + + patient = Patient.parse_obj(patient_dict) assert not hasattr( patient, "flatten" ), "Patient object should not have a 'flatten' method" @@ -335,7 +337,8 @@ def test_patient_without_flatten(patient_dict: dict): def test_patient_with_simple(patched_domain_resource_simple: bool, patient_dict: dict): """This patient object should have a 'flatten' method.""" - patient = Patient.model_validate(patient_dict) + + patient = Patient.parse_obj(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -346,7 +349,8 @@ def test_patient_with_simple(patched_domain_resource_simple: bool, patient_dict: def test_patient_with_scalars(patched_scalars: bool, patient_dict: dict): """This patient object should have a 'flatten' method that returns a dict of scalar values.""" - patient = Patient.model_validate(patient_dict) + + patient = Patient.parse_obj(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -362,7 +366,8 @@ def test_patient_with_scalars_and_references( patched_scalars_and_references: bool, patient_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - patient = Patient.model_validate(patient_dict) + + patient = Patient.parse_obj(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -379,7 +384,8 @@ def test_patient_with_scalars_references_identifiers( patched_scalars_references_identifiers: bool, patient_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - 
patient = Patient.model_validate(patient_dict) + + patient = Patient.parse_obj(patient_dict) assert hasattr( patient, "flatten" ), "Patient object does not have a 'flatten' method" @@ -397,7 +403,8 @@ def test_specimen_with_scalars_references_identifiers( patched_scalars_references_identifiers: bool, specimen_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - specimen = Specimen.model_validate(specimen_dict) + + specimen = Specimen.parse_obj(specimen_dict) assert hasattr( specimen, "flatten" ), "Specimen object does not have a 'flatten' method" @@ -416,7 +423,8 @@ def test_eye_color_observation( observation_eye_color_dict: dict, ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - observation = Observation.model_validate(observation_eye_color_dict) + + observation = Observation.parse_obj(observation_eye_color_dict) assert hasattr( observation, "flatten" ), "Observation object does not have a 'flatten' method" @@ -424,7 +432,8 @@ def test_eye_color_observation( "resourceType": "Observation", "id": "eye-color", "status": "final", - "effectiveDateTime": "2016-05-18T00:00:00", + + "effectiveDateTime": "2016-05-18", "value": "blue", "subject": "Patient/example", } @@ -434,12 +443,14 @@ def test_bmi_observation( patched_scalars_references_identifiers_observation: bool, observation_bmi_dict: dict ): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" - observation = Observation.model_validate(observation_bmi_dict) + + observation = Observation.parse_obj(observation_bmi_dict) assert hasattr( observation, "flatten" ), "Observation object does not have a 'flatten' method" assert observation.flatten() == { - "effectiveDateTime": "1999-07-02T00:00:00", + + "effectiveDateTime": "1999-07-02", "id": "bmi-using-related", "resourceType": "Observation", "status": "final", From 
d41e50ff0526829f04735626e9d744b0f0a8515b Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 2 Dec 2024 11:32:27 -0800 Subject: [PATCH 14/18] update pytest and version number --- tests/integration/test_end_to_end_workflow.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 5fdca0c0..e1f99bf4 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -300,12 +300,11 @@ def test_push_fails_with_invalid_doc_ref_creation_date( log_file_path = "logs/publish.log" os.chdir(new_project_dir) run(runner, ["init", project_id, "--approve"]) - result = run( - runner, - ["push", "--skip_validate", "--overwrite"], - expected_exit_code=1, - expected_files=[log_file_path], - ) + result = run(runner, + ["push", "--skip_validate", "--overwrite"], + expected_exit_code=1, + expected_files=[log_file_path] + ) # ensure push has useful useful error logs assert ( @@ -316,10 +315,9 @@ def test_push_fails_with_invalid_doc_ref_creation_date( with open(log_file_path, "r") as log_file: lines = log_file.readlines() str_lines = str(lines) + for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: - assert ( - keyword in str_lines - ), f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' + assert keyword in str_lines, f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' def test_push_fails_with_no_write_permissions( From 2af91112f03d4216d7ce2ecf9d6241fe5b63a25c Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 2 Dec 2024 11:36:29 -0800 Subject: [PATCH 15/18] linting --- tests/integration/test_end_to_end_workflow.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index e1f99bf4..6237edab 100644 --- 
a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -300,11 +300,12 @@ def test_push_fails_with_invalid_doc_ref_creation_date( log_file_path = "logs/publish.log" os.chdir(new_project_dir) run(runner, ["init", project_id, "--approve"]) - result = run(runner, - ["push", "--skip_validate", "--overwrite"], - expected_exit_code=1, - expected_files=[log_file_path] - ) + result = run( + runner, + ["push", "--skip_validate", "--overwrite"], + expected_exit_code=1, + expected_files=[log_file_path], + ) # ensure push has useful useful error logs assert ( @@ -317,7 +318,9 @@ def test_push_fails_with_invalid_doc_ref_creation_date( str_lines = str(lines) for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: - assert keyword in str_lines, f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' + assert ( + keyword in str_lines + ), f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' def test_push_fails_with_no_write_permissions( From 7b4627ab55e15b447b043f4adcae2eb33c980828 Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Tue, 21 Jan 2025 09:18:39 -0800 Subject: [PATCH 16/18] hardcode a solution fun --- gen3_tracker/meta/entities.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index 9941a4de..9c1ed456 100644 --- a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -219,7 +219,10 @@ def _populate_simplified_extension(extension: dict): resource = self.resource for _ in resource.get("extension", [resource]): - if "extension" not in _.keys(): + # special case data looks like this skip it, no extension to extract + if set(_.keys()) == {"url", "size", "hash", "title"}: + continue + elif "extension" not in _.keys(): if "resourceType" not in _.keys(): _populate_simplified_extension(_) continue From 2ce278b2da1d104326550fdc20939511d74cd987 Mon Sep 17 
00:00:00 2001 From: matthewpeterkort Date: Tue, 21 Jan 2025 16:12:19 -0800 Subject: [PATCH 17/18] revert fhir.resources version --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index ae767ae9..ef36c0f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ halo tqdm deepdiff -fhir.resources==8.0.0b4 # FHIR Model Pre-release +fhir.resources==7.1.0 # FHIR Model Pre-release orjson nested_lookup diff --git a/setup.py b/setup.py index 3c3b3de1..c2482b8d 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='gen3_tracker', - version='0.0.7rc4', + version='0.0.7rc5', description='A CLI for adding version control to Gen3 data submission projects.', long_description=long_description, long_description_content_type='text/markdown', From eda70f54135487c031224bcfde3712155edec8f0 Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Wed, 22 Jan 2025 12:21:21 -0800 Subject: [PATCH 18/18] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c2482b8d..47573991 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='gen3_tracker', - version='0.0.7rc5', + version='0.0.7rc6', description='A CLI for adding version control to Gen3 data submission projects.', long_description=long_description, long_description_content_type='text/markdown',