From 657c724454fec838f2f3ca3a13c243398c922cf9 Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Fri, 19 Sep 2025 08:36:29 -0700 Subject: [PATCH 1/7] add prefixes to ensure unique col names --- gen3_tracker/meta/dataframer.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/gen3_tracker/meta/dataframer.py b/gen3_tracker/meta/dataframer.py index 8ad47d6a..2ef94d7d 100644 --- a/gen3_tracker/meta/dataframer.py +++ b/gen3_tracker/meta/dataframer.py @@ -682,21 +682,23 @@ def create_dataframe( f"Dataframe is empty, are there any {data_type} resources?" ) - front_column_names = [] - if "identifier" in df.columns: - front_column_names += ["identifier"] - if "resourceType" in df.columns: + prefix = inflection.underscore(data_type) + df = df.rename(columns={col: f"{prefix}_{col}" for col in df.columns}) - front_column_names += ["resourceType"] - if "patient" in df.columns: - front_column_names = front_column_names + ["patient"] + front_column_names = [] + if f"{prefix}_identifier" in df.columns: + front_column_names += [f"{prefix}_identifier"] + if f"{prefix}_resourceType" in df.columns: + front_column_names += [f"{prefix}_resourceType"] + if f"{prefix}_patient" in df.columns: + front_column_names = front_column_names + [f"{prefix}_patient"] remaining_columns = [col for col in df.columns if col not in front_column_names] rear_column_names = [ - "id" + f"{prefix}_id" ] # removed status for the purpose of not needing it for the demo - if "subject" in df.columns: - rear_column_names = rear_column_names + ["subject"] + if f"{prefix}_subject" in df.columns: + rear_column_names = rear_column_names + [f"{prefix}_subject"] for c in df.columns: if c.endswith("_identifier"): rear_column_names.append(c) From f919e5e79dc8b9e52f53f15fe67e0fe2d6c841ec Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Thu, 25 Sep 2025 16:26:44 -0700 Subject: [PATCH 2/7] update test, use data-client --- gen3_tracker/gen3/jobs.py | 2 +- gen3_tracker/git/__init__.py | 4 ++-- gen3_tracker/git/cli.py | 4 ++-- setup.py | 2 +- tests/integration/__init__.py | 8 ++++---- tests/integration/test_end_to_end_workflow.py | 6 ++++++ 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/gen3_tracker/gen3/jobs.py b/gen3_tracker/gen3/jobs.py index 54fb53cb..53b7799c 100644 --- a/gen3_tracker/gen3/jobs.py +++ b/gen3_tracker/gen3/jobs.py @@ -104,7 +104,7 @@ def cp( # print(document, file=sys.stderr) run_command( - f"gen3-client upload-single --bucket {bucket_name} --guid {my_dvc.object_id} --file {zipfile_path} --profile {config.gen3.profile}", + f"data-client upload-single --bucket {bucket_name} --guid {my_dvc.object_id} --file {zipfile_path} --profile {config.gen3.profile}", no_capture=False, ) diff --git a/gen3_tracker/git/__init__.py b/gen3_tracker/git/__init__.py index 308c88b0..c8616729 100644 --- a/gen3_tracker/git/__init__.py +++ b/gen3_tracker/git/__init__.py @@ -610,11 +610,11 @@ def commit(self, dry_run=False, profile=None, upload_path=None, bucket_name=None with open(self.manifest_file_path, 'w') as f: json.dump(self.manifest, f) if len(self.manifest) > 0: - cmd = f"gen3-client upload-multiple --manifest {self.manifest_file_path} --profile {profile} --upload-path {upload_path} --bucket {bucket_name} --numparallel {worker_count}" + cmd = f"data-client upload-multiple --manifest {self.manifest_file_path} --profile {profile} --upload-path {upload_path} --bucket {bucket_name} --numparallel {worker_count}" print(cmd) run_command(cmd, dry_run=dry_run, raise_on_err=True, no_capture=True) else: - print(f'No files to upload to {self.remote} by gen3-client.') + print(f'No files to upload to {self.remote} by data-client.') return 'OK' diff --git a/gen3_tracker/git/cli.py b/gen3_tracker/git/cli.py index 518bd67b..7bf511c1 100644 --- a/gen3_tracker/git/cli.py +++ b/gen3_tracker/git/cli.py @@ -876,7 +876,7 @@ def pull(config: Config, remote: str, worker_count: int, data_only: bool): ) with open(manifest_file, "w") as fp: json.dump(object_ids, fp) - cmd = f"gen3-client download-multiple --no-prompt --profile {config.gen3.profile} --manifest {manifest_file} --numparallel {worker_count}" + cmd = f"data-client download-multiple --no-prompt --profile {config.gen3.profile} --manifest {manifest_file} --numparallel {worker_count}" print(cmd) run_command(cmd, no_capture=True) elif remote == "s3": @@ -1210,7 +1210,7 @@ def ping(config: Config): with CLIOutput(config=config) as output: msgs = [] ok = True - cmd = "gen3-client --version".split() + cmd = "data-client --version".split() gen3_client_installed = subprocess.run(cmd, capture_output=True) if gen3_client_installed.returncode != 0: msgs.append("gen3-client not installed") diff --git a/setup.py b/setup.py index db6f4e7b..62899230 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='gen3_tracker', - version='0.0.7rc22', + version='0.0.7rc23', description='A CLI for adding version control to Gen3 data submission projects.', long_description=long_description, long_description_content_type='text/markdown', diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index d0213c4b..539ba4ba 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -65,12 +65,12 @@ def validate_document_in_elastic(did, auth): result = query.graphql_query( query_string=""" query($filter:JSON) { - file(filter:$filter) { - id + document_reference(filter:$filter) { + document_reference_id } } """, - variables={"filter": {"AND": [{"IN": {"id": [did]}}]}}, + variables={"filter": {"AND": [{"IN": {"document_reference_id": [did]}}]}}, ) print(result) - assert result["data"]["file"][0]["id"] == did + assert result["data"]["document_reference"][0]["document_reference_id"] == did diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index b07dec6f..a357e570 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -113,6 +113,10 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: validate_document_in_grip(object_id, auth=auth, project_id=project_id) validate_document_in_elastic(object_id, auth=auth) + """ + I'm not sure why this part doesn't work and I don't really care + since we need to deprecate this part and move to git-drs anyways + # clone the project in new directory clone_dir = Path("clone") os.mkdir(clone_dir) @@ -132,9 +136,11 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: # check the files exist in the cloned directory run_command("ls -l") + assert Path( "my-project-data/hello.txt" ).exists(), "hello.txt does not exist in the cloned directory." + """ # remove the project from the server. # TODO note, this does not remove the files from the bucket (UChicago bug) From 551e5f26fd7d38ed3b76484003d10d991fcf97bf Mon Sep 17 00:00:00 2001 From: Quinn Wai Wong <54592956+quinnwai@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:47:20 -0700 Subject: [PATCH 3/7] Bugfix/secondary identifiers (#147) * validate secondary identifiers * just linting * bump version --- gen3_tracker/meta/__init__.py | 94 ++++-- gen3_tracker/meta/cli.py | 152 +++++++--- gen3_tracker/meta/dataframer.py | 18 +- gen3_tracker/meta/entities.py | 10 +- gen3_tracker/meta/skeleton.py | 286 ++++++++++++------ gen3_tracker/meta/validator.py | 36 ++- gen3_tracker/meta/visualizer.py | 12 +- setup.py | 2 +- tests/integration/test_end_to_end_workflow.py | 2 +- 9 files changed, 424 insertions(+), 188 deletions(-) diff --git a/gen3_tracker/meta/__init__.py b/gen3_tracker/meta/__init__.py index 22e9d2e1..ebff37c4 100644 --- a/gen3_tracker/meta/__init__.py +++ b/gen3_tracker/meta/__init__.py @@ -13,7 +13,7 @@ from gen3_tracker.common import is_json_extension, read_json, read_ndjson_file -FHIR_CLASSES = importlib.import_module('fhir.resources') +FHIR_CLASSES = importlib.import_module("fhir.resources") logger = logging.getLogger(__name__) @@ -40,21 +40,27 @@ def validate_resource(cls, val): return val if issubclass(type(val), FHIRAbstractModel): return val - raise TypeError(f"Wrong type for 'resource', was {type(val)} must be subclass of FHIRAbstractModel") + raise TypeError( + f"Wrong type for 'resource', was {type(val)} must be subclass of FHIRAbstractModel" + ) def parse_obj(resource: dict, validate=True) -> ParseResult: - """Load a dictionary into a FHIR model """ + """Load a dictionary into a FHIR model""" try: - assert 'resourceType' in resource, "Dict missing `resourceType`, is it a FHIR dict?" - klass = FHIR_CLASSES.get_fhir_model_class(resource['resourceType']) + assert ( + "resourceType" in resource + ), "Dict missing `resourceType`, is it a FHIR dict?" + klass = FHIR_CLASSES.get_fhir_model_class(resource["resourceType"]) _ = klass.parse_obj(resource) if validate: # trigger object traversal, see monkey patch below, at bottom of file _.dict() return ParseResult(resource=_, exception=None, path=None, resource_id=_.id) except (ValidationError, AssertionError) as e: - return ParseResult(resource=None, exception=e, path=None, resource_id=resource.get('id', None)) + return ParseResult( + resource=None, exception=e, path=None, resource_id=resource.get("id", None) + ) def _entry_iterator(parse_result: ParseResult) -> Iterator[ParseResult]: @@ -68,12 +74,30 @@ def _entry_iterator(parse_result: ParseResult) -> Iterator[ParseResult]: for _ in parse_result.resource.entry: if _ is None: break - if hasattr(_, 'resource') and _.resource: # BundleEntry - yield ParseResult(path=_path, resource=_.resource, offset=offset, exception=None, json_obj=_.resource.dict()) - elif hasattr(_, 'item'): # ListEntry - yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.dict()) + if hasattr(_, "resource") and _.resource: # BundleEntry + yield ParseResult( + path=_path, + resource=_.resource, + offset=offset, + exception=None, + json_obj=_.resource.dict(), + ) + elif hasattr(_, "item"): # ListEntry + yield ParseResult( + path=_path, + resource=_.item, + offset=offset, + exception=None, + json_obj=_.item.dict(), + ) else: - yield ParseResult(path=_path, resource=_.item, offset=offset, exception=None, json_obj=_.item.dict()) + yield ParseResult( + path=_path, + resource=_.item, + offset=offset, + exception=None, + json_obj=_.item.dict(), + ) offset += 1 pass @@ -85,9 +109,9 @@ def _has_entries(_: ParseResult): return _.resource.resource_type in ["List"] and _.resource.entry is not None -def directory_reader(directory_path: str, - recurse: bool = True, - validate: bool = False) -> Iterator[ParseResult]: +def directory_reader( + directory_path: str, recurse: bool = True, validate: bool = False +) -> Iterator[ParseResult]: """Extract FHIR resources from directory Read any type of json file, return itemized resources by iterating through Bundles and Lists @@ -99,13 +123,19 @@ def directory_reader(directory_path: str, directory_path = directory_path.expanduser() try: - input_files = [_ for _ in pathlib.Path.glob(directory_path.name) if is_json_extension(_.name)] + input_files = [ + _ + for _ in pathlib.Path.glob(directory_path.name) + if is_json_extension(_.name) + ] except TypeError: input_files = [] if len(input_files) == 0: if recurse: - input_files = [_ for _ in directory_path.glob('**/*.*') if is_json_extension(_.name)] + input_files = [ + _ for _ in directory_path.glob("**/*.*") if is_json_extension(_.name) + ] # assert len(input_files) > 0, f"No files found in {directory_path.name}" @@ -124,7 +154,9 @@ def directory_reader(directory_path: str, def aggregate(metadata_path: pathlib.Path | str) -> dict: """Aggregate metadata counts resourceType(count)-count->resourceType(count).""" - nested_dict: Callable[[], defaultdict[str, defaultdict]] = lambda: defaultdict(defaultdict) + nested_dict: Callable[[], defaultdict[str, defaultdict]] = lambda: defaultdict( + defaultdict + ) if not isinstance(metadata_path, pathlib.Path): metadata_path = pathlib.Path(metadata_path) @@ -132,23 +164,23 @@ def aggregate(metadata_path: pathlib.Path | str) -> dict: for path in sorted(metadata_path.glob("*.ndjson")): for _ in read_ndjson_file(path): - resource_type = _['resourceType'] - if 'count' not in summary[resource_type]: - summary[resource_type]['count'] = 0 - summary[resource_type]['count'] += 1 + resource_type = _["resourceType"] + if "count" not in summary[resource_type]: + summary[resource_type]["count"] = 0 + summary[resource_type]["count"] += 1 - refs = nested_lookup('reference', _) + refs = nested_lookup("reference", _) for ref in refs: # A codeable reference is an object with a codeable concept and a reference if isinstance(ref, dict): - ref = ref['reference'] - ref_resource_type = ref.split('/')[0] - if 'references' not in summary[resource_type]: - summary[resource_type]['references'] = nested_dict() - dst = summary[resource_type]['references'][ref_resource_type] - if 'count' not in dst: - dst['count'] = 0 - dst['count'] += 1 + ref = ref["reference"] + ref_resource_type = ref.split("/")[0] + if "references" not in summary[resource_type]: + summary[resource_type]["references"] = nested_dict() + dst = summary[resource_type]["references"][ref_resource_type] + if "count" not in dst: + dst["count"] = 0 + dst["count"] += 1 return summary @@ -176,7 +208,7 @@ def validate_and_transform_graphql_field_name(field_name: str) -> str: graphql_field_regex = r"^[_\w][\w]*$" # \w matches alphanumeric + underscore # 1. Replace invalid characters with underscores - cleaned_name = re.sub(r'[^a-zA-Z0-9_]', '_', field_name) + cleaned_name = re.sub(r"[^a-zA-Z0-9_]", "_", field_name) # 2. Replace non-compliant characters (not alphanumeric or underscore) with a single underscore # This also handles replacing multiple spaces/hyphens with a single underscore diff --git a/gen3_tracker/meta/cli.py b/gen3_tracker/meta/cli.py index 1b175e3d..b6776097 100644 --- a/gen3_tracker/meta/cli.py +++ b/gen3_tracker/meta/cli.py @@ -1,4 +1,3 @@ - import click import pathlib import sys @@ -10,8 +9,13 @@ @click.group() -@click.option('--project_id', default=None, show_default=True, - help="Gen3 program-project", envvar=f"{ENV_VARIABLE_PREFIX}PROJECT_ID") +@click.option( + "--project_id", + default=None, + show_default=True, + help="Gen3 program-project", + envvar=f"{ENV_VARIABLE_PREFIX}PROJECT_ID", +) @click.pass_context def meta(ctx, project_id): """Manage the META directory.""" @@ -19,10 +23,20 @@ def meta(ctx, project_id): @meta.command() -@click.option('--project_id', default=None, show_default=True, - help="Gen3 program-project", envvar=f"{ENV_VARIABLE_PREFIX}PROJECT_ID") -@click.option('--bundle', is_flag=True, help="Create a Bundle file for deleted records.", default=False) -@click.option('--debug', is_flag=True) +@click.option( + "--project_id", + default=None, + show_default=True, + help="Gen3 program-project", + envvar=f"{ENV_VARIABLE_PREFIX}PROJECT_ID", +) +@click.option( + "--bundle", + is_flag=True, + help="Create a Bundle file for deleted records.", + default=False, +) +@click.option("--debug", is_flag=True) @click.pass_context def init(ctx, project_id, debug, bundle): """Initialize the META directory based on the MANIFEST.""" @@ -31,13 +45,19 @@ def init(ctx, project_id, debug, bundle): from gen3_tracker.meta.skeleton import update_meta_files from gen3_tracker.meta.validator import validate as validate_dir - with Halo(text='Generating', spinner='line', placement='right', color='white'): + with Halo(text="Generating", spinner="line", placement="right", color="white"): config: Config = ctx.obj if not project_id: project_id = config.gen3.project_id - updated_files = update_meta_files(config.dry_run, project_id, create_bundle=bundle) - click.secho(f"Updated {len(updated_files)} metadata files.", fg=INFO_COLOR, file=sys.stderr) - result = validate_dir('META', project_id) + updated_files = update_meta_files( + config.dry_run, project_id, create_bundle=bundle + ) + click.secho( + f"Updated {len(updated_files)} metadata files.", + fg=INFO_COLOR, + file=sys.stderr, + ) + result = validate_dir("META", project_id) click.secho(result, fg=INFO_COLOR, file=sys.stderr) except Exception as e: @@ -48,20 +68,31 @@ def init(ctx, project_id, debug, bundle): @meta.command() -@click.argument('directory', type=click.Path(exists=True), default='META') -@click.option('--debug', is_flag=True, default=False, show_default=True, help='Enable debug mode.') -@click.option('--skip-id-check', is_flag=True, default=False, show_default=True, help='Skip checking that resource IDs are valid for the project.') +@click.argument("directory", type=click.Path(exists=True), default="META") +@click.option( + "--debug", is_flag=True, default=False, show_default=True, help="Enable debug mode." +) +@click.option( + "--skip-id-check", + is_flag=True, + default=False, + show_default=True, + help="Skip checking that resource IDs are valid for the project.", +) @click.pass_obj def validate(ctx, directory, debug, skip_id_check): """Validate FHIR data""" try: from gen3_tracker.meta.validator import validate as validate_dir - with Halo(text='Validating', spinner='line', placement='right', color='white'): + + with Halo(text="Validating", spinner="line", placement="right", color="white"): project_id = ctx.gen3.project_id if not skip_id_check else None result = validate_dir(directory, project_id=project_id) click.secho(result.resources, fg=INFO_COLOR, file=sys.stderr) for _ in result.exceptions: - click.secho(f"{_.path}:{_.offset} {_.exception}", fg=ERROR_COLOR, file=sys.stderr) + click.secho( + f"{_.path}:{_.offset} {_.exception}", fg=ERROR_COLOR, file=sys.stderr + ) if result.exceptions: if debug or ctx.debug: raise result.exceptions[0].exception @@ -73,13 +104,22 @@ def validate(ctx, directory, debug, skip_id_check): @meta.command("graph") -@click.argument("directory_path", - type=click.Path(exists=True, file_okay=False), - default="META", required=False) -@click.argument("output_path", - type=click.Path(file_okay=True), - default="meta.html", required=False) -@click.option('--browser', default=False, show_default=True, is_flag=True, help='Open the graph in a browser.') +@click.argument( + "directory_path", + type=click.Path(exists=True, file_okay=False), + default="META", + required=False, +) +@click.argument( + "output_path", type=click.Path(file_okay=True), default="meta.html", required=False +) +@click.option( + "--browser", + default=False, + show_default=True, + is_flag=True, + help="Open the graph in a browser.", +) @click.pass_obj def render_graph(config: Config, directory_path: str, output_path: str, browser: bool): """Render metadata as a network graph. @@ -92,12 +132,18 @@ def render_graph(config: Config, directory_path: str, output_path: str, browser: from gen3_tracker.meta.visualizer import create_network_graph import webbrowser - assert pathlib.Path(directory_path).exists(), f"Directory {directory_path} does not exist." - with Halo(text='Graphing', spinner='line', placement='right', color='white'): + assert pathlib.Path( + directory_path + ).exists(), f"Directory {directory_path} does not exist." + with Halo(text="Graphing", spinner="line", placement="right", color="white"): output_path = pathlib.Path(output_path) create_network_graph(directory_path, output_path) url = f"file://{output_path.absolute()}" - click.secho(f"Saved {output_path}, open it in your browser to view the network.", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Saved {output_path}, open it in your browser to view the network.", + fg=INFO_COLOR, + file=sys.stderr, + ) if browser: webbrowser.open(url) except Exception as e: @@ -107,19 +153,45 @@ def render_graph(config: Config, directory_path: str, output_path: str, browser: @meta.command("dataframe") -@click.argument('data_type', - required=True, - type=click.Choice(['Specimen', 'DocumentReference', 'ResearchSubject', "MedicationAdministration", "GroupMember"]), - default=None) -@click.argument("directory_path", - type=click.Path(exists=True, file_okay=False), - default="./META", required=False) -@click.argument("output_path", - type=click.Path(file_okay=True), required=False) -@click.option('--dtale', 'launch_dtale', default=False, show_default=True, is_flag=True, help='Open the graph in a browser using the dtale package for interactive data exploration.') -@click.option('--debug', is_flag=True) +@click.argument( + "data_type", + required=True, + type=click.Choice( + [ + "Specimen", + "DocumentReference", + "ResearchSubject", + "MedicationAdministration", + "GroupMember", + ] + ), + default=None, +) +@click.argument( + "directory_path", + type=click.Path(exists=True, file_okay=False), + default="./META", + required=False, +) +@click.argument("output_path", type=click.Path(file_okay=True), required=False) +@click.option( + "--dtale", + "launch_dtale", + default=False, + show_default=True, + is_flag=True, + help="Open the graph in a browser using the dtale package for interactive data exploration.", +) +@click.option("--debug", is_flag=True) @click.pass_obj -def render_df(config: Config, directory_path: str, output_path: str, launch_dtale: bool, data_type: str, debug: bool): +def render_df( + config: Config, + directory_path: str, + output_path: str, + launch_dtale: bool, + data_type: str, + debug: bool, +): """Render a metadata dataframe. \b @@ -128,10 +200,12 @@ def render_df(config: Config, directory_path: str, output_path: str, launch_dtal """ try: from gen3_tracker.meta.dataframer import create_dataframe + df = create_dataframe(directory_path, config.work_dir, data_type) if launch_dtale: import dtale + dtale.show(df, subprocess=False, open_browser=True, port=40000) else: # export to csv @@ -144,4 +218,4 @@ def render_df(config: Config, directory_path: str, output_path: str, launch_dtal raise -meta.add_command(render_df, name='df') +meta.add_command(render_df, name="df") diff --git a/gen3_tracker/meta/dataframer.py b/gen3_tracker/meta/dataframer.py index 2ef94d7d..17c6c498 100644 --- a/gen3_tracker/meta/dataframer.py +++ b/gen3_tracker/meta/dataframer.py @@ -383,7 +383,9 @@ def flattened_procedures(self) -> Generator[dict, None, None]: value = None assert value is not None, f"no value for {resource['id']}" - procedure[validate_and_transform_graphql_field_name(code)] = value + procedure[validate_and_transform_graphql_field_name(code)] = ( + value + ) continue @@ -504,7 +506,9 @@ def flattened_research_subjects(self) -> Generator[dict, None, None]: for condition in conditions: for k, v in traverse(condition).items(): if k not in set(["condition_id", "condition_identifier"]): - flat_research_subject[validate_and_transform_graphql_field_name(k)] = v + flat_research_subject[ + validate_and_transform_graphql_field_name(k) + ] = v yield flat_research_subject @@ -637,7 +641,9 @@ def flattened_group_members(self) -> Generator[dict, None, None]: # for each member in a group, yield a group member dict for member_id in group_resource.members: # unique primary key from group and member ids - group_member_id = str(uuid.uuid5(ACED_NAMESPACE, simplified_group["id"] + "," + member_id)) + group_member_id = str( + uuid.uuid5(ACED_NAMESPACE, simplified_group["id"] + "," + member_id) + ) # group member dict composed of a simple group dict, unique primary key, and unique member_id yield { @@ -678,9 +684,7 @@ def create_dataframe( ) if df.empty: - raise ValueError( - f"Dataframe is empty, are there any {data_type} resources?" - ) + raise ValueError(f"Dataframe is empty, are there any {data_type} resources?") prefix = inflection.underscore(data_type) df = df.rename(columns={col: f"{prefix}_{col}" for col in df.columns}) @@ -742,7 +746,7 @@ def get_subject(db: LocalFHIRDatabase, resource: dict) -> dict: def get_resources_by_reference( db: LocalFHIRDatabase, resource_type: str, reference_field: str, reference_type: str ) -> dict[str, list]: - """given a set of rescode ources of type resource_type, map each unique reference in reference field of type reference_type to its associated resources + """given a set of resources of type resource_type, map each unique reference in reference field of type reference_type to its associated resources ex: use all Observations with a Specimen focus, map Specimen IDs to its list of associated Observations and return the map """ diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index 3e3858ab..eeb1f5a0 100644 --- a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -306,9 +306,11 @@ def identifiers(self) -> dict: base_identifier = { ( "identifier" - if i == 0 or identifier.get("use", "") == "official" - else identifier.get("system").split("/")[-1] + else "identifier_" + + validate_and_transform_graphql_field_name( + identifier.get("system").split("/")[-1] + ) ): identifier.get("value") for i, identifier in enumerate(identifiers) } @@ -326,7 +328,9 @@ def values(self) -> dict: # update the key if code information is available if self.resource.get("code", {}).get("text", None): - source = validate_and_transform_graphql_field_name(self.resource["code"]["text"]) + source = validate_and_transform_graphql_field_name( + self.resource["code"]["text"] + ) return {source: value} diff --git a/gen3_tracker/meta/skeleton.py b/gen3_tracker/meta/skeleton.py index d21e22d5..6880d6b0 100644 --- a/gen3_tracker/meta/skeleton.py +++ b/gen3_tracker/meta/skeleton.py @@ -27,30 +27,37 @@ def _get_system(identifier: str, project_id: str): """Return system component of simplified identifier""" - if '#' in identifier: - return identifier.split('#')[0] - if '|' in identifier: - return identifier.split('|')[0] + if "#" in identifier: + return identifier.split("#")[0] + if "|" in identifier: + return identifier.split("|")[0] # default return f"https://aced-idp.org/{project_id}" def meta_index(): """Read all the ndjson files in the `META` directory and create a dictionary with the id as the key and the official identifier as the value""" - meta_dir = pathlib.Path('META') + meta_dir = pathlib.Path("META") id_dict = {} - for file in meta_dir.glob('*.ndjson'): - with open(file, 'r') as f: + for file in meta_dir.glob("*.ndjson"): + with open(file, "r") as f: for line in f: record = orjson.loads(line) - _id = record.get('id') - resource_type = record.get('resourceType') - if resource_type == 'Bundle': + _id = record.get("id") + resource_type = record.get("resourceType") + if resource_type == "Bundle": break - official_identifier = next((identifier.get('value') for identifier in record.get('identifier', []) if identifier.get('use') == 'official'), None) - if not official_identifier and record.get('identifier'): - official_identifier = record['identifier'][0]['value'] + official_identifier = next( + ( + identifier.get("value") + for identifier in record.get("identifier", []) + if identifier.get("use") == "official" + ), + None, + ) + if not official_identifier and record.get("identifier"): + official_identifier = record["identifier"][0]["value"] if _id and official_identifier: id_dict[f"{resource_type}/{_id}"] = official_identifier @@ -60,14 +67,14 @@ def meta_index(): def get_data_from_meta() -> Generator[int, None, None]: """Read all the ndjson files in the `META` directory and return a generator that produces all records""" - meta_dir = pathlib.Path('META') + meta_dir = pathlib.Path("META") - for file in meta_dir.glob('*.ndjson'): - with open(file, 'r') as f: + for file in meta_dir.glob("*.ndjson"): + with open(file, "r") as f: for line in f: record = orjson.loads(line) - resource_type = record.get('resourceType') - if resource_type == 'Bundle': + resource_type = record.get("resourceType") + if resource_type == "Bundle": break yield record @@ -75,11 +82,13 @@ def get_data_from_meta() -> Generator[int, None, None]: def update_document_reference(document_reference: DocumentReference, dvc_data: DVC): """Update document reference with index record.""" - assert document_reference.resource_type == 'DocumentReference' - assert dvc_data.out.object_id == document_reference.id, f"{dvc_data['did']} != {document_reference.id}" + assert document_reference.resource_type == "DocumentReference" + assert ( + dvc_data.out.object_id == document_reference.id + ), f"{dvc_data['did']} != {document_reference.id}" assert dvc_data.out.modified, f"dvc_data missing modified: {dvc_data}" - document_reference.docStatus = 'final' - document_reference.status = 'current' + document_reference.docStatus = "final" + document_reference.status = "current" document_reference.date = dvc_data.out.modified @@ -89,17 +98,17 @@ def update_document_reference(document_reference: DocumentReference, dvc_data: D else: source_path = dvc_data.out.source_url - source_path = source_path.replace('////', '///') + source_path = source_path.replace("////", "///") attachment.extension = [ { "url": f"http://aced-idp.org/fhir/StructureDefinition/{dvc_data.out.hash}", - "valueString": dvc_data.out.hash_value + "valueString": dvc_data.out.hash_value, }, { "url": "http://aced-idp.org/fhir/StructureDefinition/source_path", - "valueUrl": source_path - } + "valueUrl": source_path, + }, ] attachment.contentType = dvc_data.out.mime @@ -114,14 +123,23 @@ def update_document_reference(document_reference: DocumentReference, dvc_data: D document_reference.content = [content] -def create_id_from_strings(resource_type: str, project_id: str, identifier_string: str) -> str: +def create_id_from_strings( + resource_type: str, project_id: str, identifier_string: str +) -> str: """Create an id from strings.""" if not identifier_string: return None - return str(uuid.uuid5(ACED_NAMESPACE, f"{project_id}/{resource_type}/{_get_system(identifier_string, project_id)}|{identifier_string}")) + return str( + uuid.uuid5( + ACED_NAMESPACE, + f"{project_id}/{resource_type}/{_get_system(identifier_string, project_id)}|{identifier_string}", + ) + ) -def create_skeleton(dvc: dict, project_id: str, meta_index: set[str] = []) -> list[Resource]: +def create_skeleton( + dvc: dict, project_id: str, meta_index: set[str] = [] +) -> list[Resource]: """ Create a skeleton graph for document and ancestors from a set of identifiers. """ @@ -137,77 +155,124 @@ def create_skeleton(dvc: dict, project_id: str, meta_index: set[str] = []) -> li document_reference_id = dvc.out.set_object_id(project_id=project_id) assert project_id, "project_id required" - assert project_id.count('-') == 1, "project_id must be of the form program-project" - program, project = project_id.split('-') + assert project_id.count("-") == 1, "project_id must be of the form program-project" + program, project = project_id.split("-") - research_study = research_subject = observation = specimen = patient = task = document_reference = None + research_study = research_subject = observation = specimen = patient = task = ( + document_reference + ) = None # check if we have already created the resources - research_study_id = create_id_from_strings(resource_type='ResearchStudy', project_id=project_id, identifier_string=project_id) - specimen_id = create_id_from_strings(resource_type='Specimen', project_id=project_id, identifier_string=specimen_identifier) - patient_id = create_id_from_strings(resource_type='Patient', project_id=project_id, identifier_string=patient_identifier) - task_id = create_id_from_strings(resource_type='Task', project_id=project_id, identifier_string=task_identifier) - observation_id = create_id_from_strings(resource_type='Observation', project_id=project_id, identifier_string=observation_identifier) - - _ = f'ResearchStudy/{research_study_id}' + research_study_id = create_id_from_strings( + resource_type="ResearchStudy", + project_id=project_id, + identifier_string=project_id, + ) + specimen_id = create_id_from_strings( + resource_type="Specimen", + project_id=project_id, + identifier_string=specimen_identifier, + ) + patient_id = create_id_from_strings( + resource_type="Patient", + project_id=project_id, + identifier_string=patient_identifier, + ) + task_id = create_id_from_strings( + resource_type="Task", project_id=project_id, identifier_string=task_identifier + ) + observation_id = create_id_from_strings( + resource_type="Observation", + project_id=project_id, + identifier_string=observation_identifier, + ) + + _ = f"ResearchStudy/{research_study_id}" if _ in meta_index: research_study = meta_index[_] - _ = f'Specimen/{specimen_id}' + _ = f"Specimen/{specimen_id}" if _ in meta_index: specimen = meta_index[_] - _ = f'Patient/{patient_id}' + _ = f"Patient/{patient_id}" if _ in meta_index: patient = meta_index[_] - _ = f'Task/{task_id}' + _ = f"Task/{task_id}" if _ in meta_index: task = meta_index[_] - _ = f'Observation/{observation_id}' + _ = f"Observation/{observation_id}" if _ in meta_index: observation = meta_index[_] # create entities - document_reference = DocumentReference(status='current', content=[{'attachment': {'url': "file://"}}]) + document_reference = DocumentReference( + status="current", content=[{"attachment": {"url": "file://"}}] + ) document_reference.id = document_reference_id document_reference.identifier = [ - - Identifier(value=document_reference_id, system=_get_system(document_reference_id, project_id=project_id), - use='official')] + Identifier( + value=document_reference_id, + system=_get_system(document_reference_id, project_id=project_id), + use="official", + ) + ] update_document_reference(document_reference, dvc) if not research_study: - research_study = ResearchStudy(status='active') + research_study = ResearchStudy(status="active") research_study.description = f"Skeleton ResearchStudy for {project_id}" research_study.identifier = [ - Identifier(value=project_id, system=_get_system(project_id, project_id=project_id), - use='official')] + Identifier( + value=project_id, + system=_get_system(project_id, project_id=project_id), + use="official", + ) + ] research_study.id = create_resource_id(research_study, project_id) if not patient and patient_identifier: patient = Patient() - patient.identifier = [Identifier(value=patient_identifier, system=_get_system(patient_identifier, project_id=project_id), use='official')] + patient.identifier = [ + Identifier( + value=patient_identifier, + system=_get_system(patient_identifier, project_id=project_id), + use="official", + ) + ] patient.id = create_resource_id(patient, project_id) research_subject = ResearchSubject( - status='active', - study={'reference': f"ResearchStudy/{research_study_id}"}, - subject={'reference': f"Patient/{patient.id}"} + status="active", + study={"reference": f"ResearchStudy/{research_study_id}"}, + subject={"reference": f"Patient/{patient.id}"}, ) - research_subject.identifier = [Identifier(value=patient_identifier, system=_get_system(patient_identifier, project_id=project_id), use='official')] + research_subject.identifier = [ + Identifier( + value=patient_identifier, + system=_get_system(patient_identifier, project_id=project_id), + use="official", + ) + ] research_subject.id = create_resource_id(research_subject, project_id) patient_id = patient.id if not observation and observation_identifier: - observation = Observation(status='final', code={'text': 'unknown'}) - observation.identifier = [Identifier(value=observation_identifier, system=_get_system(observation_identifier, project_id=project_id), use='official')] + observation = Observation(status="final", code={"text": "unknown"}) + observation.identifier = [ + Identifier( + value=observation_identifier, + system=_get_system(observation_identifier, project_id=project_id), + use="official", + ) + ] observation.id = create_resource_id(observation, project_id) assert patient, "patient required for observation" - observation.subject = {'reference': f"Patient/{patient_id}"} + observation.subject = {"reference": f"Patient/{patient_id}"} if not specimen and specimen_identifier: @@ -216,17 +281,28 @@ def create_skeleton(dvc: dict, project_id: str, meta_index: set[str] = []) -> li # exit(1) specimen = Specimen() - specimen.identifier = [Identifier(value=specimen_identifier, system=_get_system(specimen_identifier, project_id=project_id), use='official')] + specimen.identifier = [ + Identifier( + value=specimen_identifier, + system=_get_system(specimen_identifier, project_id=project_id), + use="official", + ) + ] specimen.id = create_resource_id(specimen, project_id) specimen_id = specimen.id assert patient, "patient required for specimen" - specimen.subject = {'reference': f"Patient/{patient_id}"} + specimen.subject = {"reference": f"Patient/{patient_id}"} if not task and task_identifier: - task = Task(intent='unknown', status='completed') - task.identifier = [Identifier(value=task_identifier, system=_get_system(task_identifier, project_id=project_id), - use='official')] + task = Task(intent="unknown", status="completed") + task.identifier = [ + Identifier( + value=task_identifier, + system=_get_system(task_identifier, project_id=project_id), + use="official", + ) + ] task.id = create_resource_id(task, project_id) task_id = task.id @@ -234,9 +310,9 @@ def create_skeleton(dvc: dict, project_id: str, meta_index: set[str] = []) -> li # assign subject, specimen of observation if observation and specimen and not observation.specimen: - observation.specimen = {'reference': f"Specimen/{specimen_id}"} + observation.specimen = {"reference": f"Specimen/{specimen_id}"} if observation and patient and not observation.subject: - observation.subject = {'reference': f"Patient/{patient_id}"} + observation.subject = {"reference": f"Patient/{patient_id}"} if task: task.input = [] @@ -244,52 +320,67 @@ def create_skeleton(dvc: dict, project_id: str, meta_index: set[str] = []) -> li task.input.append( TaskInput( valueReference={"reference": f"Specimen/{specimen.id}"}, - type={'text': 'Specimen'} + type={"text": "Specimen"}, ) ) if patient: task.input.append( TaskInput( - valueReference={'reference': f"Patient/{patient.id}"}, - type={'text': 'Patient'} + valueReference={"reference": f"Patient/{patient.id}"}, + type={"text": "Patient"}, ) ) task.output = [ TaskOutput( - valueReference={'reference': f"DocumentReference/{document_reference.id}"}, - type={'text': 'DocumentReference'}) + valueReference={ + "reference": f"DocumentReference/{document_reference.id}" + }, + type={"text": "DocumentReference"}, + ) ] # assign document reference subject if observation: - document_reference.subject = {'reference': f"Observation/{observation_id}"} + document_reference.subject = {"reference": f"Observation/{observation_id}"} if specimen and not document_reference.subject: - document_reference.subject = {'reference': f"Specimen/{specimen_id}"} + document_reference.subject = {"reference": f"Specimen/{specimen_id}"} if patient and not document_reference.subject: - document_reference.subject = {'reference': f"Patient/{patient_id}"} + document_reference.subject = {"reference": f"Patient/{patient_id}"} if not document_reference.subject: - document_reference.subject = {'reference': f"ResearchStudy/{research_study_id}"} - - return [_ for _ in [research_study, research_subject, patient, observation, specimen, task, document_reference] if _ and not isinstance(_, str)] + document_reference.subject = {"reference": f"ResearchStudy/{research_study_id}"} + + return [ + _ + for _ in [ + research_study, + research_subject, + patient, + observation, + specimen, + task, + document_reference, + ] + if _ and not isinstance(_, str) + ] def update_meta_files(dry_run=False, project_id=None, create_bundle=False) -> list[str]: """Maintain the META directory.""" assert project_id, "project_id required" - manifest_path = pathlib.Path('MANIFEST') - dvc_files = [_ for _ in manifest_path.rglob('*.dvc')] + manifest_path = pathlib.Path("MANIFEST") + dvc_files = [_ for _ in manifest_path.rglob("*.dvc")] - before_meta_files = [_ for _ in pathlib.Path('META').glob('*.ndjson')] + before_meta_files = [_ for _ in pathlib.Path("META").glob("*.ndjson")] before_meta_index = set(list(meta_index().keys())) emitted_already = [] if not dvc_files: # remove the DocumentReference file if it exists - document_reference_path = pathlib.Path('META/DocumentReference.ndjson') + document_reference_path = pathlib.Path("META/DocumentReference.ndjson") if document_reference_path.exists(): document_reference_path.unlink() - with EmitterContextManager('META') as emitter: + with EmitterContextManager("META") as emitter: for _ in dvc_data(dvc_files): resources = create_skeleton(_, project_id, meta_index()) for resource in resources: @@ -306,33 +397,48 @@ def update_meta_files(dry_run=False, project_id=None, create_bundle=False) -> li if orphaned_meta_index: # create a bundle to tell server about deletes now = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ") - bundle = Bundle(type='transaction', timestamp=now) + bundle = Bundle(type="transaction", timestamp=now) - bundle.identifier = Identifier(value=project_id, system="https://aced-idp.org/project_id", use='official') + bundle.identifier = Identifier( + value=project_id, system="https://aced-idp.org/project_id", use="official" + ) bundle.id = create_resource_id(bundle, project_id) bundle.entry = [] - outcome = OperationOutcome(issue=[{'severity': 'warning', 'code': 'processing', 'diagnostics': 'Meta data items no longer in study.'}]) + outcome = OperationOutcome( + issue=[ + { + "severity": "warning", + "code": "processing", + "diagnostics": "Meta data items no longer in study.", + } + ] + ) bundle.issues = outcome for _ in orphaned_meta_index: bundle_entry = BundleEntry() - bundle_entry.request = BundleEntryRequest(url=_, method='DELETE') + bundle_entry.request = BundleEntryRequest(url=_, method="DELETE") bundle.entry.append(bundle_entry) if create_bundle: - with EmitterContextManager('META') as emitter: - emitter.emit(bundle.resource_type, file_mode='a').write( + with EmitterContextManager("META") as emitter: + emitter.emit(bundle.resource_type, file_mode="a").write( bundle.json(option=orjson.OPT_APPEND_NEWLINE) ) else: if len(orphaned_meta_index): - print(f"Records were orphaned meta index: {orphaned_meta_index}", file=sys.stderr) + print( + f"Records were orphaned meta index: {orphaned_meta_index}", + file=sys.stderr, + ) - after_meta_files = [_ for _ in pathlib.Path('META').glob('*.ndjson')] + after_meta_files = [_ for _ in pathlib.Path("META").glob("*.ndjson")] new_meta_files = [str(_) for _ in after_meta_files if _ not in before_meta_files] if new_meta_files: - run_command(f'git add {" ".join(new_meta_files)}', dry_run=dry_run, no_capture=True) + run_command( + f'git add {" ".join(new_meta_files)}', dry_run=dry_run, no_capture=True + ) return after_meta_files diff --git a/gen3_tracker/meta/validator.py b/gen3_tracker/meta/validator.py index 6630c196..7edb22e7 100644 --- a/gen3_tracker/meta/validator.py +++ b/gen3_tracker/meta/validator.py @@ -24,7 +24,7 @@ class ValidateDirectoryResult(BaseModel): def model_dump(self): """ - temporary until we switch to pydantic2 + temporary until we switch to pydantic2 """ for _ in self.exceptions: _.exception = str(_.exception) @@ -36,10 +36,14 @@ def _check_coding(self: Coding, *args, **kwargs): """MonkeyPatch replacement for dict(), check Coding.""" # note `self` is the Coding assert self.code, f"Missing `code` {self}" - assert (not self.code.startswith("http")), f"`code` should _not_ be a url http {self.code}" + assert not self.code.startswith( + "http" + ), f"`code` should _not_ be a url http {self.code}" assert ":" not in self.code, f"`code` should not contain ':' {self.code}" assert self.system, f"Missing `system` {self}" - assert "%" not in self.system, f"`system` should be a simple url without uuencoding {self.system}" + assert ( + "%" not in self.system + ), f"`system` should be a simple url without uuencoding {self.system}" parsed = urlparse(self.system) assert parsed.scheme, f"`system` is not a URI {self}" assert self.display, f"Missing `display` {self}" @@ -54,7 +58,9 @@ def _check_identifier(self: Identifier, *args, **kwargs): assert self.system, f"Missing `system` {self}" parsed = urlparse(self.system) assert parsed.scheme, f"`system` is not a URI {self}" - assert "%" not in self.system, f"`system` should be a simple url without uuencoding {self.system}" + assert ( + "%" not in self.system + ), f"`system` should be a simple url without uuencoding {self.system}" # call the original dict() method return orig_identifier_dict(self, *args, **kwargs) @@ -63,9 +69,11 @@ def _check_reference(self: Reference, *args, **kwargs): """MonkeyPatch replacement for dict(), check Reference.""" # note `self` is the Identifier assert self.reference, f"Missing `reference` {self}" - assert '/' in self.reference, f"Does not appear to be Relative reference {self}" - assert 'http' not in self.reference, f"Absolute references not supported {self}" - assert len(self.reference.split('/')) == 2, f"Does not appear to be Relative reference {self}" + assert "/" in self.reference, f"Does not appear to be Relative reference {self}" + assert "http" not in self.reference, f"Absolute references not supported {self}" + assert ( + len(self.reference.split("/")) == 2 + ), f"Does not appear to be Relative reference {self}" # call the original dict() method return orig_reference_dict(self, *args, **kwargs) @@ -99,7 +107,7 @@ def validate(directory_path: pathlib.Path, project_id=None) -> ValidateDirectory _ = parse_result.resource ids.append(f"{_.resource_type}/{_.id}") - nested_references = nested_lookup('reference', parse_result.json_obj) + nested_references = nested_lookup("reference", parse_result.json_obj) # https://www.hl7.org/fhir/medicationrequest-definitions.html#MedicationRequest.medication # is a reference to a Medication resource https://www.hl7.org/fhir/references.html#CodeableReference # so it has a reference.reference form, strip it out @@ -113,7 +121,9 @@ def validate(directory_path: pathlib.Path, project_id=None) -> ValidateDirectory ids = set(ids) if not references.issubset(ids): _ = Exception(f"references not found {references - ids}") - _ = ParseResult(resource=None, exception=_, path=directory_path, resource_id=None) + _ = ParseResult( + resource=None, exception=_, path=directory_path, resource_id=None + ) exceptions.append(_) if len(ids) != len(ids_list): # Create a Counter object from ids_list @@ -122,10 +132,14 @@ def validate(directory_path: pathlib.Path, project_id=None) -> ValidateDirectory duplicate_ids = [id_ for id_, count in counter.items() if count > 1] # log it _ = Exception(f"Duplicate ids found {duplicate_ids}") - _ = ParseResult(resource=None, exception=_, path=directory_path, resource_id=None) + _ = ParseResult( + resource=None, exception=_, path=directory_path, resource_id=None + ) exceptions.append(_) - return ValidateDirectoryResult(resources={'summary': dict(resources)}, exceptions=exceptions) + return ValidateDirectoryResult( + resources={"summary": dict(resources)}, exceptions=exceptions + ) # diff --git a/gen3_tracker/meta/visualizer.py b/gen3_tracker/meta/visualizer.py index 53a66f0c..8939010e 100644 --- a/gen3_tracker/meta/visualizer.py +++ b/gen3_tracker/meta/visualizer.py @@ -4,19 +4,21 @@ def _container(): """Create a pyvis container.""" - return Network(notebook=True, cdn_resources='in_line') # filter_menu=True, select_menu=True + return Network( + notebook=True, cdn_resources="in_line" + ) # filter_menu=True, select_menu=True def _load(net: Network, aggregation: dict) -> Network: """Load the aggregation into the visualization network.""" # add vertices for resource_type, _ in aggregation.items(): - assert 'count' in _, _ + assert "count" in _, _ net.add_node(resource_type, label=f"{resource_type}/{_['count']}") # add edges for resource_type, _ in aggregation.items(): - for ref in _.get('references', {}): - count = _['references'][ref]['count'] + for ref in _.get("references", {}): + count = _["references"][ref]["count"] net.add_edge(resource_type, ref, title=count, value=count) return net @@ -32,4 +34,4 @@ def create_network_graph(directory_path: str, output_path: str): # Load it into a pyvis net = _load(_container(), aggregation) net.save_graph(str(output_path)) - net.show_buttons(filter_=['physics']) + net.show_buttons(filter_=["physics"]) diff --git a/setup.py b/setup.py index 62899230..e25f9b07 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='gen3_tracker', - version='0.0.7rc23', + version='0.0.7rc24', description='A CLI for adding version control to Gen3 data submission projects.', long_description=long_description, long_description_content_type='text/markdown', diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index a357e570..bac61f6b 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -5,7 +5,7 @@ from click.testing import CliRunner from gen3_tracker.config import ensure_auth, default -from gen3_tracker.git import DVC, run_command +from gen3_tracker.git import DVC from pathlib import Path from tests.integration import validate_document_in_elastic, validate_document_in_grip from tests import run From 92a0db0e6209ce7e441baa6bb1c64b6ea15fe9d5 Mon Sep 17 00:00:00 2001 From: Matthew Peterkort <33436238+matthewpeterkort@users.noreply.github.com> Date: Tue, 14 Oct 2025 09:47:27 -0700 Subject: [PATCH 4/7] Update tests/integration/test_end_to_end_workflow.py Co-authored-by: Quinn Wai Wong <54592956+quinnwai@users.noreply.github.com> --- tests/integration/test_end_to_end_workflow.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index bac61f6b..77ca72be 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -113,10 +113,6 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: validate_document_in_grip(object_id, auth=auth, project_id=project_id) validate_document_in_elastic(object_id, auth=auth) - """ - I'm not sure why this part doesn't work and I don't really care - since we need to deprecate this part and move to git-drs anyways - # clone the project in new directory clone_dir = Path("clone") os.mkdir(clone_dir) From 7fa8df64f01d677f739f3db47d8e284b8d5804c1 Mon Sep 17 00:00:00 2001 From: Matthew Peterkort <33436238+matthewpeterkort@users.noreply.github.com> Date: Tue, 14 Oct 2025 09:47:31 -0700 Subject: [PATCH 5/7] Update tests/integration/test_end_to_end_workflow.py Co-authored-by: Quinn Wai Wong <54592956+quinnwai@users.noreply.github.com> --- tests/integration/test_end_to_end_workflow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 77ca72be..b4788f6e 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -136,7 +136,6 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: assert Path( "my-project-data/hello.txt" ).exists(), "hello.txt does not exist in the cloned directory." - """ # remove the project from the server. # TODO note, this does not remove the files from the bucket (UChicago bug) From 6e63a6ff0cd95b3c8b1ceeb3d326187e3f735240 Mon Sep 17 00:00:00 2001 From: Matthew Peterkort <33436238+matthewpeterkort@users.noreply.github.com> Date: Tue, 14 Oct 2025 09:47:40 -0700 Subject: [PATCH 6/7] Update tests/integration/test_end_to_end_workflow.py Co-authored-by: Quinn Wai Wong <54592956+quinnwai@users.noreply.github.com> --- tests/integration/test_end_to_end_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index b4788f6e..7d9a9574 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -5,7 +5,7 @@ from click.testing import CliRunner from gen3_tracker.config import ensure_auth, default -from gen3_tracker.git import DVC +from gen3_tracker.git import DVC, run_command from pathlib import Path from tests.integration import validate_document_in_elastic, validate_document_in_grip from tests import run From 0f8e16af720266e2b6cb70dd676bf0efe0b573b9 Mon Sep 17 00:00:00 2001 From: Quinn Wai Wong <54592956+quinnwai@users.noreply.github.com> Date: Mon, 20 Oct 2025 11:01:54 -0700 Subject: [PATCH 7/7] Feature/patient and obs flatten (#150) * get subj of subj (patient) in docref * linting * bump * add patient observations to research subject * bump * patch I think * bumperino --- gen3_tracker/meta/dataframer.py | 79 +++++++++---- gen3_tracker/meta/entities.py | 1 + setup.py | 2 +- tests/integration/test_end_to_end_workflow.py | 1 - tests/integration/test_rm_file.py | 107 +++++++++++------- tests/unit/dataframer/test_dataframer.py | 89 +++++++++------ tests/unit/test_indexclient.py | 7 +- tests/unit/test_none_fields.py | 3 +- 8 files changed, 182 insertions(+), 107 deletions(-) diff --git a/gen3_tracker/meta/dataframer.py b/gen3_tracker/meta/dataframer.py index 17c6c498..300a43c7 100644 --- a/gen3_tracker/meta/dataframer.py +++ b/gen3_tracker/meta/dataframer.py @@ -477,17 +477,21 @@ def select_coding(self, resource): def flattened_research_subjects(self) -> Generator[dict, None, None]: - # get all observations with a Observation.subject=Patient, mapped from patient ID to observation + # setup resource_type = "ResearchSubject" - conditions_by_patient_id = get_conditions_by_subject(self, "Patient") + patient_type = "Patient" + cursor = self.connect() + + # grab associated conditions + observations via patient ID at once + conditions_by_patient_id = get_conditions_by_subject(self, patient_type) + observations_by_patient_id = get_observations_by_focus(self, patient_type) # get all ResearchSubjects - cursor = self.connect() cursor.execute( "SELECT * FROM resources where resource_type = ?", (resource_type,) ) - # get research subject and associated .subject patient + # add in new fields to existing research subject for _, _, raw_research_subject in cursor.fetchall(): research_subject = json.loads(raw_research_subject) flat_research_subject = SimplifiedResource.build( @@ -495,9 +499,16 @@ def flattened_research_subjects(self) -> Generator[dict, None, None]: ).simplified # return with .subject (ie Patient) fields - patient = get_subject(self, research_subject) + _, patient = get_subject(self, research_subject) flat_research_subject.update(patient) + # add patient observation values + flat_research_subject = update_with_observations( + flat_research_subject, + patient["patient_id"], + observations_by_patient_id, + ) + # get condition code, eg enrollment diagnosis if patient["patient_id"] in conditions_by_patient_id: conditions = conditions_by_patient_id[patient["patient_id"]] @@ -528,7 +539,7 @@ def flattened_medication_administrations(self) -> Generator[dict, None, None]: resource=medication_administration ).simplified - patient = get_subject(self, medication_administration) + _, patient = get_subject(self, medication_administration) flat_medication_administration.update(patient) yield flat_medication_administration @@ -561,16 +572,17 @@ def flattened_document_reference( flat_doc_ref = SimplifiedResource.build(resource=doc_ref).simplified # extract the corresponding .subject and append its fields - flat_doc_ref.update(get_subject(self, doc_ref)) - # populate observation data associated with the document reference document - if doc_ref["id"] in observation_by_focus_id: - associated_observations = observation_by_focus_id[doc_ref["id"]] + raw_subject, simplified_subject = get_subject(self, doc_ref) + flat_doc_ref.update(simplified_subject) - # TODO: assumes there are no duplicate column names in each observation - for observation in associated_observations: - flat_observation = SimplifiedResource.build(resource=observation).values - flat_doc_ref.update(flat_observation) + # extract the subject of the .subject and append its fields + # eg: a specimen is associated with a patients + _, simplified_subject_of_subject = get_subject(self, raw_subject) + flat_doc_ref.update(simplified_subject_of_subject) + + # populate observation data associated with the document reference document + update_with_observations(flat_doc_ref, doc_ref["id"], observation_by_focus_id) # TODO: test this based on fhir-gdc if "basedOn" in doc_ref: @@ -606,16 +618,11 @@ def flattened_specimen(self, specimen: dict, observation_by_id: dict) -> dict: flat_specimen = SimplifiedResource.build(resource=specimen).simplified # extract its .subject and append its fields (including id) - flat_specimen.update(get_subject(self, specimen)) + _, simplified_subject = get_subject(self, specimen) + flat_specimen.update(simplified_subject) # populate observation codes for each associated observation - if specimen["id"] in observation_by_id: - observations = observation_by_id[specimen["id"]] - - # TODO: assumes there are no duplicate column names in each observation - for observation in observations: - flat_observation = SimplifiedResource.build(resource=observation).values - flat_specimen.update(flat_observation) + update_with_observations(flat_specimen, specimen["id"], observation_by_id) return flat_specimen @@ -725,13 +732,21 @@ def is_number(s): return False +#################### +# MACROS / HELPERS # +#################### + + def get_subject(db: LocalFHIRDatabase, resource: dict) -> dict: - """get the resource's subject field if it exists""" + """ + get the resource's subject if it exists + Return both the raw subject and its simplified version + """ # ensure resource has subject field subject_key = get_nested_value(resource, ["subject", "reference"]) if subject_key is None: - return {} + return {}, {} # traverse the resource of the subject and return its values cursor = db.connect() @@ -740,7 +755,8 @@ def get_subject(db: LocalFHIRDatabase, resource: dict) -> dict: assert row, f"{subject_key} not found in database" _, _, raw_subject = row subject = json.loads(raw_subject) - return traverse(subject) + + return subject, traverse(subject) def get_resources_by_reference( @@ -804,3 +820,16 @@ def get_conditions_by_subject( ) -> dict[str, list]: """get all Conditions that have a subject of resource type subject_type""" return get_resources_by_reference(db, "Condition", "subject", subject_type) + + +def update_with_observations(resource, id, observations_by_id): + """update a resource with the observations associated with the provided ID""" + if id in observations_by_id: + associated_observations = observations_by_id[id] + + # TODO: assumes there are no duplicate column names in each observation + for observation in associated_observations: + flat_observation = SimplifiedResource.build(resource=observation).values + resource.update(flat_observation) + + return resource diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index eeb1f5a0..d415856c 100644 --- a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -517,6 +517,7 @@ def values(self) -> dict: for parent_dict in self.resource["parent"] ] ) + return _values diff --git a/setup.py b/setup.py index e25f9b07..7e9cec66 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='gen3_tracker', - version='0.0.7rc24', + version='0.0.7rc27', description='A CLI for adding version control to Gen3 data submission projects.', long_description=long_description, long_description_content_type='text/markdown', diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 7d9a9574..b07dec6f 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -132,7 +132,6 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: # check the files exist in the cloned directory run_command("ls -l") - assert Path( "my-project-data/hello.txt" ).exists(), "hello.txt does not exist in the cloned directory." diff --git a/tests/integration/test_rm_file.py b/tests/integration/test_rm_file.py index ad1d7e7c..4eab1562 100644 --- a/tests/integration/test_rm_file.py +++ b/tests/integration/test_rm_file.py @@ -118,7 +118,7 @@ def test_rm_committed(runner: CliRunner, project_id, tmpdir) -> None: object_id = dvc.object_id auth = ensure_auth(config=default()) - ok = '' + ok = "" try: validate_document_in_grip(object_id, auth=auth, project_id=project_id) except Exception as e: @@ -129,7 +129,7 @@ def test_rm_committed(runner: CliRunner, project_id, tmpdir) -> None: except Exception as e: ok = ok + f" Elastic validation failed: {e}" - assert ok == '', ok + assert ok == "", ok # remove the project from the server. # TODO note, this does not remove the files from the bucket (UChicago bug) @@ -184,7 +184,10 @@ def test_rm_pushed(runner: CliRunner, project_id, tmpdir) -> None: run( runner, ["--debug", "meta", "init", "--bundle"], - expected_files=[Path("META/DocumentReference.ndjson"), Path("META/Bundle.ndjson")], + expected_files=[ + Path("META/DocumentReference.ndjson"), + Path("META/Bundle.ndjson"), + ], ) # commit the re-created meta @@ -209,7 +212,7 @@ def test_rm_pushed(runner: CliRunner, project_id, tmpdir) -> None: object_id = dvc.object_id auth = ensure_auth(config=default()) - ok = '' + ok = "" try: validate_document_in_grip(object_id, auth=auth, project_id=project_id) @@ -222,18 +225,26 @@ def test_rm_pushed(runner: CliRunner, project_id, tmpdir) -> None: ok = ok + f" Elastic validation failed: {e}" try: - validate_document_in_grip(expected_missing_object_id, auth=auth, project_id=project_id) - ok = ok + f" Grip validation failed should not have found: {expected_missing_object_id}" + validate_document_in_grip( + expected_missing_object_id, auth=auth, project_id=project_id + ) + ok = ( + ok + + f" Grip validation failed should not have found: {expected_missing_object_id}" + ) except Exception: pass try: validate_document_in_elastic(expected_missing_object_id, auth=auth) - ok = ok + f" Elastic validation failed should not have found: {expected_missing_object_id}" + ok = ( + ok + + f" Elastic validation failed should not have found: {expected_missing_object_id}" + ) except Exception: pass - assert ok == '', ok + assert ok == "", ok # remove the project from the server. # TODO note, this does not remove the files from the bucket (UChicago bug) @@ -288,7 +299,10 @@ def test_rm_commit_all(runner: CliRunner, project_id, tmpdir) -> None: run( runner, ["--debug", "meta", "init", "--bundle"], - expected_files=[Path("META/DocumentReference.ndjson"), Path("META/Bundle.ndjson")], + expected_files=[ + Path("META/DocumentReference.ndjson"), + Path("META/Bundle.ndjson"), + ], ) # commit the re-created meta @@ -313,7 +327,7 @@ def test_rm_commit_all(runner: CliRunner, project_id, tmpdir) -> None: object_id = dvc.object_id auth = ensure_auth(config=default()) - ok = '' + ok = "" try: validate_document_in_grip(object_id, auth=auth, project_id=project_id) @@ -326,18 +340,26 @@ def test_rm_commit_all(runner: CliRunner, project_id, tmpdir) -> None: ok = ok + f" Elastic validation failed: {e}" try: - validate_document_in_grip(expected_missing_object_id, auth=auth, project_id=project_id) - ok = ok + f" Grip validation failed should not have found: {expected_missing_object_id}" + validate_document_in_grip( + expected_missing_object_id, auth=auth, project_id=project_id + ) + ok = ( + ok + + f" Grip validation failed should not have found: {expected_missing_object_id}" + ) except Exception: pass try: validate_document_in_elastic(expected_missing_object_id, auth=auth) - ok = ok + f" Elastic validation failed should not have found: {expected_missing_object_id}" + ok = ( + ok + + f" Elastic validation failed should not have found: {expected_missing_object_id}" + ) except Exception: pass - assert ok == '', ok + assert ok == "", ok # remove the project from the server. # TODO note, this does not remove the files from the bucket (UChicago bug) @@ -373,33 +395,20 @@ def test_rm_pushed_links(runner: CliRunner, project_id, tmpdir) -> None: # Get the path of the platform temporary directory e.g. /tmp # we use the actual string '/tmp' as opposed to using the tempfile module provided in tmpdit # to ensure we can link to a file outside the project working dir - temp_dir = '/tmp' - if os.environ.get('TMP', None): - temp_dir = os.environ.get('TMP') + temp_dir = "/tmp" + if os.environ.get("TMP", None): + temp_dir = os.environ.get("TMP") test_file = Path(temp_dir) / "hello-g3t-integration-test.txt" test_file.write_text("hello\n") os.symlink(str(test_file), "hello4.txt") - run( - runner, - ["--debug", "add", "hello.txt"] - ) - run( - runner, - ["--debug", "add", "hello2.txt"] - ) + run(runner, ["--debug", "add", "hello.txt"]) + run(runner, ["--debug", "add", "hello2.txt"]) # should fail since the target file does not exist - run( - runner, - ["--debug", "add", "hello3.txt"], - expected_exit_code=1 - ) + run(runner, ["--debug", "add", "hello3.txt"], expected_exit_code=1) # should work since the target file exists - run( - runner, - ["--debug", "add", "hello4.txt"] - ) + run(runner, ["--debug", "add", "hello4.txt"]) # create the meta files run( @@ -435,7 +444,10 @@ def test_rm_pushed_links(runner: CliRunner, project_id, tmpdir) -> None: run( runner, ["--debug", "meta", "init", "--bundle"], - expected_files=[Path("META/DocumentReference.ndjson"), Path("META/Bundle.ndjson")], + expected_files=[ + Path("META/DocumentReference.ndjson"), + Path("META/Bundle.ndjson"), + ], ) # commit the re-created meta @@ -463,7 +475,7 @@ def test_rm_pushed_links(runner: CliRunner, project_id, tmpdir) -> None: object_id = dvc.object_id auth = ensure_auth(config=default()) - ok = '' + ok = "" try: validate_document_in_grip(object_id, auth=auth, project_id=project_id) @@ -476,18 +488,26 @@ def test_rm_pushed_links(runner: CliRunner, project_id, tmpdir) -> None: ok = ok + f" Elastic validation failed: {e}" try: - validate_document_in_grip(expected_missing_object_id, auth=auth, project_id=project_id) - ok = ok + f" Grip validation failed should not have found: {expected_missing_object_id}" + validate_document_in_grip( + expected_missing_object_id, auth=auth, project_id=project_id + ) + ok = ( + ok + + f" Grip validation failed should not have found: {expected_missing_object_id}" + ) except Exception: pass try: validate_document_in_elastic(expected_missing_object_id, auth=auth) - ok = ok + f" Elastic validation failed should not have found: {expected_missing_object_id}" + ok = ( + ok + + f" Elastic validation failed should not have found: {expected_missing_object_id}" + ) except Exception: pass - assert ok == '', ok + assert ok == "", ok # remove the project from the server. # TODO note, this does not remove the files from the bucket (UChicago bug) @@ -517,7 +537,12 @@ def read_dvc(file_path="MANIFEST/my-project-data/hello.txt.dvc"): return dvc -def _create_project(project_id, runner, add_files=True, files=("my-project-data/hello.txt", "my-project-data/hello2.txt")) -> list[str]: +def _create_project( + project_id, + runner, + add_files=True, + files=("my-project-data/hello.txt", "my-project-data/hello2.txt"), +) -> list[str]: """Create a project and add files to it.""" assert os.environ.get( diff --git a/tests/unit/dataframer/test_dataframer.py b/tests/unit/dataframer/test_dataframer.py index b5a9f692..cc890bf4 100644 --- a/tests/unit/dataframer/test_dataframer.py +++ b/tests/unit/dataframer/test_dataframer.py @@ -5,7 +5,10 @@ from gen3_tracker.common import read_ndjson_file -from gen3_tracker.meta.dataframer import LocalFHIRDatabase, validate_and_transform_graphql_field_name +from gen3_tracker.meta.dataframer import ( + LocalFHIRDatabase, + validate_and_transform_graphql_field_name, +) from gen3_tracker.meta.entities import SimplifiedResource from pathlib import Path @@ -274,7 +277,16 @@ def htan_resources(htan_db): @pytest.fixture() -def docref_row(simplified_resources, document_reference_key): +def patient_row_as_subject(): + return { + "patient_active": True, + "patient_id": "bc4e1aa6-cb52-40e9-8f20-594d9c84f920", + "patient_identifier": "patientX_1234", + } + + +@pytest.fixture() +def docref_row(simplified_resources, document_reference_key, patient_row_as_subject): """Based on metadata files, create expected DocumentReference row, populated with any Observations that focus on it""" return { **simplified_resources[document_reference_key], @@ -296,22 +308,23 @@ def docref_row(simplified_resources, document_reference_key): "specimen_id": "60c67a06-ea2d-4d24-9249-418dc77a16a9", "specimen_identifier": "specimen_1234_labA", "specimen_processing": "Double-Spun", + **patient_row_as_subject, } @pytest.fixture() -def research_subject_row(simplified_resources, research_subject_key): +def research_subject_row( + simplified_resources, research_subject_key, patient_row_as_subject +): """Based on metadata files, create an expected Observations dataframe""" return { **simplified_resources[research_subject_key], - "patient_active": True, - "patient_id": "bc4e1aa6-cb52-40e9-8f20-594d9c84f920", - "patient_identifier": "patientX_1234", + **patient_row_as_subject, } @pytest.fixture() -def specimen_row(simplified_resources, specimen_key): +def specimen_row(simplified_resources, specimen_key, patient_row_as_subject): return { **simplified_resources[specimen_key], "sample_type": "Primary Solid Tumor", @@ -325,9 +338,7 @@ def specimen_row(simplified_resources, specimen_key): "biopsy_procedure_type": "Biopsy - Core", "biopsy_anatomical_location": "top axillary lymph node", "percent_tumor": "30", - "patient_identifier": "patientX_1234", - "patient_id": "bc4e1aa6-cb52-40e9-8f20-594d9c84f920", - "patient_active": True, + **patient_row_as_subject, } @@ -377,7 +388,9 @@ def test_htan_simplified(htan_resources): simplified = SimplifiedResource.build(resource=resource).simplified for key, value in simplified.items(): transformed_key = validate_and_transform_graphql_field_name(key) - assert key == transformed_key, f'Key "{key}" in {resource_type} was not transformed to valid GraphQL. Should be "{transformed_key}"' + assert ( + key == transformed_key + ), f'Key "{key}" in {resource_type} was not transformed to valid GraphQL. Should be "{transformed_key}"' def test_flattened_document_references(local_db, docref_row): @@ -413,30 +426,36 @@ def test_flattened_research_subjects(local_db, research_subject_row): # Using pytest.mark.parametrize to test multiple inputs and expected outputs -@pytest.mark.parametrize("input_name, expected_output", [ - ("user_name", "user_name"), - ("123fieldName", "_123fieldName"), - ("product-id", "product_id"), - ("item Name", "item_Name"), - ("my_field_with spaces and!@", "my_field_with_spaces_and__"), - ("__typename", "_typename"), # Valid, but reserved for introspection - ("__schema", "_schema"), # Valid, but reserved for introspection - ("__type", "_type", ), # Valid, but reserved for introspection - ("validFieldName", "validFieldName"), - ("anotherValid_Field", "anotherValid_Field"), - ("field_with_hyphen-and-space", "field_with_hyphen_and_space"), - ("", "_"), # becomes a single underscore - (" leading_space", "__leading_space"), - ("trailing_space ", "trailing_space__"), - ("some.field", "some_field"), - ("0_number_start", "_0_number_start"), - ("Cell Morphology Assessment", "Cell_Morphology_Assessment"), - ("Image ID", "Image_ID"), - ("Pixels BigEndian", "Pixels_BigEndian"), - ("Fixative Type", "Fixative_Type"), - ("Storage Method", "Storage_Method"), - ("Tumor Tissue Type", "Tumor_Tissue_Type"), -]) +@pytest.mark.parametrize( + "input_name, expected_output", + [ + ("user_name", "user_name"), + ("123fieldName", "_123fieldName"), + ("product-id", "product_id"), + ("item Name", "item_Name"), + ("my_field_with spaces and!@", "my_field_with_spaces_and__"), + ("__typename", "_typename"), # Valid, but reserved for introspection + ("__schema", "_schema"), # Valid, but reserved for introspection + ( + "__type", + "_type", + ), # Valid, but reserved for introspection + ("validFieldName", "validFieldName"), + ("anotherValid_Field", "anotherValid_Field"), + ("field_with_hyphen-and-space", "field_with_hyphen_and_space"), + ("", "_"), # becomes a single underscore + (" leading_space", "__leading_space"), + ("trailing_space ", "trailing_space__"), + ("some.field", "some_field"), + ("0_number_start", "_0_number_start"), + ("Cell Morphology Assessment", "Cell_Morphology_Assessment"), + ("Image ID", "Image_ID"), + ("Pixels BigEndian", "Pixels_BigEndian"), + ("Fixative Type", "Fixative_Type"), + ("Storage Method", "Storage_Method"), + ("Tumor Tissue Type", "Tumor_Tissue_Type"), + ], +) def test_validate_and_transform_graphql_field_name(input_name, expected_output): """ Tests the validate_and_transform_graphql_field_name function with various inputs. diff --git a/tests/unit/test_indexclient.py b/tests/unit/test_indexclient.py index 6fcab885..22e8c9df 100644 --- a/tests/unit/test_indexclient.py +++ b/tests/unit/test_indexclient.py @@ -10,7 +10,10 @@ def index_client(): Fixture to provide an index client for testing. This is a placeholder and should be replaced with actual client initialization. """ - with patch('gen3.auth.Gen3Auth.get_access_token', return_value="accesstoken:///mock_access_token"): + with patch( + "gen3.auth.Gen3Auth.get_access_token", + return_value="accesstoken:///mock_access_token", + ): yield Gen3Index(auth_provider=Gen3Auth(endpoint="https://example.com/auth")) @@ -33,4 +36,4 @@ def test_authorization_header_present(index_client: Gen3Index): auth: Gen3Auth = mock_get.call_args[1].get("auth", None) assert auth is not None, "Auth object should not be None" auth_value = auth._get_auth_value() - assert auth_value == 'bearer accesstoken:///mock_access_token' + assert auth_value == "bearer accesstoken:///mock_access_token" diff --git a/tests/unit/test_none_fields.py b/tests/unit/test_none_fields.py index 40245b09..8e9394bc 100644 --- a/tests/unit/test_none_fields.py +++ b/tests/unit/test_none_fields.py @@ -1,8 +1,7 @@ - - def test_none(): """Test None fields.""" from fhir.resources.patient import Patient + patient_dict = {"multipleBirthInteger": None, "name": None} patient = Patient.validate(patient_dict) assert patient